diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..84c84d3128 Binary files /dev/null and b/.gitignore differ diff --git a/Algo256/cuda_blake256.cu b/Algo256/cuda_blake256.cu new file mode 100644 index 0000000000..f0af0fcdc9 --- /dev/null +++ b/Algo256/cuda_blake256.cu @@ -0,0 +1,251 @@ +/** + * Blake-256 Cuda Kernel (Tested on SM 5.0) + * + * Tanguy Pruvot - Nov. 2014 + */ + +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + + +#include +#include +#include + +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); + +extern int compute_version[8]; +#include "cuda_helper.h" + +__constant__ static uint32_t c_data[20]; + +__constant__ static uint32_t sigma[16][16]; +static uint32_t c_sigma[16][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } +}; + + +static const uint32_t c_IV256[8] = { + 0x6A09E667, 0xBB67AE85, + 0x3C6EF372, 0xA54FF53A, + 0x510E527F, 0x9B05688C, + 0x1F83D9AB, 0x5BE0CD19 +}; + +__device__ __constant__ static uint32_t cpu_h[8]; + +__device__ __constant__ static uint32_t 
u256[16]; +static const uint32_t c_u256[16] = { + 0x243F6A88, 0x85A308D3, + 0x13198A2E, 0x03707344, + 0xA4093822, 0x299F31D0, + 0x082EFA98, 0xEC4E6C89, + 0x452821E6, 0x38D01377, + 0xBE5466CF, 0x34E90C6C, + 0xC0AC29B7, 0xC97C50DD, + 0x3F84D5B5, 0xB5470917 +}; + +#define GS2(a,b,c,d,x) { \ + const uint32_t idx1 = sigma[r][x]; \ + const uint32_t idx2 = sigma[r][x+1]; \ + v[a] += (m[idx1] ^ u256[idx2]) + v[b]; \ + v[d] = SPH_ROTL32(v[d] ^ v[a], 16); \ + v[c] += v[d]; \ + v[b] = SPH_ROTR32(v[b] ^ v[c], 12); \ +\ + v[a] += (m[idx2] ^ u256[idx1]) + v[b]; \ + v[d] = SPH_ROTR32(v[d] ^ v[a], 8); \ + v[c] += v[d]; \ + v[b] = SPH_ROTR32(v[b] ^ v[c], 7); \ +} +//#define ROTL32(x, n) ((x) << (n)) | ((x) >> (32 - (n))) +#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#define hostGS(a,b,c,d,x) { \ + const uint32_t idx1 = c_sigma[r][x]; \ + const uint32_t idx2 = c_sigma[r][x+1]; \ + v[a] += (m[idx1] ^ c_u256[idx2]) + v[b]; \ + v[d] = ROTR32(v[d] ^ v[a], 16); \ + v[c] += v[d]; \ + v[b] = ROTR32(v[b] ^ v[c], 12); \ +\ + v[a] += (m[idx2] ^ c_u256[idx1]) + v[b]; \ + v[d] = ROTR32(v[d] ^ v[a], 8); \ + v[c] += v[d]; \ + v[b] = ROTR32(v[b] ^ v[c], 7); \ + } + +/* Second part (64-80) msg never change, store it */ +__device__ __constant__ static const uint32_t c_Padding[16] = { + 0, 0, 0, 0, + 0x80000000, 0, 0, 0, + 0, 0, 0, 0, + 0, 1, 0, 640, +}; + +__host__ __forceinline__ static void blake256_compress1st(uint32_t *h, const uint32_t *block, const uint32_t T0) +{ + uint32_t m[16]; + uint32_t v[16]; + + + for (int i = 0; i < 16; i++) { + m[i] = block[i]; + } + + + for (int i = 0; i < 8; i++) + v[i] = h[i]; + + v[8] = c_u256[0]; + v[9] = c_u256[1]; + v[10] = c_u256[2]; + v[11] = c_u256[3]; + + v[12] = c_u256[4] ^ T0; + v[13] = c_u256[5] ^ T0; + v[14] = c_u256[6]; + v[15] = c_u256[7]; + + + for (int r = 0; r < 14; r++) { + /* column step */ + hostGS(0, 4, 0x8, 0xC, 0x0); + hostGS(1, 5, 0x9, 0xD, 0x2); + hostGS(2, 6, 0xA, 0xE, 0x4); + hostGS(3, 7, 0xB, 0xF, 0x6); + /* diagonal step 
*/ + hostGS(0, 5, 0xA, 0xF, 0x8); + hostGS(1, 6, 0xB, 0xC, 0xA); + hostGS(2, 7, 0x8, 0xD, 0xC); + hostGS(3, 4, 0x9, 0xE, 0xE); + } + + for (int i = 0; i < 16; i++) { + int j = i & 7; + h[j] ^= v[i]; + } +} + +void blake256_cpu_init(int thr_id, int threads) +{ + + cudaMemcpyToSymbol(u256, c_u256, sizeof(c_u256), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(sigma, c_sigma, sizeof(c_sigma), 0, cudaMemcpyHostToDevice); +} + +__device__ __forceinline__ static void blake256_compress2nd(uint32_t *h, const uint32_t *block, const uint32_t T0) +{ + uint32_t m[16]; + uint32_t v[16]; + + m[0] = block[0]; + m[1] = block[1]; + m[2] = block[2]; + m[3] = block[3]; + +#pragma unroll + for (int i = 4; i < 16; i++) { + m[i] = c_Padding[i]; + } + +#pragma unroll 8 + for (int i = 0; i < 8; i++) + v[i] = h[i]; + + v[8] = u256[0]; + v[9] = u256[1]; + v[10] = u256[2]; + v[11] = u256[3]; + + v[12] = u256[4] ^ T0; + v[13] = u256[5] ^ T0; + v[14] = u256[6]; + v[15] = u256[7]; + +#pragma unroll 14 + for (int r = 0; r < 14; r++) { + /* column step */ + GS2(0, 4, 0x8, 0xC, 0x0); + GS2(1, 5, 0x9, 0xD, 0x2); + GS2(2, 6, 0xA, 0xE, 0x4); + GS2(3, 7, 0xB, 0xF, 0x6); + /* diagonal step */ + GS2(0, 5, 0xA, 0xF, 0x8); + GS2(1, 6, 0xB, 0xC, 0xA); + GS2(2, 7, 0x8, 0xD, 0xC); + GS2(3, 4, 0x9, 0xE, 0xE); + } +#pragma unroll 16 + for (int i = 0; i < 16; i++) { + int j = i & 7; + h[j] ^= v[i]; + } +} + + +__global__ __launch_bounds__(256,3) void blake256_gpu_hash_80(const uint32_t threads, const uint32_t startNonce, uint64_t * Hash) +{ + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + const uint32_t nonce = startNonce + thread; + uint32_t h[8]; + + uint32_t input[4]; + #pragma unroll 8 + for (int i = 0; i<8; i++) { h[i] = cpu_h[i];} + #pragma unroll 3 + for (int i = 0; i < 3; ++i) input[i] = c_data[16 + i]; + input[3] = nonce; + blake256_compress2nd(h, input, 640); + + + #pragma unroll +for (int i = 0; i<4; i++) { Hash[i*threads + thread] = 
cuda_swab32ll(MAKE_ULONGLONG(h[2 * i], h[2*i+1])); } + + } +} + +__host__ void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint64_t *Hash, int order) +{ + const int threadsperblock = 256; + + + dim3 grid((threads + threadsperblock - 1) / threadsperblock); + dim3 block(threadsperblock); + size_t shared_size = 0; + + + blake256_gpu_hash_80 << > >(threads, startNonce, Hash); + MyStreamSynchronize(NULL, order, thr_id); + +} + +__host__ void blake256_cpu_setBlock_80(uint32_t *pdata) +{ + uint32_t data[20]; + memcpy(data, pdata, 80); + uint32_t h[8]; + for (int i = 0; i<8; i++) { h[i] = c_IV256[i]; } + blake256_compress1st(h, pdata, 512); + cudaMemcpyToSymbol(cpu_h, h, sizeof(h), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(c_data, data, sizeof(data), 0, cudaMemcpyHostToDevice); +} + diff --git a/Algo256/cuda_groestl256.cu b/Algo256/cuda_groestl256.cu new file mode 100644 index 0000000000..1d776a23e1 --- /dev/null +++ b/Algo256/cuda_groestl256.cu @@ -0,0 +1,372 @@ + +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + + +#include +#include +#include + + + +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); +extern int compute_version[8]; +uint32_t *d_gnounce[8]; +uint32_t *d_GNonce[8]; +__constant__ uint32_t pTarget[8]; + +#include "cuda_helper.h" + + +//////////////////////////////////////////////////////////////////////// + +#define SPH_C32(x) ((uint32_t)(x ## U)) +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) + +#define C32e(x) ((SPH_C32(x) >> 24) \ + | ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \ + | ((SPH_C32(x) << 8) & SPH_C32(0x00FF0000)) \ + | ((SPH_C32(x) << 24) & SPH_C32(0xFF000000))) + +#define PC32up(j, r) ((uint32_t)((j) + (r))) +#define PC32dn(j, r) 0 +#define QC32up(j, r) 0xFFFFFFFF +#define QC32dn(j, r) (((uint32_t)(r) << 24) ^ SPH_T32(~((uint32_t)(j) << 24))) + +#define B32_0(x) __byte_perm(x, 0, 0x4440) +//((x) & 0xFF) +#define B32_1(x) 
__byte_perm(x, 0, 0x4441) +//(((x) >> 8) & 0xFF) +#define B32_2(x) __byte_perm(x, 0, 0x4442) +//(((x) >> 16) & 0xFF) +#define B32_3(x) __byte_perm(x, 0, 0x4443) +//((x) >> 24) +#define MAXWELL_OR_FERMI 1 +#if MAXWELL_OR_FERMI +#define USE_SHARED 1 +// Maxwell and Fermi cards get the best speed with SHARED access it seems. +#if USE_SHARED +#define T0up(x) (*((uint32_t*)mixtabs + ( (x)))) +#define T0dn(x) (*((uint32_t*)mixtabs + (256+(x)))) +#define T1up(x) (*((uint32_t*)mixtabs + (512+(x)))) +#define T1dn(x) (*((uint32_t*)mixtabs + (768+(x)))) +#define T2up(x) (*((uint32_t*)mixtabs + (1024+(x)))) +#define T2dn(x) (*((uint32_t*)mixtabs + (1280+(x)))) +#define T3up(x) (*((uint32_t*)mixtabs + (1536+(x)))) +#define T3dn(x) (*((uint32_t*)mixtabs + (1792+(x)))) +#else +#define T0up(x) tex1Dfetch(t0up2, x) +#define T0dn(x) tex1Dfetch(t0dn2, x) +#define T1up(x) tex1Dfetch(t1up2, x) +#define T1dn(x) tex1Dfetch(t1dn2, x) +#define T2up(x) tex1Dfetch(t2up2, x) +#define T2dn(x) tex1Dfetch(t2dn2, x) +#define T3up(x) tex1Dfetch(t3up2, x) +#define T3dn(x) tex1Dfetch(t3dn2, x) +#endif +#else +#define USE_SHARED 1 +// a healthy mix between shared and textured access provides the highest speed on Compute 3.0 and 3.5! 
+#define T0up(x) (*((uint32_t*)mixtabs + ( (x)))) +#define T0dn(x) tex1Dfetch(t0dn2, x) +#define T1up(x) tex1Dfetch(t1up2, x) +#define T1dn(x) (*((uint32_t*)mixtabs + (768+(x)))) +#define T2up(x) tex1Dfetch(t2up2, x) +#define T2dn(x) (*((uint32_t*)mixtabs + (1280+(x)))) +#define T3up(x) (*((uint32_t*)mixtabs + (1536+(x)))) +#define T3dn(x) tex1Dfetch(t3dn2, x) +#endif + +texture t0up2; +texture t0dn2; +texture t1up2; +texture t1dn2; +texture t2up2; +texture t2dn2; +texture t3up2; +texture t3dn2; + + +#define RSTT(d0, d1, a, b0, b1, b2, b3, b4, b5, b6, b7) do { \ + t[d0] = T0up(B32_0(a[b0])) \ + ^ T1up(B32_1(a[b1])) \ + ^ T2up(B32_2(a[b2])) \ + ^ T3up(B32_3(a[b3])) \ + ^ T0dn(B32_0(a[b4])) \ + ^ T1dn(B32_1(a[b5])) \ + ^ T2dn(B32_2(a[b6])) \ + ^ T3dn(B32_3(a[b7])); \ + t[d1] = T0dn(B32_0(a[b0])) \ + ^ T1dn(B32_1(a[b1])) \ + ^ T2dn(B32_2(a[b2])) \ + ^ T3dn(B32_3(a[b3])) \ + ^ T0up(B32_0(a[b4])) \ + ^ T1up(B32_1(a[b5])) \ + ^ T2up(B32_2(a[b6])) \ + ^ T3up(B32_3(a[b7])); \ + } while (0) + + +extern uint32_t T0up_cpu[]; +extern uint32_t T0dn_cpu[]; +extern uint32_t T1up_cpu[]; +extern uint32_t T1dn_cpu[]; +extern uint32_t T2up_cpu[]; +extern uint32_t T2dn_cpu[]; +extern uint32_t T3up_cpu[]; +extern uint32_t T3dn_cpu[]; + + +__device__ __forceinline__ void groestl256_perm_P(uint32_t *a, uint32_t *mixtabs) +{ + + uint32_t t[16]; + #pragma unroll + for (int r = 0; r<10; r++) + { + + a[0x0] ^= PC32up(0x00, r); + a[0x2] ^= PC32up(0x10, r); + a[0x4] ^= PC32up(0x20, r); + a[0x6] ^= PC32up(0x30, r); + a[0x8] ^= PC32up(0x40, r); + a[0xA] ^= PC32up(0x50, r); + a[0xC] ^= PC32up(0x60, r); + a[0xE] ^= PC32up(0x70, r); + RSTT(0x0, 0x1, a, 0x0, 0x2, 0x4, 0x6, 0x9, 0xB, 0xD, 0xF); + RSTT(0x2, 0x3, a, 0x2, 0x4, 0x6, 0x8, 0xB, 0xD, 0xF, 0x1); + RSTT(0x4, 0x5, a, 0x4, 0x6, 0x8, 0xA, 0xD, 0xF, 0x1, 0x3); + RSTT(0x6, 0x7, a, 0x6, 0x8, 0xA, 0xC, 0xF, 0x1, 0x3, 0x5); + RSTT(0x8, 0x9, a, 0x8, 0xA, 0xC, 0xE, 0x1, 0x3, 0x5, 0x7); + RSTT(0xA, 0xB, a, 0xA, 0xC, 0xE, 0x0, 0x3, 0x5, 0x7, 0x9); + 
RSTT(0xC, 0xD, a, 0xC, 0xE, 0x0, 0x2, 0x5, 0x7, 0x9, 0xB); + RSTT(0xE, 0xF, a, 0xE, 0x0, 0x2, 0x4, 0x7, 0x9, 0xB, 0xD); + +#pragma unroll + for (int k = 0; k<16; k++) + a[k] = t[k]; + + } +} + +__device__ __forceinline__ void groestl256_perm_Pf( uint32_t *a, uint32_t *mixtabs) +{ + + uint32_t t[16]; +#pragma unroll + for (int r = 0; r<9; r++) + { + a[0x0] ^= PC32up(0x00, r); + a[0x2] ^= PC32up(0x10, r); + a[0x4] ^= PC32up(0x20, r); + a[0x6] ^= PC32up(0x30, r); + a[0x8] ^= PC32up(0x40, r); + a[0xA] ^= PC32up(0x50, r); + a[0xC] ^= PC32up(0x60, r); + a[0xE] ^= PC32up(0x70, r); + RSTT(0x0, 0x1, a, 0x0, 0x2, 0x4, 0x6, 0x9, 0xB, 0xD, 0xF); + RSTT(0x2, 0x3, a, 0x2, 0x4, 0x6, 0x8, 0xB, 0xD, 0xF, 0x1); + RSTT(0x4, 0x5, a, 0x4, 0x6, 0x8, 0xA, 0xD, 0xF, 0x1, 0x3); + RSTT(0x6, 0x7, a, 0x6, 0x8, 0xA, 0xC, 0xF, 0x1, 0x3, 0x5); + RSTT(0x8, 0x9, a, 0x8, 0xA, 0xC, 0xE, 0x1, 0x3, 0x5, 0x7); + RSTT(0xA, 0xB, a, 0xA, 0xC, 0xE, 0x0, 0x3, 0x5, 0x7, 0x9); + RSTT(0xC, 0xD, a, 0xC, 0xE, 0x0, 0x2, 0x5, 0x7, 0x9, 0xB); + RSTT(0xE, 0xF, a, 0xE, 0x0, 0x2, 0x4, 0x7, 0x9, 0xB, 0xD); + +#pragma unroll + for (int k = 0; k<16; k++) + a[k] = t[k]; + + } + a[0x0] ^= 0x09; + a[0x2] ^= 0x19; + a[0x4] ^= 0x29; + a[0xE] ^= 0x79; + RSTT(0xE, 0xF, a, 0xE, 0x0, 0x2, 0x4, 0x7, 0x9, 0xB, 0xD); + a[14] = t[14]; + a[15] = t[15]; + +} + +__device__ __forceinline__ void groestl256_perm_Q(uint32_t *a, uint32_t *mixtabs) +{ + uint32_t t[16]; + #pragma unroll + for (int r = 0; r<10; r++) + { + a[0x0] = ~a[0x0]; + a[0x1] ^= QC32dn(0x00, r); + a[0x2] = ~a[0x2]; + a[0x3] ^= QC32dn(0x10, r); + a[0x4] = ~a[0x4]; + a[0x5] ^= QC32dn(0x20, r); + a[0x6] = ~a[0x6]; + a[0x7] ^= QC32dn(0x30, r); + a[0x8] = ~a[0x8]; + a[0x9] ^= QC32dn(0x40, r); + a[0xA] = ~a[0xA]; + a[0xB] ^= QC32dn(0x50, r); + a[0xC] = ~a[0xC]; + a[0xD] ^= QC32dn(0x60, r); + a[0xE] = ~a[0xE]; + a[0xF] ^= QC32dn(0x70, r); + RSTT(0x0, 0x1, a, 0x2, 0x6, 0xA, 0xE, 0x1, 0x5, 0x9, 0xD); + RSTT(0x2, 0x3, a, 0x4, 0x8, 0xC, 0x0, 0x3, 0x7, 0xB, 0xF); + RSTT(0x4, 0x5, a, 
0x6, 0xA, 0xE, 0x2, 0x5, 0x9, 0xD, 0x1); + RSTT(0x6, 0x7, a, 0x8, 0xC, 0x0, 0x4, 0x7, 0xB, 0xF, 0x3); + RSTT(0x8, 0x9, a, 0xA, 0xE, 0x2, 0x6, 0x9, 0xD, 0x1, 0x5); + RSTT(0xA, 0xB, a, 0xC, 0x0, 0x4, 0x8, 0xB, 0xF, 0x3, 0x7); + RSTT(0xC, 0xD, a, 0xE, 0x2, 0x6, 0xA, 0xD, 0x1, 0x5, 0x9); + RSTT(0xE, 0xF, a, 0x0, 0x4, 0x8, 0xC, 0xF, 0x3, 0x7, 0xB); + +#pragma unroll + for (int k = 0; k<16; k++) + a[k] = t[k]; + + + } +} + +__global__ __launch_bounds__(256,1) void groestl256_gpu_hash32(int threads, uint32_t startNounce, uint64_t *outputHash, uint32_t *nonceVector) +{ + +#if USE_SHARED + extern __shared__ uint32_t mixtabs[]; + + if (threadIdx.x < 256) + { + *((uint32_t*)mixtabs + (threadIdx.x)) = tex1Dfetch(t0up2, threadIdx.x); + *((uint32_t*)mixtabs + (256 + threadIdx.x)) = tex1Dfetch(t0dn2, threadIdx.x); + *((uint32_t*)mixtabs + (512 + threadIdx.x)) = tex1Dfetch(t1up2, threadIdx.x); + *((uint32_t*)mixtabs + (768 + threadIdx.x)) = tex1Dfetch(t1dn2, threadIdx.x); + *((uint32_t*)mixtabs + (1024 + threadIdx.x)) = tex1Dfetch(t2up2, threadIdx.x); + *((uint32_t*)mixtabs + (1280 + threadIdx.x)) = tex1Dfetch(t2dn2, threadIdx.x); + *((uint32_t*)mixtabs + (1536 + threadIdx.x)) = tex1Dfetch(t3up2, threadIdx.x); + *((uint32_t*)mixtabs + (1792 + threadIdx.x)) = tex1Dfetch(t3dn2, threadIdx.x); + } + + __syncthreads(); +#endif + + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + // GROESTL + uint32_t message[16]; + uint32_t state[16]; + + uint32_t nonce = startNounce +thread; + +#pragma unroll + for (int k = 0; k<4; k++) LOHI(message[2*k],message[2*k+1],outputHash[k*threads+thread]); +#pragma unroll + for (int k = 9; k<15; k++) + message[k] = 0; + + message[8] = 0x80; + message[15] = 0x01000000; + +#pragma unroll 16 + for (int u = 0; u<16; u++) state[u] = message[u]; + state[15] ^= 0x10000; + + // Perm + +#if USE_SHARED + + groestl256_perm_P(state, mixtabs); + state[15] ^= 0x10000; + groestl256_perm_Q(message, mixtabs); + +#else + 
groestl256_perm_P(state, NULL); + state[15] ^= 0x10000; + groestl256_perm_Q(message, NULL); +#endif +#pragma unroll 16 + for (int u = 0; u<16; u++) state[u] ^= message[u]; + message[14] = state[14]; + message[15] = state[15]; + +#if USE_SHARED + groestl256_perm_Pf(state, mixtabs); +#else + groestl256_perm_Pf(state, NULL); +#endif +state[14] ^= message[14]; +state[15] ^= message[15]; + + if (((uint64_t*)state)[7] <= ((uint64_t*)pTarget)[3]) { nonceVector[0] = nonce; } +} +} + + +#define texDef(texname, texmem, texsource, texsize) \ + unsigned int *texmem; \ + cudaMalloc(&texmem, texsize); \ + cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \ + texname.normalized = 0; \ + texname.filterMode = cudaFilterModePoint; \ + texname.addressMode[0] = cudaAddressModeClamp; \ + { cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); \ + cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } \ + + + + + + +void groestl256_cpu_init(int thr_id, int threads) +{ + + // Texturen mit obigem Makro initialisieren + texDef(t0up2, d_T0up, T0up_cpu, sizeof(uint32_t) * 256); + texDef(t0dn2, d_T0dn, T0dn_cpu, sizeof(uint32_t) * 256); + texDef(t1up2, d_T1up, T1up_cpu, sizeof(uint32_t) * 256); + texDef(t1dn2, d_T1dn, T1dn_cpu, sizeof(uint32_t) * 256); + texDef(t2up2, d_T2up, T2up_cpu, sizeof(uint32_t) * 256); + texDef(t2dn2, d_T2dn, T2dn_cpu, sizeof(uint32_t) * 256); + texDef(t3up2, d_T3up, T3up_cpu, sizeof(uint32_t) * 256); + texDef(t3dn2, d_T3dn, T3dn_cpu, sizeof(uint32_t) * 256); + + cudaMalloc(&d_GNonce[thr_id], sizeof(uint32_t)); + cudaMallocHost(&d_gnounce[thr_id], 1*sizeof(uint32_t)); +} + + +__host__ uint32_t groestl256_cpu_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order) +{ + uint32_t result = 0xffffffff; + cudaMemset(d_GNonce[thr_id], 0xff, sizeof(uint32_t)); + + + + const int threadsperblock = 256; + + + // berechne wie viele Thread Blocks wir brauchen + dim3 grid((threads + 
threadsperblock-1)/threadsperblock); + dim3 block(threadsperblock); + +#if USE_SHARED + size_t shared_size = 8 * 256 * sizeof(uint32_t); +#else + size_t shared_size = 0; +#endif + groestl256_gpu_hash32<<>>(threads, startNounce, d_outputHash, d_GNonce[thr_id]); + + MyStreamSynchronize(NULL, order, thr_id); + cudaMemcpy(d_gnounce[thr_id], d_GNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost); +// cudaThreadSynchronize(); + result = *d_gnounce[thr_id]; + + return result; +} + + +__host__ void groestl256_setTarget(const void *pTargetIn) +{ + cudaMemcpyToSymbol(pTarget, pTargetIn, 8 * sizeof(uint32_t), 0, cudaMemcpyHostToDevice); +} \ No newline at end of file diff --git a/Algo256/cuda_keccak256.cu b/Algo256/cuda_keccak256.cu new file mode 100644 index 0000000000..b24263b4b9 --- /dev/null +++ b/Algo256/cuda_keccak256.cu @@ -0,0 +1,327 @@ + +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + + +#include +#include +#include + + +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); +extern int compute_version[8]; +#include "cuda_helper.h" + +static const uint64_t host_keccak_round_constants[24] = { + 0x0000000000000001ull, 0x0000000000008082ull, + 0x800000000000808aull, 0x8000000080008000ull, + 0x000000000000808bull, 0x0000000080000001ull, + 0x8000000080008081ull, 0x8000000000008009ull, + 0x000000000000008aull, 0x0000000000000088ull, + 0x0000000080008009ull, 0x000000008000000aull, + 0x000000008000808bull, 0x800000000000008bull, + 0x8000000000008089ull, 0x8000000000008003ull, + 0x8000000000008002ull, 0x8000000000000080ull, + 0x000000000000800aull, 0x800000008000000aull, + 0x8000000080008081ull, 0x8000000000008080ull, + 0x0000000080000001ull, 0x8000000080008008ull +}; +uint32_t *d_nounce[8]; +uint32_t *d_KNonce[8]; +__constant__ uint32_t pTarget[8]; +__constant__ uint64_t keccak_round_constants[24]; + + + __constant__ uint64_t c_PaddedMessage80[10]; // padded message (80 bytes + padding) + + +static 
__device__ __forceinline__ void keccak_blockv35(uint2 *s, const uint64_t *keccak_round_constants) { + size_t i; + uint2 t[5], u[5], v, w; + + + + #pragma unroll + for (i = 0; i < 24; i++) { + /* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */ + t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20]; + t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21]; + t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22]; + t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23]; + t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24]; + + /* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */ + u[0] = t[4] ^ ROL2(t[1], 1); + u[1] = t[0] ^ ROL2(t[2], 1); + u[2] = t[1] ^ ROL2(t[3], 1); + u[3] = t[2] ^ ROL2(t[4], 1); + u[4] = t[3] ^ ROL2(t[0], 1); + + /* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */ + s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0]; + s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1]; + s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2]; + s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3]; + s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4]; + + /* rho pi: b[..] = rotl(a[..], ..) 
*/ + v = s[1]; + s[1] = ROL2(s[6], 44); + s[6] = ROL2(s[9], 20); + s[9] = ROL2(s[22], 61); + s[22] = ROL2(s[14], 39); + s[14] = ROL2(s[20], 18); + s[20] = ROL2(s[2], 62); + s[2] = ROL2(s[12], 43); + s[12] = ROL2(s[13], 25); + s[13] = ROL2(s[19], 8); + s[19] = ROL2(s[23], 56); + s[23] = ROL2(s[15], 41); + s[15] = ROL2(s[4], 27); + s[4] = ROL2(s[24], 14); + s[24] = ROL2(s[21], 2); + s[21] = ROL2(s[8], 55); + s[8] = ROL2(s[16], 45); + s[16] = ROL2(s[5], 36); + s[5] = ROL2(s[3], 28); + s[3] = ROL2(s[18], 21); + s[18] = ROL2(s[17], 15); + s[17] = ROL2(s[11], 10); + s[11] = ROL2(s[7], 6); + s[7] = ROL2(s[10], 3); + s[10] = ROL2(v, 1); + + /* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */ + v = s[0]; w = s[1]; s[0] ^= (~w) & s[2]; s[1] ^= (~s[2]) & s[3]; s[2] ^= (~s[3]) & s[4]; s[3] ^= (~s[4]) & v; s[4] ^= (~v) & w; + v = s[5]; w = s[6]; s[5] ^= (~w) & s[7]; s[6] ^= (~s[7]) & s[8]; s[7] ^= (~s[8]) & s[9]; s[8] ^= (~s[9]) & v; s[9] ^= (~v) & w; + v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w; + v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w; + v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w; + + /* iota: a[0,0] ^= round constant */ + s[0] ^= vectorize(keccak_round_constants[i]); + } +} + +static __device__ __forceinline__ void keccak_blockv30(uint64_t *s, const uint64_t *keccak_round_constants) { + size_t i; + uint64_t t[5], u[5], v, w; + +#pragma unroll + + // #pragma unroll + for (i = 0; i < 24; i++) { + /* theta: c = a[0,i] ^ a[1,i] ^ .. 
a[4,i] */ + t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20]; + t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21]; + t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22]; + t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23]; + t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24]; + + /* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */ + u[0] = t[4] ^ ROTL64(t[1], 1); + u[1] = t[0] ^ ROTL64(t[2], 1); + u[2] = t[1] ^ ROTL64(t[3], 1); + u[3] = t[2] ^ ROTL64(t[4], 1); + u[4] = t[3] ^ ROTL64(t[0], 1); + + /* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */ + s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0]; + s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1]; + s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2]; + s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3]; + s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4]; + + /* rho pi: b[..] = rotl(a[..], ..) */ + v = s[1]; + s[1] = ROTL64(s[6], 44); + s[6] = ROTL64(s[9], 20); + s[9] = ROTL64(s[22], 61); + s[22] = ROTL64(s[14], 39); + s[14] = ROTL64(s[20], 18); + s[20] = ROTL64(s[2], 62); + s[2] = ROTL64(s[12], 43); + s[12] = ROTL64(s[13], 25); + s[13] = ROTL64(s[19], 8); + s[19] = ROTL64(s[23], 56); + s[23] = ROTL64(s[15], 41); + s[15] = ROTL64(s[4], 27); + s[4] = ROTL64(s[24], 14); + s[24] = ROTL64(s[21], 2); + s[21] = ROTL64(s[8], 55); + s[8] = ROTL64(s[16], 45); + s[16] = ROTL64(s[5], 36); + s[5] = ROTL64(s[3], 28); + s[3] = ROTL64(s[18], 21); + s[18] = ROTL64(s[17], 15); + s[17] = ROTL64(s[11], 10); + s[11] = ROTL64(s[7], 6); + s[7] = ROTL64(s[10], 3); + s[10] = ROTL64(v, 1); + + /* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */ + v = s[0]; w = s[1]; s[0] ^= (~w) & s[2]; s[1] ^= (~s[2]) & s[3]; s[2] ^= (~s[3]) & s[4]; s[3] ^= (~s[4]) & v; s[4] ^= (~v) & w; + v = s[5]; w = s[6]; s[5] ^= (~w) & s[7]; s[6] ^= (~s[7]) & s[8]; s[7] ^= (~s[8]) & s[9]; s[8] ^= (~s[9]) & v; s[9] ^= (~v) & w; + v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= 
(~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w; + v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w; + v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w; + + /* iota: a[0,0] ^= round constant */ + s[0] ^= keccak_round_constants[i]; + } +} + +__global__ void __launch_bounds__(256,3) keccak256_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce) +{ + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + + uint32_t nounce = startNounce + thread; +#if __CUDA_ARCH__ >= 350 + + uint2 keccak_gpu_state[25]; + #pragma unroll 25 + for (int i=0; i<25; i++) { + if(i<9) {keccak_gpu_state[i] = vectorize(c_PaddedMessage80[i]);} + else {keccak_gpu_state[i] = make_uint2(0,0);}} + keccak_gpu_state[9]= vectorize(c_PaddedMessage80[9]); + keccak_gpu_state[9].y = cuda_swab32(nounce); + keccak_gpu_state[10]=make_uint2(1,0); + keccak_gpu_state[16]=make_uint2(0,0x80000000); + keccak_blockv35(keccak_gpu_state,keccak_round_constants); + + if (devectorize(keccak_gpu_state[3]) <= ((uint64_t*)pTarget)[3]) {resNounce[0] = nounce;} + +#else + + uint64_t keccak_gpu_state[25]; + #pragma unroll 25 + for (int i = 0; i<25; i++) { + if (i<9) { keccak_gpu_state[i] = c_PaddedMessage80[i]; } + else { keccak_gpu_state[i] = 0; } + } + keccak_gpu_state[9] = REPLACE_HIWORD(c_PaddedMessage80[9], cuda_swab32(nounce)); + keccak_gpu_state[10] = 0x0000000000000001; + keccak_gpu_state[16] = 0x8000000000000000; + + keccak_blockv30(keccak_gpu_state, keccak_round_constants); + if (keccak_gpu_state[3] <= ((uint64_t*)pTarget)[3]) { resNounce[0] = nounce; } +#endif + + + } //thread +} + +__global__ void __launch_bounds__(256,3) keccak256_gpu_hash_32(int threads, uint32_t startNounce, uint64_t *outputHash) +{ + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + 
if (thread < threads) + { + + + uint2 keccak_gpu_state[25]; +#pragma unroll 25 + for (int i = 0; i<25; i++) { + if (i<4) { keccak_gpu_state[i] = vectorize(outputHash[i*threads+thread]); } + else { keccak_gpu_state[i] = make_uint2(0, 0 ); } + } + keccak_gpu_state[4] = make_uint2( 1, 0 ); + keccak_gpu_state[16] = make_uint2( 0, 0x80000000); + keccak_blockv35(keccak_gpu_state, keccak_round_constants); + +#pragma unroll 4 + for (int i=0; i<4;i++) { +outputHash[i*threads+thread]=devectorize(keccak_gpu_state[i]);} + + + } //thread +} + + +__global__ void __launch_bounds__(256, 3) keccak256_gpu_hash_32_v30(int threads, uint32_t startNounce, uint64_t *outputHash) +{ + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + + uint64_t keccak_gpu_state[25]; +#pragma unroll 25 + for (int i = 0; i<25; i++) { + if (i<4) { keccak_gpu_state[i] = outputHash[i*threads + thread]; } + else { keccak_gpu_state[i] = 0; } + } + keccak_gpu_state[4] = 0x0000000000000001; + keccak_gpu_state[16] = 0x8000000000000000; + + keccak_blockv30(keccak_gpu_state, keccak_round_constants); +#pragma unroll 4 + for (int i = 0; i<4; i++) { outputHash[i*threads + thread] = keccak_gpu_state[i]; } + + } //thread +} + + + +void keccak256_cpu_init(int thr_id, int threads) +{ + + + cudaMemcpyToSymbol( keccak_round_constants, host_keccak_round_constants, sizeof(host_keccak_round_constants),0, cudaMemcpyHostToDevice); + cudaMalloc(&d_KNonce[thr_id], sizeof(uint32_t)); + cudaMallocHost(&d_nounce[thr_id], 1*sizeof(uint32_t)); +} + +__host__ uint32_t keccak256_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order) +{ + uint32_t result = 0xffffffff; + cudaMemset(d_KNonce[thr_id], 0xff, sizeof(uint32_t)); + const int threadsperblock = 256; + + // berechne wie viele Thread Blocks wir brauchen + dim3 grid((threads + threadsperblock-1)/threadsperblock); + dim3 block(threadsperblock); + + size_t shared_size = 0; + + keccak256_gpu_hash_80<<>>(threads, 
startNounce, d_outputHash, d_KNonce[thr_id]); + + MyStreamSynchronize(NULL, order, thr_id); + cudaMemcpy(d_nounce[thr_id], d_KNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost); + cudaThreadSynchronize(); + result = *d_nounce[thr_id]; + + return result; +} + +__host__ void keccak256_cpu_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order) +{ + + const int threadsperblock = 256; + + // berechne wie viele Thread Blocks wir brauchen + dim3 grid((threads + threadsperblock - 1) / threadsperblock); + dim3 block(threadsperblock); + + size_t shared_size = 0; + if (compute_version[thr_id] >= 35) { + keccak256_gpu_hash_32 << > >(threads, startNounce, d_outputHash); + } + else { + keccak256_gpu_hash_32_v30 << > >(threads, startNounce, d_outputHash); + } + MyStreamSynchronize(NULL, order, thr_id); + +} + +__host__ void keccak256_setBlock_80(void *pdata,const void *pTargetIn) +{ + unsigned char PaddedMessage[80]; + memcpy(PaddedMessage, pdata, 80); + cudaMemcpyToSymbol( pTarget, pTargetIn, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 10*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); +} \ No newline at end of file diff --git a/Algo256/cuda_skein256.cu b/Algo256/cuda_skein256.cu new file mode 100644 index 0000000000..82cdb96e6b --- /dev/null +++ b/Algo256/cuda_skein256.cu @@ -0,0 +1,311 @@ + +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + + +#include +#include +#include + + +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); +extern int compute_version[8]; +#include "cuda_helper.h" + +static __constant__ uint64_t SKEIN_IV512_256[8] = { + 0xCCD044A12FDB3E13, 0xE83590301A79A9EB, + 0x55AEA0614F816E6F, 0x2A2767A4AE9B94DB, + 0xEC06025E74DD7683, 0xE7A436CDC4746251, + 0xC36FBAF9393AD185, 0x3EEDBA1833EDFC13 +}; + +static __constant__ uint2 vSKEIN_IV512_256[8] = { + {0x2FDB3E13, 0xCCD044A1 }, + {0x1A79A9EB, 0xE8359030 }, + 
{0x4F816E6F, 0x55AEA061 }, + {0xAE9B94DB, 0x2A2767A4 }, + {0x74DD7683, 0xEC06025E }, + {0xC4746251, 0xE7A436CD }, + {0x393AD185, 0xC36FBAF9 }, + {0x33EDFC13, 0x3EEDBA18 } +}; + +static __constant__ int ROT256[8][4] = +{ + 46,36, 19, 37, + 33,27, 14, 42, + 17,49, 36, 39, + 44, 9, 54, 56, + 39,30, 34, 24, + 13,50, 10, 17, + 25,29, 39, 43, + 8, 35, 56, 22, +}; + +static __constant__ uint2 skein_ks_parity = { 0xA9FC1A22,0x1BD11BDA}; +static __constant__ uint64_t skein_ks_parity64 = 0x1BD11BDAA9FC1A22; +static __constant__ uint2 t12[6] = +{ +{ 0x20, 0 }, +{ 0, 0xf0000000 }, +{ 0x20, 0xf0000000 }, +{ 0x08, 0 }, +{ 0, 0xff000000 }, +{ 0x08, 0xff000000 } +}; + +static __constant__ uint64_t t12_30[6] = +{ 0x20, +0xf000000000000000, +0xf000000000000020, +0x08, +0xff00000000000000, +0xff00000000000008 +}; + + +static __forceinline__ __device__ void Round512v35(uint2 &p0, uint2 &p1, uint2 &p2, uint2 &p3, uint2 &p4, uint2 &p5, uint2 &p6, uint2 &p7, int ROT) +{ + p0 += p1; p1 = ROL2(p1, ROT256[ROT][0]); p1 ^= p0; + p2 += p3; p3 = ROL2(p3, ROT256[ROT][1]); p3 ^= p2; + p4 += p5; p5 = ROL2(p5, ROT256[ROT][2]); p5 ^= p4; + p6 += p7; p7 = ROL2(p7, ROT256[ROT][3]); p7 ^= p6; +} + + +static __forceinline__ __device__ void Round_8_512v35(uint2 *ks,uint2 *ts,uint2 &p0, uint2 &p1, uint2 &p2, uint2 &p3, uint2 &p4, + uint2 &p5, uint2 &p6, uint2 &p7, int R) +{ + Round512v35(p0, p1, p2, p3, p4, p5, p6, p7, 0); + Round512v35(p2, p1, p4, p7, p6, p5, p0, p3, 1); + Round512v35(p4, p1, p6, p3, p0, p5, p2, p7, 2); + Round512v35(p6, p1, p0, p7, p2, p5, p4, p3, 3); + p0 += ks[((R)+0) % 9]; /* inject the key schedule value */ + p1 += ks[((R)+1) % 9]; + p2 += ks[((R)+2) % 9]; + p3 += ks[((R)+3) % 9]; + p4 += ks[((R)+4) % 9]; + p5 += ks[((R)+5) % 9] + ts[((R)+0) % 3]; + p6 += ks[((R)+6) % 9] + ts[((R)+1) % 3]; + p7 += ks[((R)+7) % 9] + make_uint2((R),0); + Round512v35(p0, p1, p2, p3, p4, p5, p6, p7, 4); + Round512v35(p2, p1, p4, p7, p6, p5, p0, p3, 5); + Round512v35(p4, p1, p6, p3, p0, p5, p2, p7, 6); + 
Round512v35(p6, p1, p0, p7, p2, p5, p4, p3, 7); + p0 += ks[((R)+1) % 9]; /* inject the key schedule value */ + p1 += ks[((R)+2) % 9]; + p2 += ks[((R)+3) % 9]; + p3 += ks[((R)+4) % 9]; + p4 += ks[((R)+5) % 9]; + p5 += ks[((R)+6) % 9] + ts[((R)+1) % 3]; + p6 += ks[((R)+7) % 9] + ts[((R)+2) % 3]; + p7 += ks[((R)+8) % 9] + make_uint2((R)+1, 0); +} + + +static __forceinline__ __device__ void Round512v30(uint64_t &p0, uint64_t &p1, uint64_t &p2, uint64_t &p3, uint64_t &p4, uint64_t &p5, uint64_t &p6, uint64_t &p7, int ROT) +{ + p0 += p1; p1 = ROTL64(p1, ROT256[ROT][0]); p1 ^= p0; + p2 += p3; p3 = ROTL64(p3, ROT256[ROT][1]); p3 ^= p2; + p4 += p5; p5 = ROTL64(p5, ROT256[ROT][2]); p5 ^= p4; + p6 += p7; p7 = ROTL64(p7, ROT256[ROT][3]); p7 ^= p6; +} + +static __forceinline__ __device__ void Round_8_512v30(uint64_t *ks, uint64_t *ts, uint64_t &p0, uint64_t &p1, uint64_t &p2, uint64_t &p3, uint64_t &p4, + uint64_t &p5, uint64_t &p6, uint64_t &p7, int R) +{ + Round512v30(p0, p1, p2, p3, p4, p5, p6, p7, 0); + Round512v30(p2, p1, p4, p7, p6, p5, p0, p3, 1); + Round512v30(p4, p1, p6, p3, p0, p5, p2, p7, 2); + Round512v30(p6, p1, p0, p7, p2, p5, p4, p3, 3); + p0 += ks[((R)+0) % 9]; /* inject the key schedule value */ + p1 += ks[((R)+1) % 9]; + p2 += ks[((R)+2) % 9]; + p3 += ks[((R)+3) % 9]; + p4 += ks[((R)+4) % 9]; + p5 += ks[((R)+5) % 9] + ts[((R)+0) % 3]; + p6 += ks[((R)+6) % 9] + ts[((R)+1) % 3]; + p7 += ks[((R)+7) % 9] + R; + Round512v30(p0, p1, p2, p3, p4, p5, p6, p7, 4); + Round512v30(p2, p1, p4, p7, p6, p5, p0, p3, 5); + Round512v30(p4, p1, p6, p3, p0, p5, p2, p7, 6); + Round512v30(p6, p1, p0, p7, p2, p5, p4, p3, 7); + p0 += ks[((R)+1) % 9]; /* inject the key schedule value */ + p1 += ks[((R)+2) % 9]; + p2 += ks[((R)+3) % 9]; + p3 += ks[((R)+4) % 9]; + p4 += ks[((R)+5) % 9]; + p5 += ks[((R)+6) % 9] + ts[((R)+1) % 3]; + p6 += ks[((R)+7) % 9] + ts[((R)+2) % 3]; + p7 += ks[((R)+8) % 9] + (R)+1; +} + + + +__global__ void __launch_bounds__(256,3) skein256_gpu_hash_32(int threads, 
uint32_t startNounce, uint64_t *outputHash) +{ + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + + uint2 h[9]; + uint2 t[3]; + uint2 dt0,dt1,dt2,dt3; + uint2 p0, p1, p2, p3, p4, p5, p6, p7; + h[8] = skein_ks_parity; + for (int i = 0; i<8; i++) { + h[i] = vSKEIN_IV512_256[i]; + h[8] ^= h[i];} + + t[0]=t12[0]; + t[1]=t12[1]; + t[2]=t12[2]; + + + LOHI(dt0.x,dt0.y,outputHash[thread]); + LOHI(dt1.x,dt1.y,outputHash[threads+thread]); + LOHI(dt2.x,dt2.y,outputHash[2*threads+thread]); + LOHI(dt3.x,dt3.y,outputHash[3*threads+thread]); + + p0 = h[0] + dt0; + p1 = h[1] + dt1; + p2 = h[2] + dt2; + p3 = h[3] + dt3; + p4 = h[4]; + p5 = h[5] + t[0]; + p6 = h[6] + t[1]; + p7 = h[7]; + + #pragma unroll + for (int i = 1; i<19; i+=2) {Round_8_512v35(h,t,p0,p1,p2,p3,p4,p5,p6,p7,i);} + p0 ^= dt0; + p1 ^= dt1; + p2 ^= dt2; + p3 ^= dt3; + + h[0] = p0; + h[1] = p1; + h[2] = p2; + h[3] = p3; + h[4] = p4; + h[5] = p5; + h[6] = p6; + h[7] = p7; + h[8] = skein_ks_parity; + #pragma unroll 8 + for (int i = 0; i<8; i++) { h[8] ^= h[i]; } + + t[0] = t12[3]; + t[1] = t12[4]; + t[2] = t12[5]; + p5 += t[0]; //p5 already equal h[5] + p6 += t[1]; + #pragma unroll + for (int i = 1; i<19; i+=2) { Round_8_512v35(h, t, p0, p1, p2, p3, p4, p5, p6, p7, i); } + + outputHash[thread] = devectorize(p0); + outputHash[threads+thread] = devectorize(p1); + outputHash[2*threads+thread] = devectorize(p2); + outputHash[3*threads+thread] = devectorize(p3); + + } //thread +} + +__global__ void __launch_bounds__(256, 3) skein256_gpu_hash_32_v30(int threads, uint32_t startNounce, uint64_t *outputHash) +{ + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + + uint64_t h[9]; + uint64_t t[3]; + uint64_t dt0, dt1, dt2, dt3; + uint64_t p0, p1, p2, p3, p4, p5, p6, p7; + h[8] = skein_ks_parity64; + for (int i = 0; i<8; i++) { + h[i] = SKEIN_IV512_256[i]; + h[8] ^= h[i]; + } + + t[0] = devectorize(t12[0]); + t[1] = devectorize(t12[1]); + t[2] = 
devectorize(t12[2]); + + dt0 = outputHash[thread]; + dt1 = outputHash[threads+thread]; + dt2 = outputHash[2*threads+thread]; + dt3 = outputHash[3*threads+thread]; + p0 = h[0] + dt0; + p1 = h[1] + dt1; + p2 = h[2] + dt2; + p3 = h[3] + dt3; + p4 = h[4]; + p5 = h[5] + t[0]; + p6 = h[6] + t[1]; + p7 = h[7]; + +#pragma unroll + for (int i = 1; i<19; i += 2) { Round_8_512v30(h, t, p0, p1, p2, p3, p4, p5, p6, p7, i); } + p0 ^= dt0; + p1 ^= dt1; + p2 ^= dt2; + p3 ^= dt3; + + h[0] = p0; + h[1] = p1; + h[2] = p2; + h[3] = p3; + h[4] = p4; + h[5] = p5; + h[6] = p6; + h[7] = p7; + h[8] = skein_ks_parity64; +#pragma unroll 8 + for (int i = 0; i<8; i++) { h[8] ^= h[i]; } + + t[0] = t12_30[3]; + t[1] = t12_30[4]; + t[2] = t12_30[5]; + p5 += t[0]; //p5 already equal h[5] + p6 += t[1]; +#pragma unroll + for (int i = 1; i<19; i += 2) { Round_8_512v30(h, t, p0, p1, p2, p3, p4, p5, p6, p7, i); } + + outputHash[thread] = p0; + outputHash[threads + thread] = p1; + outputHash[2 * threads + thread] = p2; + outputHash[3 * threads + thread] = p3; + + + } //thread +} + + +void skein256_cpu_init(int thr_id, int threads) +{ +//empty //mepty //mpety //mptey// mptye +} + + +__host__ void skein256_cpu_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order) +{ + + const int threadsperblock = 256; + + // berechne wie viele Thread Blocks wir brauchen + dim3 grid((threads + threadsperblock - 1) / threadsperblock); + dim3 block(threadsperblock); + + size_t shared_size = 0; + if (compute_version[thr_id] >= 35) { + skein256_gpu_hash_32 << > >(threads, startNounce, d_outputHash); + } else { + skein256_gpu_hash_32_v30 << > >(threads, startNounce, d_outputHash); + } + MyStreamSynchronize(NULL, order, thr_id); + +} + diff --git a/Algo256/keccak256.cu b/Algo256/keccak256.cu new file mode 100644 index 0000000000..883d6a4218 --- /dev/null +++ b/Algo256/keccak256.cu @@ -0,0 +1,102 @@ +/* + * test routine for new algorithm + * + */ + +extern "C" +{ +#include "sph/sph_shavite.h" 
+#include "sph/sph_simd.h" +#include "sph/sph_keccak.h" + +#include "miner.h" +} + +// aus cpu-miner.c +extern int device_map[8]; + +// Speicher für Input/Output der verketteten Hashfunktionen +static uint32_t *d_hash[8]; + +extern void keccak256_cpu_init(int thr_id, int threads); +extern void keccak256_setBlock_80(void *pdata,const void *ptarget); +extern uint32_t keccak256_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); + +// fresh Hashfunktion +inline void cpu_hash(void *state, const void *input) +{ + + sph_keccak_context ctx_keccak; + + + uint32_t hash[16]; + + sph_keccak256_init(&ctx_keccak); + sph_keccak256 (&ctx_keccak, input, 80); + sph_keccak256_close(&ctx_keccak, (void*) hash); + memcpy(state, hash, 32); +} + + +extern bool opt_benchmark; + +extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done) +{ + const uint32_t first_nonce = pdata[19]; + + if (opt_benchmark) + ((uint32_t*)ptarget)[7] = 0x0000ff; + + const uint32_t Htarg = ptarget[7]; + + const int throughput = 256*256*8*8; + + static bool init[8] = {0,0,0,0,0,0,0,0}; + if (!init[thr_id]) + { + cudaSetDevice(device_map[thr_id]); + + // Konstanten kopieren, Speicher belegen + cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); + keccak256_cpu_init(thr_id, throughput); + + init[thr_id] = true; + } + + + + uint32_t endiandata[20]; + for (int k=0; k < 20; k++) { + be32enc(&endiandata[k], ((uint32_t*)pdata)[k]); + } + keccak256_setBlock_80((void*)endiandata, ptarget); + do { + int order = 0; + + uint32_t foundNonce = keccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + if (foundNonce != 0xffffffff) + { + + uint32_t vhash64[8]; + be32enc(&endiandata[19], foundNonce); + + cpu_hash(vhash64, endiandata); + + if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) { + + pdata[19] = foundNonce; + *hashes_done = foundNonce - first_nonce + 1; + return 
1; + } else { + applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU! vhash64 %08x and htarg %08x", thr_id, foundNonce,vhash64[7],Htarg); + } + } + pdata[19] += throughput; + + } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = pdata[19] - first_nonce + 1; + return 0; +} diff --git a/Debug/ccminer.vcxprojResolveAssemblyReference.cache b/Debug/ccminer.vcxprojResolveAssemblyReference.cache new file mode 100644 index 0000000000..c6f2429bec Binary files /dev/null and b/Debug/ccminer.vcxprojResolveAssemblyReference.cache differ diff --git a/Makefile.am b/Makefile.am index 831cfd23c5..bf33157c00 100644 --- a/Makefile.am +++ b/Makefile.am @@ -18,6 +18,8 @@ ccminer_SOURCES = elist.h miner.h compat.h \ compat/sys/time.h compat/getopt/getopt.h \ cpu-miner.c util.c sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c hefty1.c scrypt.c sha2.c \ sph/bmw.h sph/sph_blake.h sph/sph_groestl.h sph/sph_jh.h sph/sph_keccak.h sph/sph_skein.h sph/sph_types.h \ + sph/sph_sha2.c sph/tiger.c sph/ripemd.c sph/sph_sha2.h sph/sph_tiger.h sph/sph_ripemd.h \ + sph/neoscrypt.c sph/neoscrypt.h sph/Lyra2.c sph/Sponge.c sph/Lyra2.h sph/Sponge.h \ heavy/heavy.cu \ heavy/cuda_blake512.cu heavy/cuda_blake512.h \ heavy/cuda_combine.cu heavy/cuda_combine.h \ @@ -36,25 +38,48 @@ ccminer_SOURCES = elist.h miner.h compat.h \ cuda_nist5.cu \ sph/cubehash.c sph/echo.c sph/luffa.c sph/shavite.c sph/simd.c \ sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \ - x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu \ + sph/shabal.c sph/whirlpool.c sph/sph_shabal.h sph/sph_whirlpool.h \ + sph/haval.c sph/sph_haval.h sph/sph_sha2.h sph/sha2big.c \ + qubit/qubit.cu qubit/qubit_luffa512.cu x13/x14.cu x13/fresh.cu x13/x17.cu \ + x13/x13.cu x13/cuda_x13_hamsi512.cu x13/cuda_x13_fugue512.cu x13/x15.cu x13/cuda_shabal512.cu x13/cuda_whirlpool512.cu \ + x13/cuda_haval512.cu x13/cuda_sha512.cu qubit/doom.cu x13/goalcoin.cu \ + x13/whirlpool.cu \ 
+ Algo256/cuda_keccak256.cu Algo256/keccak256.cu Algo256/cuda_blake256.cu \ + Algo256/cuda_groestl256.cu Algo256/cuda_skein256.cu \ + lyra2/cuda_lyra2.cu lyra2/lyra2RE.cu \ + pluck/cuda_pluck.cu pluck/pluck.cu \ x11/x11.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \ - x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu + x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \ + x13/cuda_m7_sha256.cu x13/cuda_mul.cu x13/cuda_mul2.cu x13/cuda_ripemd160.cu x13/cuda_tiger192.cu \ + x13/m7_keccak512.cu x13/m7.cu qubit/deep.cu ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@ -ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ +ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ -lmpir ccminer_CPPFLAGS = -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME # we're now targeting all major compute architectures within one binary. +# require cuda 6.5 or higher .cu.o: - $(NVCC) @CFLAGS@ -I . -Xptxas "-abi=no -v" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< + $(NVCC) @CFLAGS@ -I . -Xptxas "-abi=no -v" -gencode=arch=compute_35,code=\"sm_35,compute_35\" -gencode=arch=compute_50,code=\"sm_50,compute_50\" -gencode=arch=compute_52,code=\"sm_52,compute_52\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< # Shavite compiles faster with 128 regs x11/cuda_x11_shavite512.o: x11/cuda_x11_shavite512.cu - $(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=no -v" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=128 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< + $(NVCC) -I . 
-I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=no -v" -gencode=arch=compute_35,code=\"sm_35,compute_35\" -gencode=arch=compute_50,code=\"sm_50,compute_50\" -gencode=arch=compute_52,code=\"sm_52,compute_52\" --maxrregcount=128 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< + +# whirlpool compiles faster with 64 regs +x13/cuda_whirlpool512.o: x13/cuda_whirlpool512.cu + $(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=no -v" -gencode=arch=compute_35,code=\"sm_35,compute_35\" -gencode=arch=compute_50,code=\"sm_50,compute_50\" -gencode=arch=compute_52,code=\"sm_52,compute_52\" --maxrregcount=64 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< + +# whirlpool compiles faster with 64 regs +x13/cuda_tiger192.o: x13/cuda_tiger192.cu + $(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=no -v" -gencode=arch=compute_35,code=\"sm_35,compute_35\" -gencode=arch=compute_50,code=\"sm_50,compute_50\" -gencode=arch=compute_52,code=\"sm_52,compute_52\" --maxrregcount=64 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< # ABI requiring code modules quark/cuda_quark_compactionTest.o: quark/cuda_quark_compactionTest.cu - $(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< + $(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v" -gencode=arch=compute_35,code=\"sm_35,compute_35\" -gencode=arch=compute_50,code=\"sm_50,compute_50\" -gencode=arch=compute_52,code=\"sm_52,compute_52\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< JHA/cuda_jha_compactionTest.o: JHA/cuda_jha_compactionTest.cu - $(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< + $(NVCC) -I . 
-I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v" -gencode=arch=compute_35,code=\"sm_35,compute_35\" -gencode=arch=compute_50,code=\"sm_50,compute_50\" -gencode=arch=compute_52,code=\"sm_52,compute_52\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< + +x13/cuda_mul.o: x13/cuda_mul.cu + $(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v" -gencode=arch=compute_35,code=\"sm_35,compute_35\" -gencode=arch=compute_50,code=\"sm_50,compute_50\" -gencode=arch=compute_52,code=\"sm_52,compute_52\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< diff --git a/ccminer.vcxproj b/ccminer.vcxproj index 93621c4e84..4d0cb5ef3c 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -1,5 +1,5 @@  - + Debug @@ -27,27 +27,31 @@ Application true MultiByte + v120 Application true MultiByte + v120 Application false true MultiByte + v120 Application false true MultiByte + v120 - + @@ -79,13 +83,14 @@ Level3 Disabled WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions) - .;compat;compat\jansson;compat\getopt;..\pthreads\Pre-built.2\include;..\curl-7.29.0\include;..\OpenSSL-Win32\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir) + ..\mpir-2.6.0\build.vc10\Win32\Release;.;compat;compat\jansson;compat\getopt;..\pthreads\Pre-built.2\include;..\curl-7.29.0\include;..\OpenSSL-Win32\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir) + ProgramDatabase true Console cudart.lib;cuda.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;pthreadVC2.lib;libcurl.lib;ws2_32.lib;Wldap32.lib;libeay32MTd.lib;ssleay32MTd.lib;%(AdditionalDependencies) - ..\pthreads\Pre-built.2\lib\x86;..\curl-7.29.0\build\lib\Debug;..\OpenSSL-Win32\lib\VC\static;%(AdditionalLibraryDirectories);$(CudaToolkitLibDir) + 
..\mpir-2.6.0\build.vc10\Win32\Release;..\pthreads\Pre-built.2\lib\x86;..\curl-7.29.0\build\lib\Debug;..\OpenSSL-Win32\lib\VC\static;%(AdditionalLibraryDirectories);$(CudaToolkitLibDir) echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" @@ -100,7 +105,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" true true - compute_35,sm_35 + compute_30,sm_30;compute_35,sm_35;compute_50,sm_50 @@ -138,27 +143,28 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + MultiThreaded Level3 MaxSpeed true true WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;CURL_STATICLIB;SCRYPT_KECCAK512;SCRYPT_CHACHA;SCRYPT_CHOOSE_COMPILETIME;%(PreprocessorDefinitions) - .;compat;compat\jansson;compat\getopt;..\pthreads\Pre-built.2\include;..\curl-7.29.0\include;..\OpenSSL-Win32\include;%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir) + ..\mpir-2.6.0\build.vc10\Win32\Release;.;compat;compat\jansson;compat\getopt;..\pthreads\Pre-built.2\include;..\curl-7.29.0\include;..\OpenSSL-Win32\include;%(AdditionalIncludeDirectories);C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.5 true true true Console - cudart.lib;cuda.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;pthreadVC2.lib;libcurl.lib;ws2_32.lib;Wldap32.lib;libeay32MT.lib;ssleay32MT.lib;%(AdditionalDependencies) - ..\pthreads\Pre-built.2\lib\x86;..\curl-7.29.0\build\lib\Release;..\OpenSSL-Win32\lib\VC\static;%(AdditionalLibraryDirectories);$(CudaToolkitLibDir) + mpir.lib;mpirxx.lib;cudart.lib;cuda.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;pthreadVC2.lib;libcurl.lib;ws2_32.lib;Wldap32.lib;libeay32MT.lib;ssleay32MT.lib;%(AdditionalDependencies) + 
..\mpir-2.6.0\build.vc10\Win32\Release;..\pthreads\Pre-built.2\lib\x86;..\curl-7.29.0\build\lib\Release;..\OpenSSL-Win32\lib\VC\static;%(AdditionalLibraryDirectories);$(CudaToolkitLibDir) echo copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" - true + false 80 @@ -166,13 +172,16 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" true true - compute_35,sm_35 + compute_52,sm_52;compute_50,sm_50;compute_35,sm_35 + false + .\Release\PrepFile + MultiThreaded Level3 MaxSpeed true @@ -204,6 +213,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" compute_35,sm_35 + O3 @@ -239,14 +249,24 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + + + + + + + + /TP %(AdditionalOptions) /TP %(AdditionalOptions) @@ -264,6 +284,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + @@ -273,22 +294,43 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" + + + + + + + + + + + + + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + 80 + 80 + + + -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) @@ -338,7 +380,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" -Xptxas "-abi=no -v" %(AdditionalOptions) - -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) @@ -362,7 +404,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" -Xptxas "-abi=yes -v" %(AdditionalOptions) - -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) @@ -373,8 +415,12 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" 
%(AdditionalOptions) + + + + - -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v -dlcm=cg" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) @@ -398,7 +444,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" -Xptxas "-abi=no -v" %(AdditionalOptions) - -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) @@ -416,7 +462,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" -Xptxas "-abi=no -v" %(AdditionalOptions) - -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) @@ -433,6 +479,20 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) + + + + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + + + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) @@ -463,7 +523,7 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" -Xptxas "-abi=no -v" %(AdditionalOptions) - -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) 128 @@ -491,11 +551,73 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) + + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "" 
%(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + + + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + + + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + + + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + + + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + + + -Xptxas "-abi=no -v -dlcm=cg" %(AdditionalOptions) + -Xptxas "-v -dlcm=cg" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + 74 + 74 + + + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + + + -Xptxas "" %(AdditionalOptions) + -Xptxas "" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + 64 + 64 + + + -Xptxas "-abi=no -v -dlcm=cg" %(AdditionalOptions) + -Xptxas " -dlcm=cg" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + 64 + 64 + -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) + 64 + 64 -Xptxas "-abi=no -v" %(AdditionalOptions) @@ -503,15 +625,55 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) + + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" 
%(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + + + + + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + + -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) -Xptxas "-abi=no -v" %(AdditionalOptions) + + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + + + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + + + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + -Xptxas "-abi=no -v" %(AdditionalOptions) + + + + + + + + - + \ No newline at end of file diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters index 2fb6824895..941ea883b8 100644 --- a/ccminer.vcxproj.filters +++ b/ccminer.vcxproj.filters @@ -58,6 +58,18 @@ {d67a2af7-4851-4d21-910e-87791bc8ee35} + + {f3ed23a2-8ce7-41a5-b051-6da56047dc35} + + + {a448ec8d-f346-4944-9786-aa0b55519306} + + + {e23c7c23-ddfd-4da2-a51d-4fbeab96c66c} + + + {59eb6b57-944a-425e-920a-bb168e950c45} + @@ -153,6 +165,36 @@ Source Files\sph + + Source Files\sph + + + Source Files\sph + + + Source Files\sph + + + Source Files\sph + + + Source Files\sph + + + Source Files\sph + + + Source Files\sph + + + Source Files\sph + + + Source Files\sph + + + Source Files\sph + @@ -203,9 +245,6 @@ Header Files\sph - - Header Files\sph - Header Files\sph @@ -254,14 +293,36 @@ Header Files\sph + + Header Files\sph + + + + Header Files\sph + + + Header Files\sph + + + Header Files\sph + + + Header Files\sph + + + Header Files\sph + + + Header Files\sph + + + Header 
Files\sph + + + Header Files\CUDA + - - Source Files\CUDA - - - Source Files\CUDA - Source Files\CUDA\JHA @@ -271,9 +332,6 @@ Source Files\CUDA\quark - - Source Files\CUDA - Source Files\CUDA\JHA @@ -361,5 +419,104 @@ Source Files\CUDA\x13 + + Source Files\CUDA\x13 + + + Source Files\CUDA\x13 + + + Source Files\CUDA\x13 + + + Source Files\CUDA\x13 + + + Source Files\CUDA\qubit + + + Source Files\CUDA\qubit + + + Source Files\CUDA\x13 + + + Source Files\CUDA\x13 + + + Source Files\CUDA\x13 + + + Source Files\CUDA\x13 + + + Source Files\CUDA\Algo256 + + + Source Files\CUDA\Algo256 + + + Source Files\CUDA\Algo256 + + + Source Files\CUDA\Algo256 + + + Source Files\CUDA\Algo256 + + + Source Files\CUDA\x13 + + + Source Files\CUDA\x13 + + + Source Files\CUDA\x13 + + + Source Files\CUDA\x13 + + + Source Files\CUDA\x13 + + + Source Files\CUDA\x13 + + + Source Files\CUDA\x13 + + + Source Files\CUDA\x13 + + + Source Files\CUDA\x13 + + + Source Files\CUDA\qubit + + + Source Files\CUDA\qubit + + + Source Files\CUDA\lyra2RE + + + Source Files\CUDA\lyra2RE + + + Source Files\CUDA\Algo256 + + + Source Files\CUDA\Algo256 + + + Source Files\CUDA\Algo256 + + + Source Files\CUDA\pluck + + + Source Files\CUDA\pluck + \ No newline at end of file diff --git a/ccminer.vcxproj.user b/ccminer.vcxproj.user new file mode 100644 index 0000000000..ace9a86acb --- /dev/null +++ b/ccminer.vcxproj.user @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/compat/thrust/CHANGELOG b/compat/thrust/CHANGELOG deleted file mode 100644 index 110c6689c2..0000000000 --- a/compat/thrust/CHANGELOG +++ /dev/null @@ -1,662 +0,0 @@ -####################################### -# Thrust v1.7.0 # -####################################### - -Summary - Thrust 1.7.0 introduces a new interface for controlling algorithm execution as - well as several new algorithms and performance improvements. 
With this new - interface, users may directly control how algorithms execute as well as details - such as the allocation of temporary storage. Key/value versions of thrust::merge - and the set operation algorithms have been added, as well stencil versions of - partitioning algorithms. thrust::tabulate has been introduced to tabulate the - values of functions taking integers. For 32b types, new CUDA merge and set - operations provide 2-15x faster performance while a new CUDA comparison sort - provides 1.3-4x faster performance. Finally, a new TBB reduce_by_key implementation - provides 80% faster performance. - -Breaking API Changes - Dispatch - Custom user backend systems' tag types must now inherit from the corresponding system's execution_policy template (e.g. thrust::cuda::execution_policy) instead - of the tag struct (e.g. thrust::cuda::tag). Otherwise, algorithm specializations will silently go unfound during dispatch. - See examples/minimal_custom_backend.cu and examples/cuda/fallback_allocator.cu for usage examples. - - thrust::advance and thrust::distance are no longer dispatched based on iterator system type and thus may no longer be customized. - - Iterators - iterator_facade and iterator_adaptor's Pointer template parameters have been eliminated. - iterator_adaptor has been moved into the thrust namespace (previously thrust::experimental::iterator_adaptor). - iterator_facade has been moved into the thrust namespace (previously thrust::experimental::iterator_facade). - iterator_core_access has been moved into the thrust namespace (previously thrust::experimental::iterator_core_access). - All iterators' nested pointer typedef (the type of the result of operator->) is now void instead of a pointer type to indicate that such expressions are currently impossible. - Floating point counting_iterators' nested difference_type typedef is now a signed integral type instead of a floating point type. 
- - Other - normal_distribution has been moved into the thrust::random namespace (previously thrust::random::experimental::normal_distribution). - Placeholder expressions may no longer include the comma operator. - -New Features - Execution Policies - Users may directly control the dispatch of algorithm invocations with optional execution policy arguments. - For example, instead of wrapping raw pointers allocated by cudaMalloc with thrust::device_ptr, the thrust::device execution_policy may be passed as an argument to an algorithm invocation to enable CUDA execution. - The following execution policies are supported in this version: - - thrust::host - thrust::device - thrust::cpp::par - thrust::cuda::par - thrust::omp::par - thrust::tbb::par - - Algorithms - free - get_temporary_buffer - malloc - merge_by_key - partition with stencil - partition_copy with stencil - return_temporary_buffer - set_difference_by_key - set_intersection_by_key - set_symmetric_difference_by_key - set_union_by_key - stable_partition with stencil - stable_partition_copy with stencil - tabulate - -New Examples - uninitialized_vector demonstrates how to use a custom allocator to avoid the automatic initialization of elements in thrust::device_vector. - -Other Enhancements - Authors of custom backend systems may manipulate arbitrary state during algorithm dispatch by incorporating it into their execution_policy parameter. - Users may control the allocation of temporary storage during algorithm execution by passing standard allocators as parameters via execution policies such as thrust::device. - THRUST_DEVICE_SYSTEM_CPP has been added as a compile-time target for the device backend. - CUDA merge performance is 2-15x faster. - CUDA comparison sort performance is 1.3-4x faster. - CUDA set operation performance is 1.5-15x faster. - TBB reduce_by_key performance is 80% faster. - Several algorithms have been parallelized with TBB. - Support for user allocators in vectors has been improved. 
- The sparse_vector example is now implemented with merge_by_key instead of sort_by_key. - Warnings have been eliminated in various contexts. - Warnings about __host__ or __device__-only functions called from __host__ __device__ functions have been eliminated in various contexts. - Documentation about algorithm requirements have been improved. - Simplified the minimal_custom_backend example. - Simplified the cuda/custom_temporary_allocation example. - Simplified the cuda/fallback_allocator example. - -Bug Fixes - #248 fix broken counting_iterator behavior with OpenMP - #231, #209 fix set operation failures with CUDA - #187 fix incorrect occupancy calculation with CUDA - #153 fix broken multigpu behavior with CUDA - #142 eliminate warning produced by thrust::random::taus88 and MSVC 2010 - #208 correctly initialize elements in temporary storage when necessary - #16 fix compilation error when sorting bool with CUDA - #10 fix ambiguous overloads of reinterpret_tag - -Known Issues - g++ versions 4.3 and lower may fail to dispatch thrust::get_temporary_buffer correctly causing infinite recursion in examples such as cuda/custom_temporary_allocation. - -Acknowledgments - Thanks to Sean Baxter, Bryan Catanzaro, and Manjunath Kudlur for contributing a faster merge implementation for CUDA. - Thanks to Sean Baxter for contributing a faster set operation implementation for CUDA. - Thanks to Cliff Woolley for contributing a correct occupancy calculation algorithm. - -####################################### -# Thrust v1.6.0 # -####################################### - -Summary - Thrust v1.6.0 provides an interface for customization and extension and a new - backend system based on the Threading Building Blocks library. With this - new interface, programmers may customize the behavior of specific algorithms - as well as control the allocation of temporary storage or invent entirely new - backends. 
These enhancements also allow multiple different backend systems - such as CUDA and OpenMP to coexist within a single program. Support for TBB - allows Thrust programs to integrate more naturally into applications which - may already employ the TBB task scheduler. - -Breaking API Changes - The header has been moved to - thrust::experimental::cuda::pinned_allocator has been moved to thrust::cuda::experimental::pinned_allocator - The macro THRUST_DEVICE_BACKEND has been renamed THRUST_DEVICE_SYSTEM - The macro THRUST_DEVICE_BACKEND_CUDA has been renamed THRUST_DEVICE_SYSTEM_CUDA - The macro THRUST_DEVICE_BACKEND_OMP has been renamed THRUST_DEVICE_SYSTEM_OMP - thrust::host_space_tag has been renamed thrust::host_system_tag - thrust::device_space_tag has been renamed thrust::device_system_tag - thrust::any_space_tag has been renamed thrust::any_system_tag - thrust::iterator_space has been renamed thrust::iterator_system - - -New Features - Backend Systems - Threading Building Blocks (TBB) is now supported - Functions - for_each_n - raw_reference_cast - Types - pointer - reference - -New Examples - cuda/custom_temporary_allocation - cuda/fallback_allocator - device_ptr - expand - minimal_custom_backend - raw_reference_cast - set_operations - -Other Enhancements - thrust::for_each now returns the end of the input range similar to most other algorithms - thrust::pair and thrust::tuple have swap functionality - all CUDA algorithms now support large data types - iterators may be dereferenced in user __device__ or __global__ functions - the safe use of different backend systems is now possible within a single binary - -Bug Fixes - #469 min_element and max_element algorithms no longer require a const comparison operator - -Known Issues - cudafe++.exe may crash when parsing TBB headers on Windows. 
- -####################################### -# Thrust v1.5.3 # -####################################### - -Summary - Small bug fixes - -Bug Fixes - Avoid warnings about potential race due to __shared__ non-POD variable - -####################################### -# Thrust v1.5.2 # -####################################### - -Summary - Small bug fixes - -Bug Fixes - Fixed warning about C-style initialization of structures - -####################################### -# Thrust v1.5.1 # -####################################### - -Summary - Small bug fixes - -Bug Fixes - Sorting data referenced by permutation_iterators on CUDA produces invalid results - -####################################### -# Thrust v1.5.0 # -####################################### - -Summary - Thrust v1.5.0 introduces new programmer productivity and performance - enhancements. New functionality for creating anonymous "lambda" functions has - been added. A faster host sort provides 2-10x faster performance for sorting - arithmetic types on (single-threaded) CPUs. A new OpenMP sort provides - 2.5x-3.0x speedup over the host sort using a quad-core CPU. When sorting - arithmetic types with the OpenMP backend the combined performance improvement - is 5.9x for 32-bit integers and ranges from 3.0x (64-bit types) to 14.2x - (8-bit types). A new CUDA reduce_by_key implementation provides 2-3x faster - performance. - -Breaking API Changes - device_ptr<void> no longer unsafely converts to device_ptr<T> without an - explicit cast. Use the expression - device_pointer_cast(static_cast<int*>(void_ptr.get())) - to convert, for example, device_ptr<void> to device_ptr<int>. 
- -New Features - Functions - stencil-less transform_if - - Types - lambda placeholders - -New Examples - lambda - -Other Enhancements - host sort is 2-10x faster for arithmetic types - OMP sort provides speedup over host sort - reduce_by_key is 2-3x faster - reduce_by_key no longer requires O(N) temporary storage - CUDA scan algorithms are 10-40% faster - host_vector and device_vector are now documented - out-of-memory exceptions now provide detailed information from CUDART - improved histogram example - device_reference now has a specialized swap - reduce_by_key and scan algorithms are compatible with discard_iterator - -Removed Functionality - -Bug Fixes - #44 allow host_vector to compile when value_type uses __align__ - #198 allow adjacent_difference to permit safe in-situ operation - #303 make thrust thread-safe - #313 avoid race conditions in device_vector::insert - #314 avoid unintended adl invocation when dispatching copy - #365 fix merge and set operation failures - -Known Issues - None - -Acknowledgments - Thanks to Manjunath Kudlur for contributing his Carbon library, from which the lambda functionality is derived. - Thanks to Jean-Francois Bastien for suggesting a fix for issue 303. - -####################################### -# Thrust v1.4.0 # -####################################### - -Summary - Thrust v1.4.0 provides support for CUDA 4.0 in addition to many feature - and performance improvements. New set theoretic algorithms operating on - sorted sequences have been added. Additionally, a new fancy iterator - allows discarding redundant or otherwise unnecessary output from - algorithms, conserving memory storage and bandwidth. 
- -Breaking API Changes - Eliminations - thrust/is_sorted.h - thrust/utility.h - thrust/set_intersection.h - thrust/experimental/cuda/ogl_interop_allocator.h and the functionality therein - thrust::deprecated::copy_when - thrust::deprecated::absolute_value - -New Features - Functions - copy_n - merge - set_difference - set_symmetric_difference - set_union - - Types - discard_iterator - - Device support - Compute Capability 2.1 GPUs - -New Examples - run_length_decoding - -Other Enhancements - Compilation warnings are substantially reduced in various contexts. - The compilation time of thrust::sort, thrust::stable_sort, thrust::sort_by_key, - and thrust::stable_sort_by_key are substantially reduced. - A fast sort implementation is used when sorting primitive types with thrust::greater. - The performance of thrust::set_intersection is improved. - The performance of thrust::fill is improved on SM 1.x devices. - A code example is now provided in each algorithm's documentation. - thrust::reverse now operates in-place - -Removed Functionality - thrust::deprecated::copy_when - thrust::deprecated::absolute_value - thrust::experimental::cuda::ogl_interop_allocator - thrust::gather and thrust::scatter from host to device and vice versa are no longer supported. - Operations which modify the elements of a thrust::device_vector are no longer - available from source code compiled without nvcc when the device backend is CUDA. - Instead, use the idiom from the cpp_interop example. - -Bug Fixes - #212 set_intersection works correctly for large input sizes. - #275 counting_iterator and constant_iterator work correctly with OpenMP as the - backend when compiling with optimization - #256 min and max correctly return their first argument as a tie-breaker - #248 NDEBUG is interpreted correctly - -Known Issues - nvcc may generate code containing warnings when compiling some Thrust algorithms. 
- When compiling with -arch=sm_1x, some Thrust algorithms may cause nvcc to issue - benign pointer advisories. - When compiling with -arch=sm_1x and -G, some Thrust algorithms may fail to execute correctly. - thrust::inclusive_scan, thrust::exclusive_scan, thrust::inclusive_scan_by_key, - and thrust::exclusive_scan_by_key are currently incompatible with thrust::discard_iterator. - -Acknowledgments - Thanks to David Tarjan for improving the performance of set_intersection. - Thanks to Duane Merrill for continued help with sort. - Thanks to Nathan Whitehead for help with CUDA Toolkit integration. - -####################################### -# Thrust v1.3.0 # -####################################### - -Summary - Thrust v1.3.0 provides support for CUDA 3.2 in addition to many feature - and performance enhancements. - - Performance of the sort and sort_by_key algorithms is improved by as much - as 3x in certain situations. The performance of stream compaction algorithms, - such as copy_if, is improved by as much as 2x. Reduction performance is - also improved, particularly for small input sizes. - - CUDA errors are now converted to runtime exceptions using the system_error - interface. Combined with a debug mode, also new in v1.3, runtime errors - can be located with greater precision. - - Lastly, a few header files have been consolidated or renamed for clarity. - See the deprecations section below for additional details. 
- - -Breaking API Changes - Promotions - thrust::experimental::inclusive_segmented_scan has been renamed thrust::inclusive_scan_by_key and exposes a different interface - thrust::experimental::exclusive_segmented_scan has been renamed thrust::exclusive_scan_by_key and exposes a different interface - thrust::experimental::partition_copy has been renamed thrust::partition_copy and exposes a different interface - thrust::next::gather has been renamed thrust::gather - thrust::next::gather_if has been renamed thrust::gather_if - thrust::unique_copy_by_key has been renamed thrust::unique_by_key_copy - Deprecations - thrust::copy_when has been renamed thrust::deprecated::copy_when - thrust::absolute_value has been renamed thrust::deprecated::absolute_value - The header thrust/set_intersection.h is now deprecated; use thrust/set_operations.h instead - The header thrust/utility.h is now deprecated; use thrust/swap.h instead - The header thrust/swap_ranges.h is now deprecated; use thrust/swap.h instead - Eliminations - thrust::deprecated::gather - thrust::deprecated::gather_if - thrust/experimental/arch.h and the functions therein - thrust/sorting/merge_sort.h - thrust/sorting/radix_sort.h - -New Features - Functions - exclusive_scan_by_key - find - find_if - find_if_not - inclusive_scan_by_key - is_partitioned - is_sorted_until - mismatch - partition_point - reverse - reverse_copy - stable_partition_copy - - Types - system_error and related types - experimental::cuda::ogl_interop_allocator - bit_and, bit_or, and bit_xor - - Device support - gf104-based GPUs - -New Examples - opengl_interop.cu - repeated_range.cu - simple_moving_average.cu - sparse_vector.cu - strided_range.cu - -Other Enhancements - Performance of thrust::sort and thrust::sort_by_key is substantially improved for primitive key types - Performance of thrust::copy_if is substantially improved - Performance of thrust::reduce and related reductions is improved - THRUST_DEBUG mode added - Callers of Thrust 
functions may detect error conditions by catching thrust::system_error, which derives from std::runtime_error - The number of compiler warnings generated by Thrust has been substantially reduced - Comparison sort now works correctly for input sizes > 32M - min & max usage no longer collides with <windows.h> definitions - Compiling against the OpenMP backend no longer requires nvcc - Performance of device_vector initialized in .cpp files is substantially improved in common cases - Performance of thrust::sort_by_key on the host is substantially improved - -Removed Functionality - nvcc 2.3 is no longer supported - -Bug Fixes - Debug device code now compiles correctly - thrust::uninitialized_copy and thrust::uninitialized_fill now dispatch constructors on the device rather than the host - -Known Issues - #212 set_intersection is known to fail for large input sizes - partition_point is known to fail for 64b types with nvcc 3.2 - -Acknowledgments - Thanks to Duane Merrill for contributing a fast CUDA radix sort implementation - Thanks to Erich Elsen for contributing an implementation of find_if - Thanks to Andrew Corrigan for contributing changes which allow the OpenMP backend to compile in the absence of nvcc - Thanks to Andrew Corrigan, Cliff Woolley, David Coeurjolly, Janick Martinez Esturo, John Bowers, Maxim Naumov, Michael Garland, and Ryuta Suzuki for bug reports - Thanks to Cliff Woolley for help with testing - -####################################### -# Thrust v1.2.1 # -####################################### - -Summary - Small fixes for compatibility with CUDA 3.1 - -Known Issues - inclusive_scan & exclusive_scan may fail with very large types - the Microsoft compiler may fail to compile code using both sort and binary search algorithms - uninitialized_fill & uninitialized_copy dispatch constructors on the host rather than the device - # 109 some algorithms may exhibit poor performance with the OpenMP backend with large numbers (>= 6) of CPU threads - 
default_random_engine::discard is not accelerated with nvcc 2.3 - nvcc 3.1 may fail to compile code using types derived from thrust::subtract_with_carry_engine, such as thrust::ranlux24 & thrust::ranlux48. - -####################################### -# Thrust v1.2.0 # -####################################### - -Summary - Thrust v1.2 introduces support for compilation to multicore CPUs - and the Ocelot virtual machine, and several new facilities for - pseudo-random number generation. New algorithms such as set - intersection and segmented reduction have also been added. Lastly, - improvements to the robustness of the CUDA backend ensure - correctness across a broad set of (uncommon) use cases. - -Breaking API Changes - thrust::gather's interface was incorrect and has been removed. - The old interface is deprecated but will be preserved for Thrust - version 1.2 at thrust::deprecated::gather & - thrust::deprecated::gather_if. The new interface is provided at - thrust::next::gather & thrust::next::gather_if. The new interface - will be promoted to thrust:: in Thrust version 1.3. For more details, - please refer to this thread: - http://groups.google.com/group/thrust-users/browse_thread/thread/f5f0583cb97b51fd - - The thrust::sorting namespace has been deprecated in favor of the - top-level sorting functions, such as thrust::sort() and - thrust::sort_by_key(). 
- -New Features - Functions - reduce_by_key - set_intersection - tie - unique_copy - unique_by_key - unique_copy_by_key - - Types - Random Number Generation - discard_block_engine - default_random_engine - linear_congruential_engine - linear_feedback_shift_engine - minstd_rand - minstd_rand0 - normal_distribution (experimental) - ranlux24 - ranlux48 - ranlux24_base - ranlux48_base - subtract_with_carry_engine - taus88 - uniform_int_distribution - uniform_real_distribution - xor_combine_engine - Functionals - project1st - project2nd - - Fancy Iterators - permutation_iterator - reverse_iterator - - Device support - Add support for multicore CPUs via OpenMP - Add support for Fermi-class GPUs - Add support for Ocelot virtual machine - -New Examples - cpp_integration - histogram - mode - monte_carlo - monte_carlo_disjoint_sequences - padded_grid_reduction - permutation_iterator - row_sum - run_length_encoding - segmented_scan - stream_compaction - summary_statistics - transform_iterator - word_count - -Other Enhancements - vector functions operator!=, rbegin, crbegin, rend, crend, data, & shrink_to_fit - integer sorting performance is improved when max is large but (max - min) is small and when min is negative - performance of inclusive_scan() and exclusive_scan() is improved by 20-25% for primitive types - support for nvcc 3.0 - -Removed Functionality - removed support for equal between host & device sequences - removed support for gather() and scatter() between host & device sequences - -Bug Fixes - # 8 cause a compiler error if the required compiler is not found rather than a mysterious error at link time - # 42 device_ptr & device_reference are classes rather than structs, eliminating warnings on certain platforms - # 46 gather & scatter handle any space iterators correctly - # 51 thrust::experimental::arch functions gracefully handle unrecognized GPUs - # 52 avoid collisions with common user macros such as BLOCK_SIZE - # 62 provide better documentation for 
device_reference - # 68 allow built-in CUDA vector types to work with device_vector in pure C++ mode - # 102 eliminated a race condition in device_vector::erase - various compilation warnings eliminated - -Known Issues - inclusive_scan & exclusive_scan may fail with very large types - the Microsoft compiler may fail to compile code using both sort and binary search algorithms - uninitialized_fill & uninitialized_copy dispatch constructors on the host rather than the device - # 109 some algorithms may exhibit poor performance with the OpenMP backend with large numbers (>= 6) of CPU threads - default_random_engine::discard is not accelerated with nvcc 2.3 - -Acknowledgments - Thanks to Gregory Diamos for contributing a CUDA implementation of set_intersection - Thanks to Ryuta Suzuki & Gregory Diamos for rigorously testing Thrust's unit tests and examples against Ocelot - Thanks to Tom Bradley for contributing an implementation of normal_distribution - Thanks to Joseph Rhoads for contributing the example summary_statistics - -####################################### -# Thrust v1.1.1 # -####################################### - -Summary - Small fixes for compatibility with CUDA 2.3a and Mac OSX Snow Leopard. - -####################################### -# Thrust v1.1.0 # -####################################### - -Summary - Thrust v1.1 introduces fancy iterators, binary search functions, and - several specialized reduction functions. Experimental support for - segmented scan has also been added. 
- -Breaking API Changes - counting_iterator has been moved into the thrust namespace (previously thrust::experimental) - -New Features - Functions - copy_if - lower_bound - upper_bound - vectorized lower_bound - vectorized upper_bound - equal_range - binary_search - vectorized binary_search - all_of - any_of - none_of - minmax_element - advance - inclusive_segmented_scan (experimental) - exclusive_segmented_scan (experimental) - - Types - pair - tuple - device_malloc_allocator - - Fancy Iterators - constant_iterator - counting_iterator - transform_iterator - zip_iterator - -New Examples - computing the maximum absolute difference between vectors - computing the bounding box of a two-dimensional point set - sorting multiple arrays together (lexicographical sorting) - constructing a summed area table - using zip_iterator to mimic an array of structs - using constant_iterator to increment array values - -Other Enhancements - added pinned memory allocator (experimental) - added more methods to host_vector & device_vector (issue #4) - added variant of remove_if with a stencil argument (issue #29) - scan and reduce use cudaFuncGetAttributes to determine grid size - exceptions are reported when temporary device arrays cannot be allocated - -Bug Fixes - #5 make vector work for larger data types - #9 stable_partition_copy doesn't respect OutputIterator concept semantics - #10 scans should return OutputIterator - #16 make algorithms work for larger data types - #27 dispatch radix_sort even when comp=less is explicitly provided - -Known Issues - Using functors with Thrust entry points may not compile on Mac OSX with gcc-4.0.1 - uninitialized_copy & uninitialized_fill dispatch constructors on the host rather than the device. 
- inclusive_scan, inclusive_scan_by_key, exclusive_scan, and exclusive_scan_by_key may fail when used with large types with the CUDA 3.1 driver - - -####################################### -# Thrust v1.0.0 # -####################################### - -Breaking API changes - Rename top level namespace komrade to thrust. - Move partition_copy() & stable_partition_copy() into thrust::experimental namespace until we can easily provide the standard interface. - Rename range() to sequence() to avoid collision with Boost.Range. - Rename copy_if() to copy_when() due to semantic differences with C++0x copy_if(). - -New Features - Add C++0x style cbegin() & cend() methods to host_vector & device_vector. - Add transform_if function. - Add stencil versions of replace_if() & replace_copy_if(). - Allow counting_iterator to work with for_each(). - Allow types with constructors in comparison sort & reduce. - -Other Enhancements - merge_sort and stable_merge_sort are now 2 to 5x faster when executed on the parallel device. - -Bug fixes - Workaround an issue where an incremented iterator causes nvcc to crash. (Komrade issue #6) - Fix an issue where const_iterators could not be passed to transform. (Komrade issue #7) - diff --git a/compat/thrust/adjacent_difference.h b/compat/thrust/adjacent_difference.h deleted file mode 100644 index 772b5f993f..0000000000 --- a/compat/thrust/adjacent_difference.h +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file adjacent_difference.h - * \brief Compute difference between consecutive elements of a range - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup transformations Transformations - * \{ - */ - - -/*! \p adjacent_difference calculates the differences of adjacent elements in the - * range [first, last). That is, \*first is assigned to - * \*result, and, for each iterator \p i in the range - * [first + 1, last), the difference of \*i and *(i - 1) - * is assigned to \*(result + (i - first)). - * - * This version of \p adjacent_difference uses operator- to calculate - * differences. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input range. - * \param last The end of the input range. - * \param result The beginning of the output range. - * \return The iterator result + (last - first) - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator, - * and if \c x and \c y are objects of \p InputIterator's \c value_type, then \c x - \c y is defined, - * and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types, - * and the return type of x - y is convertible to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * - * \remark Note that \p result is permitted to be the same iterator as \p first. This is - * useful for computing differences "in place". - * - * The following code snippet demonstrates how to use \p adjacent_difference to compute - * the difference between adjacent elements of a range using the \p thrust::device execution policy: - * - * \code - * #include - * #include - * #include - * ... 
- * int h_data[8] = {1, 2, 1, 2, 1, 2, 1, 2}; - * thrust::device_vector d_data(h_data, h_data + 8); - * thrust::device_vector d_result(8); - * - * thrust::adjacent_difference(thrust::device, d_data.begin(), d_data.end(), d_result.begin()); - * - * // d_result is now [1, 1, -1, 1, -1, 1, -1, 1] - * \endcode - * - * \see http://www.sgi.com/tech/stl/adjacent_difference.html - * \see inclusive_scan - */ -template -OutputIterator adjacent_difference(const thrust::detail::execution_policy_base &exec, - InputIterator first, InputIterator last, - OutputIterator result); - -/*! \p adjacent_difference calculates the differences of adjacent elements in the - * range [first, last). That is, *first is assigned to - * \*result, and, for each iterator \p i in the range - * [first + 1, last), binary_op(\*i, \*(i - 1)) is assigned to - * \*(result + (i - first)). - * - * This version of \p adjacent_difference uses the binary function \p binary_op to - * calculate differences. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input range. - * \param last The end of the input range. - * \param result The beginning of the output range. - * \param binary_op The binary function used to compute differences. - * \return The iterator result + (last - first) - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type, - * and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam BinaryFunction's \c result_type is convertible to a type in \p OutputIterator's set of \c value_types. 
- * - * \remark Note that \p result is permitted to be the same iterator as \p first. This is - * useful for computing differences "in place". - * - * The following code snippet demonstrates how to use \p adjacent_difference to compute - * the sum of adjacent elements of a range using the \p thrust::device execution policy: - * - * \code - * #include - * #include - * #include - * #include - * ... - * int h_data[8] = {1, 2, 1, 2, 1, 2, 1, 2}; - * thrust::device_vector d_data(h_data, h_data + 8); - * thrust::device_vector d_result(8); - * - * thrust::adjacent_difference(thrust::device, d_data.begin(), d_data.end(), d_result.begin(), thrust::plus()); - * - * // d_result is now [1, 3, 3, 3, 3, 3, 3, 3] - * \endcode - * - * \see http://www.sgi.com/tech/stl/adjacent_difference.html - * \see inclusive_scan - */ -template -OutputIterator adjacent_difference(const thrust::detail::execution_policy_base &exec, - InputIterator first, InputIterator last, - OutputIterator result, - BinaryFunction binary_op); - -/*! \p adjacent_difference calculates the differences of adjacent elements in the - * range [first, last). That is, \*first is assigned to - * \*result, and, for each iterator \p i in the range - * [first + 1, last), the difference of \*i and *(i - 1) - * is assigned to \*(result + (i - first)). - * - * This version of \p adjacent_difference uses operator- to calculate - * differences. - * - * \param first The beginning of the input range. - * \param last The end of the input range. - * \param result The beginning of the output range. - * \return The iterator result + (last - first) - * - * \tparam InputIterator is a model of Input Iterator, - * and if \c x and \c y are objects of \p InputIterator's \c value_type, then \c x - \c y is defined, - * and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types, - * and the return type of x - y is convertible to a type in \p OutputIterator's set of \c value_types. 
- * \tparam OutputIterator is a model of Output Iterator. - * - * \remark Note that \p result is permitted to be the same iterator as \p first. This is - * useful for computing differences "in place". - * - * The following code snippet demonstrates how to use \p adjacent_difference to compute - * the difference between adjacent elements of a range. - * - * \code - * #include - * #include - * ... - * int h_data[8] = {1, 2, 1, 2, 1, 2, 1, 2}; - * thrust::device_vector d_data(h_data, h_data + 8); - * thrust::device_vector d_result(8); - * - * thrust::adjacent_difference(d_data.begin(), d_data.end(), d_result.begin()); - * - * // d_result is now [1, 1, -1, 1, -1, 1, -1, 1] - * \endcode - * - * \see http://www.sgi.com/tech/stl/adjacent_difference.html - * \see inclusive_scan - */ -template -OutputIterator adjacent_difference(InputIterator first, InputIterator last, - OutputIterator result); - -/*! \p adjacent_difference calculates the differences of adjacent elements in the - * range [first, last). That is, *first is assigned to - * \*result, and, for each iterator \p i in the range - * [first + 1, last), binary_op(\*i, \*(i - 1)) is assigned to - * \*(result + (i - first)). - * - * This version of \p adjacent_difference uses the binary function \p binary_op to - * calculate differences. - * - * \param first The beginning of the input range. - * \param last The end of the input range. - * \param result The beginning of the output range. - * \param binary_op The binary function used to compute differences. - * \return The iterator result + (last - first) - * - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type, - * and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. 
- * \tparam BinaryFunction's \c result_type is convertible to a type in \p OutputIterator's set of \c value_types. - * - * \remark Note that \p result is permitted to be the same iterator as \p first. This is - * useful for computing differences "in place". - * - * The following code snippet demonstrates how to use \p adjacent_difference to compute - * the sum of adjacent elements of a range. - * - * \code - * #include - * #include - * #include - * ... - * int h_data[8] = {1, 2, 1, 2, 1, 2, 1, 2}; - * thrust::device_vector d_data(h_data, h_data + 8); - * thrust::device_vector d_result(8); - * - * thrust::adjacent_difference(d_data.begin(), d_data.end(), d_result.begin(), thrust::plus()); - * - * // d_result is now [1, 3, 3, 3, 3, 3, 3, 3] - * \endcode - * - * \see http://www.sgi.com/tech/stl/adjacent_difference.html - * \see inclusive_scan - */ -template -OutputIterator adjacent_difference(InputIterator first, InputIterator last, - OutputIterator result, - BinaryFunction binary_op); - -/*! \} - */ - -} // end namespace thrust - -#include - diff --git a/compat/thrust/advance.h b/compat/thrust/advance.h deleted file mode 100644 index e7f60b0d54..0000000000 --- a/compat/thrust/advance.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file advance.h - * \brief Advance an iterator by a given distance. - */ - -#pragma once - -#include - -namespace thrust -{ - - -/*! 
\addtogroup iterators - * \{ - */ - -/*! \p advance(i, n) increments the iterator \p i by the distance \p n. - * If n > 0 it is equivalent to executing ++i \p n - * times, and if n < 0 it is equivalent to executing --i - * \p n times. If n == 0, the call has no effect. - * - * \param i The iterator to be advanced. - * \param n The distance by which to advance the iterator. - * - * \tparam InputIterator is a model of Input Iterator. - * \tparam Distance is an integral type that is convertible to \p InputIterator's distance type. - * - * \pre \p n shall be negative only for bidirectional and random access iterators. - * - * The following code snippet demonstrates how to use \p advance to increment - * an iterator a given number of times. - * - * \code - * #include - * #include - * ... - * thrust::device_vector vec(13); - * thrust::device_vector::iterator iter = vec.begin(); - * - * thrust::advance(iter, 7); - * - * // iter - vec.begin() == 7 - * \endcode - * - * \see http://www.sgi.com/tech/stl/advance.html - */ -template -void advance(InputIterator& i, Distance n); - -/*! \} // end iterators - */ - -} // end thrust - -#include - diff --git a/compat/thrust/binary_search.h b/compat/thrust/binary_search.h deleted file mode 100644 index d2ac5a621e..0000000000 --- a/compat/thrust/binary_search.h +++ /dev/null @@ -1,1888 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file binary_search.h - * \brief Search for values in sorted ranges. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ - - -/*! \addtogroup algorithms - */ - - -/*! \addtogroup searching - * \ingroup algorithms - * \{ - */ - - -/*! \addtogroup binary_search Binary Search - * \ingroup searching - * \{ - */ - - -////////////////////// -// Scalar Functions // -////////////////////// - - -/*! \p lower_bound is a version of binary search: it attempts to find - * the element value in an ordered range [first, last). - * Specifically, it returns the first position where value could be - * inserted without violating the ordering. This version of - * \p lower_bound uses operator< for comparison and returns - * the furthermost iterator \c i in [first, last) such that, - * for every iterator \c j in [first, i), *j < value. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param value The value to be searched. - * \return The furthermost iterator \c i, such that *i < value. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam LessThanComparable is a model of LessThanComparable. - * - * The following code snippet demonstrates how to use \p lower_bound - * to search for values in a ordered range using the \p thrust::device execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... 
- * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::lower_bound(thrust::device, input.begin(), input.end(), 0); // returns input.begin() - * thrust::lower_bound(thrust::device, input.begin(), input.end(), 1); // returns input.begin() + 1 - * thrust::lower_bound(thrust::device, input.begin(), input.end(), 2); // returns input.begin() + 1 - * thrust::lower_bound(thrust::device, input.begin(), input.end(), 3); // returns input.begin() + 2 - * thrust::lower_bound(thrust::device, input.begin(), input.end(), 8); // returns input.begin() + 4 - * thrust::lower_bound(thrust::device, input.begin(), input.end(), 9); // returns input.end() - * \endcode - * - * \see http://www.sgi.com/tech/stl/lower_bound.html - * \see \p upper_bound - * \see \p equal_range - * \see \p binary_search - */ -template -ForwardIterator lower_bound(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const LessThanComparable &value); - - -/*! \p lower_bound is a version of binary search: it attempts to find - * the element value in an ordered range [first, last). - * Specifically, it returns the first position where value could be - * inserted without violating the ordering. This version of - * \p lower_bound uses operator< for comparison and returns - * the furthermost iterator \c i in [first, last) such that, - * for every iterator \c j in [first, i), *j < value. - * - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param value The value to be searched. - * \return The furthermost iterator \c i, such that *i < value. - * - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam LessThanComparable is a model of LessThanComparable. - * - * The following code snippet demonstrates how to use \p lower_bound - * to search for values in a ordered range. 
- * - * \code - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::lower_bound(input.begin(), input.end(), 0); // returns input.begin() - * thrust::lower_bound(input.begin(), input.end(), 1); // returns input.begin() + 1 - * thrust::lower_bound(input.begin(), input.end(), 2); // returns input.begin() + 1 - * thrust::lower_bound(input.begin(), input.end(), 3); // returns input.begin() + 2 - * thrust::lower_bound(input.begin(), input.end(), 8); // returns input.begin() + 4 - * thrust::lower_bound(input.begin(), input.end(), 9); // returns input.end() - * \endcode - * - * \see http://www.sgi.com/tech/stl/lower_bound.html - * \see \p upper_bound - * \see \p equal_range - * \see \p binary_search - */ -template -ForwardIterator lower_bound(ForwardIterator first, - ForwardIterator last, - const LessThanComparable& value); - - -/*! \p lower_bound is a version of binary search: it attempts to find - * the element value in an ordered range [first, last). - * Specifically, it returns the first position where value could be - * inserted without violating the ordering. This version of - * \p lower_bound uses function object \c comp for comparison - * and returns the furthermost iterator \c i in [first, last) - * such that, for every iterator \c j in [first, i), - * comp(*j, value) is \c true. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param value The value to be searched. - * \param comp The comparison operator. - * \return The furthermost iterator \c i, such that comp(*i, value) is \c true. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator. 
- * \tparam T is comparable to \p ForwardIterator's \c value_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * The following code snippet demonstrates how to use \p lower_bound - * to search for values in an ordered range using the \p thrust::device execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::lower_bound(thrust::device, input.begin(), input.end(), 0, thrust::less()); // returns input.begin() - * thrust::lower_bound(thrust::device, input.begin(), input.end(), 1, thrust::less()); // returns input.begin() + 1 - * thrust::lower_bound(thrust::device, input.begin(), input.end(), 2, thrust::less()); // returns input.begin() + 1 - * thrust::lower_bound(thrust::device, input.begin(), input.end(), 3, thrust::less()); // returns input.begin() + 2 - * thrust::lower_bound(thrust::device, input.begin(), input.end(), 8, thrust::less()); // returns input.begin() + 4 - * thrust::lower_bound(thrust::device, input.begin(), input.end(), 9, thrust::less()); // returns input.end() - * \endcode - * - * \see http://www.sgi.com/tech/stl/lower_bound.html - * \see \p upper_bound - * \see \p equal_range - * \see \p binary_search - */ -template -ForwardIterator lower_bound(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const T &value, - StrictWeakOrdering comp); - - -/*! \p lower_bound is a version of binary search: it attempts to find - * the element value in an ordered range [first, last). - * Specifically, it returns the first position where value could be - * inserted without violating the ordering. This version of - * \p lower_bound uses function object \c comp for comparison - * and returns the furthermost iterator \c i in [first, last) - * such that, for every iterator \c j in [first, i), - * comp(*j, value) is \c true. - * - * \param first The beginning of the ordered sequence.
- * \param last The end of the ordered sequence. - * \param value The value to be searched. - * \param comp The comparison operator. - * \return The furthermost iterator \c i, such that comp(*i, value) is \c true. - * - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam T is comparable to \p ForwardIterator's \c value_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * The following code snippet demonstrates how to use \p lower_bound - * to search for values in a ordered range. - * - * \code - * #include - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::lower_bound(input.begin(), input.end(), 0, thrust::less()); // returns input.begin() - * thrust::lower_bound(input.begin(), input.end(), 1, thrust::less()); // returns input.begin() + 1 - * thrust::lower_bound(input.begin(), input.end(), 2, thrust::less()); // returns input.begin() + 1 - * thrust::lower_bound(input.begin(), input.end(), 3, thrust::less()); // returns input.begin() + 2 - * thrust::lower_bound(input.begin(), input.end(), 8, thrust::less()); // returns input.begin() + 4 - * thrust::lower_bound(input.begin(), input.end(), 9, thrust::less()); // returns input.end() - * \endcode - * - * \see http://www.sgi.com/tech/stl/lower_bound.html - * \see \p upper_bound - * \see \p equal_range - * \see \p binary_search - */ -template -ForwardIterator lower_bound(ForwardIterator first, - ForwardIterator last, - const T& value, - StrictWeakOrdering comp); - - -/*! \p upper_bound is a version of binary search: it attempts to find - * the element value in an ordered range [first, last). - * Specifically, it returns the last position where value could be - * inserted without violating the ordering. 
This version of - * \p upper_bound uses operator< for comparison and returns - * the furthermost iterator \c i in [first, last) such that, - * for every iterator \c j in [first, i), value < *j - * is \c false. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param value The value to be searched. - * \return The furthermost iterator \c i, such that value < *i is \c false. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam LessThanComparable is a model of LessThanComparable. - * - * The following code snippet demonstrates how to use \p upper_bound - * to search for values in a ordered range using the \p thrust::device execution policy for parallelism: - * - * \code - * #include - * #include - * #include - * ... 
- * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::upper_bound(thrust::device, input.begin(), input.end(), 0); // returns input.begin() + 1 - * thrust::upper_bound(thrust::device, input.begin(), input.end(), 1); // returns input.begin() + 1 - * thrust::upper_bound(thrust::device, input.begin(), input.end(), 2); // returns input.begin() + 2 - * thrust::upper_bound(thrust::device, input.begin(), input.end(), 3); // returns input.begin() + 2 - * thrust::upper_bound(thrust::device, input.begin(), input.end(), 8); // returns input.end() - * thrust::upper_bound(thrust::device, input.begin(), input.end(), 9); // returns input.end() - * \endcode - * - * \see http://www.sgi.com/tech/stl/upper_bound.html - * \see \p lower_bound - * \see \p equal_range - * \see \p binary_search - */ -template -ForwardIterator upper_bound(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const LessThanComparable &value); - - -/*! \p upper_bound is a version of binary search: it attempts to find - * the element value in an ordered range [first, last). - * Specifically, it returns the last position where value could be - * inserted without violating the ordering. This version of - * \p upper_bound uses operator< for comparison and returns - * the furthermost iterator \c i in [first, last) such that, - * for every iterator \c j in [first, i), value < *j - * is \c false. - * - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param value The value to be searched. - * \return The furthermost iterator \c i, such that value < *i is \c false. - * - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam LessThanComparable is a model of LessThanComparable. - * - * The following code snippet demonstrates how to use \p upper_bound - * to search for values in a ordered range. 
- * - * \code - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::upper_bound(input.begin(), input.end(), 0); // returns input.begin() + 1 - * thrust::upper_bound(input.begin(), input.end(), 1); // returns input.begin() + 1 - * thrust::upper_bound(input.begin(), input.end(), 2); // returns input.begin() + 2 - * thrust::upper_bound(input.begin(), input.end(), 3); // returns input.begin() + 2 - * thrust::upper_bound(input.begin(), input.end(), 8); // returns input.end() - * thrust::upper_bound(input.begin(), input.end(), 9); // returns input.end() - * \endcode - * - * \see http://www.sgi.com/tech/stl/upper_bound.html - * \see \p lower_bound - * \see \p equal_range - * \see \p binary_search - */ -template -ForwardIterator upper_bound(ForwardIterator first, - ForwardIterator last, - const LessThanComparable& value); - - -/*! \p upper_bound is a version of binary search: it attempts to find - * the element value in an ordered range [first, last). - * Specifically, it returns the last position where value could be - * inserted without violating the ordering. This version of - * \p upper_bound uses function object \c comp for comparison and returns - * the furthermost iterator \c i in [first, last) such that, - * for every iterator \c j in [first, i), comp(value, *j) - * is \c false. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param value The value to be searched. - * \param comp The comparison operator. - * \return The furthermost iterator \c i, such that comp(value, *i) is \c false. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator. 
- * \tparam T is comparable to \p ForwardIterator's \c value_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * The following code snippet demonstrates how to use \p upper_bound - * to search for values in a ordered range using the \p thrust::device execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::upper_bound(thrust::device, input.begin(), input.end(), 0, thrust::less()); // returns input.begin() + 1 - * thrust::upper_bound(thrust::device, input.begin(), input.end(), 1, thrust::less()); // returns input.begin() + 1 - * thrust::upper_bound(thrust::device, input.begin(), input.end(), 2, thrust::less()); // returns input.begin() + 2 - * thrust::upper_bound(thrust::device, input.begin(), input.end(), 3, thrust::less()); // returns input.begin() + 2 - * thrust::upper_bound(thrust::device, input.begin(), input.end(), 8, thrust::less()); // returns input.end() - * thrust::upper_bound(thrust::device, input.begin(), input.end(), 9, thrust::less()); // returns input.end() - * \endcode - * - * \see http://www.sgi.com/tech/stl/upper_bound.html - * \see \p lower_bound - * \see \p equal_range - * \see \p binary_search - */ -template -ForwardIterator upper_bound(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const T &value, - StrictWeakOrdering comp); - -/*! \p upper_bound is a version of binary search: it attempts to find - * the element value in an ordered range [first, last). - * Specifically, it returns the last position where value could be - * inserted without violating the ordering. 
This version of - * \p upper_bound uses function object \c comp for comparison and returns - * the furthermost iterator \c i in [first, last) such that, - * for every iterator \c j in [first, i), comp(value, *j) - * is \c false. - * - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param value The value to be searched. - * \param comp The comparison operator. - * \return The furthermost iterator \c i, such that comp(value, *i) is \c false. - * - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam T is comparable to \p ForwardIterator's \c value_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * The following code snippet demonstrates how to use \p upper_bound - * to search for values in a ordered range. - * - * \code - * #include - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::upper_bound(input.begin(), input.end(), 0, thrust::less()); // returns input.begin() + 1 - * thrust::upper_bound(input.begin(), input.end(), 1, thrust::less()); // returns input.begin() + 1 - * thrust::upper_bound(input.begin(), input.end(), 2, thrust::less()); // returns input.begin() + 2 - * thrust::upper_bound(input.begin(), input.end(), 3, thrust::less()); // returns input.begin() + 2 - * thrust::upper_bound(input.begin(), input.end(), 8, thrust::less()); // returns input.end() - * thrust::upper_bound(input.begin(), input.end(), 9, thrust::less()); // returns input.end() - * \endcode - * - * \see http://www.sgi.com/tech/stl/upper_bound.html - * \see \p lower_bound - * \see \p equal_range - * \see \p binary_search - */ -template -ForwardIterator upper_bound(ForwardIterator first, - ForwardIterator last, - const T& value, - StrictWeakOrdering comp); - - -/*! 
\p binary_search is a version of binary search: it attempts to find - * the element value in an ordered range [first, last). - * It returns \c true if an element that is equivalent to \c value - * is present in [first, last) and \c false if no such element - * exists. Specifically, this version returns \c true if and only if - * there exists an iterator \c i in [first, last) such that - * *i < value and value < *i are both \c false. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param value The value to be searched. - * \return \c true if an equivalent element exists in [first, last), otherwise \c false. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam LessThanComparable is a model of LessThanComparable. - * - * The following code snippet demonstrates how to use \p binary_search - * to search for values in a ordered range using the \p thrust::device execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... 
- * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::binary_search(thrust::device, input.begin(), input.end(), 0); // returns true - * thrust::binary_search(thrust::device, input.begin(), input.end(), 1); // returns false - * thrust::binary_search(thrust::device, input.begin(), input.end(), 2); // returns true - * thrust::binary_search(thrust::device, input.begin(), input.end(), 3); // returns false - * thrust::binary_search(thrust::device, input.begin(), input.end(), 8); // returns true - * thrust::binary_search(thrust::device, input.begin(), input.end(), 9); // returns false - * \endcode - * - * \see http://www.sgi.com/tech/stl/binary_search.html - * \see \p lower_bound - * \see \p upper_bound - * \see \p equal_range - */ -template -bool binary_search(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const LessThanComparable& value); - - -/*! \p binary_search is a version of binary search: it attempts to find - * the element value in an ordered range [first, last). - * It returns \c true if an element that is equivalent to \c value - * is present in [first, last) and \c false if no such element - * exists. Specifically, this version returns \c true if and only if - * there exists an iterator \c i in [first, last) such that - * *i < value and value < *i are both \c false. - * - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param value The value to be searched. - * \return \c true if an equivalent element exists in [first, last), otherwise \c false. - * - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam LessThanComparable is a model of LessThanComparable. - * - * The following code snippet demonstrates how to use \p binary_search - * to search for values in a ordered range. - * - * \code - * #include - * #include - * ... 
- * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::binary_search(input.begin(), input.end(), 0); // returns true - * thrust::binary_search(input.begin(), input.end(), 1); // returns false - * thrust::binary_search(input.begin(), input.end(), 2); // returns true - * thrust::binary_search(input.begin(), input.end(), 3); // returns false - * thrust::binary_search(input.begin(), input.end(), 8); // returns true - * thrust::binary_search(input.begin(), input.end(), 9); // returns false - * \endcode - * - * \see http://www.sgi.com/tech/stl/binary_search.html - * \see \p lower_bound - * \see \p upper_bound - * \see \p equal_range - */ -template -bool binary_search(ForwardIterator first, - ForwardIterator last, - const LessThanComparable& value); - - -/*! \p binary_search is a version of binary search: it attempts to find - * the element value in an ordered range [first, last). - * It returns \c true if an element that is equivalent to \c value - * is present in [first, last) and \c false if no such element - * exists. Specifically, this version returns \c true if and only if - * there exists an iterator \c i in [first, last) such that - * comp(*i, value) and comp(value, *i) are both \c false. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param value The value to be searched. - * \param comp The comparison operator. - * \return \c true if an equivalent element exists in [first, last), otherwise \c false. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam T is comparable to \p ForwardIterator's \c value_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. 
- * - * The following code snippet demonstrates how to use \p binary_search - * to search for values in a ordered range using the \p thrust::device execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::binary_search(thrust::device, input.begin(), input.end(), 0, thrust::less()); // returns true - * thrust::binary_search(thrust::device, input.begin(), input.end(), 1, thrust::less()); // returns false - * thrust::binary_search(thrust::device, input.begin(), input.end(), 2, thrust::less()); // returns true - * thrust::binary_search(thrust::device, input.begin(), input.end(), 3, thrust::less()); // returns false - * thrust::binary_search(thrust::device, input.begin(), input.end(), 8, thrust::less()); // returns true - * thrust::binary_search(thrust::device, input.begin(), input.end(), 9, thrust::less()); // returns false - * \endcode - * - * \see http://www.sgi.com/tech/stl/binary_search.html - * \see \p lower_bound - * \see \p upper_bound - * \see \p equal_range - */ -template -bool binary_search(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const T& value, - StrictWeakOrdering comp); - - -/*! \p binary_search is a version of binary search: it attempts to find - * the element value in an ordered range [first, last). - * It returns \c true if an element that is equivalent to \c value - * is present in [first, last) and \c false if no such element - * exists. Specifically, this version returns \c true if and only if - * there exists an iterator \c i in [first, last) such that - * comp(*i, value) and comp(value, *i) are both \c false. - * - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param value The value to be searched. 
- * \param comp The comparison operator. - * \return \c true if an equivalent element exists in [first, last), otherwise \c false. - * - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam T is comparable to \p ForwardIterator's \c value_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * The following code snippet demonstrates how to use \p binary_search - * to search for values in a ordered range. - * - * \code - * #include - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::binary_search(input.begin(), input.end(), 0, thrust::less()); // returns true - * thrust::binary_search(input.begin(), input.end(), 1, thrust::less()); // returns false - * thrust::binary_search(input.begin(), input.end(), 2, thrust::less()); // returns true - * thrust::binary_search(input.begin(), input.end(), 3, thrust::less()); // returns false - * thrust::binary_search(input.begin(), input.end(), 8, thrust::less()); // returns true - * thrust::binary_search(input.begin(), input.end(), 9, thrust::less()); // returns false - * \endcode - * - * \see http://www.sgi.com/tech/stl/binary_search.html - * \see \p lower_bound - * \see \p upper_bound - * \see \p equal_range - */ -template -bool binary_search(ForwardIterator first, - ForwardIterator last, - const T& value, - StrictWeakOrdering comp); - - -/*! \p equal_range is a version of binary search: it attempts to find - * the element value in an ordered range [first, last). The - * value returned by \p equal_range is essentially a combination of - * the values returned by \p lower_bound and \p upper_bound: it returns - * a \p pair of iterators \c i and \c j such that \c i is the first - * position where value could be inserted without violating the - * ordering and \c j is the last position where value could be inserted - * without violating the ordering. 
It follows that every element in the - * range [i, j) is equivalent to value, and that - * [i, j) is the largest subrange of [first, last) that - * has this property. - * - * This version of \p equal_range returns a \p pair of iterators - * [i, j), where \c i is the furthermost iterator in - * [first, last) such that, for every iterator \c k in - * [first, i), *k < value. \c j is the furthermost - * iterator in [first, last) such that, for every iterator - * \c k in [first, j), value < *k is \c false. - * For every iterator \c k in [i, j), neither - * value < *k nor *k < value is \c true. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param value The value to be searched. - * \return A \p pair of iterators [i, j) that define the range of equivalent elements. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam LessThanComparable is a model of LessThanComparable. - * - * The following code snippet demonstrates how to use \p equal_range - * to search for values in a ordered range using the \p thrust::device execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... 
- * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::equal_range(thrust::device, input.begin(), input.end(), 0); // returns [input.begin(), input.begin() + 1) - * thrust::equal_range(thrust::device, input.begin(), input.end(), 1); // returns [input.begin() + 1, input.begin() + 1) - * thrust::equal_range(thrust::device, input.begin(), input.end(), 2); // returns [input.begin() + 1, input.begin() + 2) - * thrust::equal_range(thrust::device, input.begin(), input.end(), 3); // returns [input.begin() + 2, input.begin() + 2) - * thrust::equal_range(thrust::device, input.begin(), input.end(), 8); // returns [input.begin() + 4, input.end()) - * thrust::equal_range(thrust::device, input.begin(), input.end(), 9); // returns [input.end(), input.end()) - * \endcode - * - * \see http://www.sgi.com/tech/stl/equal_range.html - * \see \p lower_bound - * \see \p upper_bound - * \see \p binary_search - */ -template -thrust::pair -equal_range(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const LessThanComparable& value); - - -/*! \p equal_range is a version of binary search: it attempts to find - * the element value in an ordered range [first, last). The - * value returned by \p equal_range is essentially a combination of - * the values returned by \p lower_bound and \p upper_bound: it returns - * a \p pair of iterators \c i and \c j such that \c i is the first - * position where value could be inserted without violating the - * ordering and \c j is the last position where value could be inserted - * without violating the ordering. It follows that every element in the - * range [i, j) is equivalent to value, and that - * [i, j) is the largest subrange of [first, last) that - * has this property.
- * - * This version of \p equal_range returns a \p pair of iterators - * [i, j), where \c i is the furthermost iterator in - * [first, last) such that, for every iterator \c k in - * [first, i), *k < value. \c j is the furthermost - * iterator in [first, last) such that, for every iterator - * \c k in [first, j), value < *k is \c false. - * For every iterator \c k in [i, j), neither - * value < *k nor *k < value is \c true. - * - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param value The value to be searched. - * \return A \p pair of iterators [i, j) that define the range of equivalent elements. - * - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam LessThanComparable is a model of LessThanComparable. - * - * The following code snippet demonstrates how to use \p equal_range - * to search for values in an ordered range. - * - * \code - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::equal_range(input.begin(), input.end(), 0); // returns [input.begin(), input.begin() + 1) - * thrust::equal_range(input.begin(), input.end(), 1); // returns [input.begin() + 1, input.begin() + 1) - * thrust::equal_range(input.begin(), input.end(), 2); // returns [input.begin() + 1, input.begin() + 2) - * thrust::equal_range(input.begin(), input.end(), 3); // returns [input.begin() + 2, input.begin() + 2) - * thrust::equal_range(input.begin(), input.end(), 8); // returns [input.begin() + 4, input.end()) - * thrust::equal_range(input.begin(), input.end(), 9); // returns [input.end(), input.end()) - * \endcode - * - * \see http://www.sgi.com/tech/stl/equal_range.html - * \see \p lower_bound - * \see \p upper_bound - * \see \p binary_search - */ -template -thrust::pair -equal_range(ForwardIterator first, - ForwardIterator last, - const LessThanComparable& value); - - -/*! 
\p equal_range is a version of binary search: it attempts to find - * the element value in an ordered range [first, last). The - * value returned by \p equal_range is essentially a combination of - * the values returned by \p lower_bound and \p upper_bound: it returns - * a \p pair of iterators \c i and \c j such that \c i is the first - * position where value could be inserted without violating the - * ordering and \c j is the last position where value could be inserted - * without violating the ordering. It follows that every element in the - * range [i, j) is equivalent to value, and that - * [i, j) is the largest subrange of [first, last) that - * has this property. - * - * This version of \p equal_range returns a \p pair of iterators - * [i, j). \c i is the furthermost iterator in - * [first, last) such that, for every iterator \c k in - * [first, i), comp(*k, value) is \c true. - * \c j is the furthermost iterator in [first, last) such - * that, for every iterator \c k in [first, j), - * comp(value, *k) is \c false. For every iterator \c k - * in [i, j), neither comp(value, *k) nor - * comp(*k, value) is \c true. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param value The value to be searched. - * \param comp The comparison operator. - * \return A \p pair of iterators [i, j) that define the range of equivalent elements. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam T is comparable to \p ForwardIterator's \c value_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. 
- * - * The following code snippet demonstrates how to use \p equal_range - * to search for values in a ordered range using the \p thrust::device execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::equal_range(thrust::device, input.begin(), input.end(), 0, thrust::less()); // returns [input.begin(), input.begin() + 1) - * thrust::equal_range(thrust::device, input.begin(), input.end(), 1, thrust::less()); // returns [input.begin() + 1, input.begin() + 1) - * thrust::equal_range(thrust::device, input.begin(), input.end(), 2, thrust::less()); // returns [input.begin() + 1, input.begin() + 2) - * thrust::equal_range(thrust::device, input.begin(), input.end(), 3, thrust::less()); // returns [input.begin() + 2, input.begin() + 2) - * thrust::equal_range(thrust::device, input.begin(), input.end(), 8, thrust::less()); // returns [input.begin() + 4, input.end) - * thrust::equal_range(thrust::device, input.begin(), input.end(), 9, thrust::less()); // returns [input.end(), input.end) - * \endcode - * - * \see http://www.sgi.com/tech/stl/equal_range.html - * \see \p lower_bound - * \see \p upper_bound - * \see \p binary_search - */ -template -thrust::pair -equal_range(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const T& value, - StrictWeakOrdering comp); - - -/*! \p equal_range is a version of binary search: it attempts to find - * the element value in an ordered range [first, last). 
The - * value returned by \p equal_range is essentially a combination of - * the values returned by \p lower_bound and \p upper_bound: it returns - * a \p pair of iterators \c i and \c j such that \c i is the first - * position where value could be inserted without violating the - * ordering and \c j is the last position where value could be inserted - * without violating the ordering. It follows that every element in the - * range [i, j) is equivalent to value, and that - * [i, j) is the largest subrange of [first, last) that - * has this property. - * - * This version of \p equal_range returns a \p pair of iterators - * [i, j). \c i is the furthermost iterator in - * [first, last) such that, for every iterator \c k in - * [first, i), comp(*k, value) is \c true. - * \c j is the furthermost iterator in [first, last) such - * that, for every iterator \c k in [first, last), - * comp(value, *k) is \c false. For every iterator \c k - * in [i, j), neither comp(value, *k) nor - * comp(*k, value) is \c true. - * - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param value The value to be searched. - * \param comp The comparison operator. - * \return A \p pair of iterators [i, j) that define the range of equivalent elements. - * - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam T is comparable to \p ForwardIterator's \c value_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * The following code snippet demonstrates how to use \p equal_range - * to search for values in a ordered range. - * - * \code - * #include - * #include - * #include - * ... 
- * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::equal_range(input.begin(), input.end(), 0, thrust::less()); // returns [input.begin(), input.begin() + 1) - * thrust::equal_range(input.begin(), input.end(), 1, thrust::less()); // returns [input.begin() + 1, input.begin() + 1) - * thrust::equal_range(input.begin(), input.end(), 2, thrust::less()); // returns [input.begin() + 1, input.begin() + 2) - * thrust::equal_range(input.begin(), input.end(), 3, thrust::less()); // returns [input.begin() + 2, input.begin() + 2) - * thrust::equal_range(input.begin(), input.end(), 8, thrust::less()); // returns [input.begin() + 4, input.end) - * thrust::equal_range(input.begin(), input.end(), 9, thrust::less()); // returns [input.end(), input.end) - * \endcode - * - * \see http://www.sgi.com/tech/stl/equal_range.html - * \see \p lower_bound - * \see \p upper_bound - * \see \p binary_search - */ -template -thrust::pair -equal_range(ForwardIterator first, - ForwardIterator last, - const T& value, - StrictWeakOrdering comp); - - -/*! \addtogroup vectorized_binary_search Vectorized Searches - * \ingroup binary_search - * \{ - */ - - -////////////////////// -// Vector Functions // -////////////////////// - - -/*! \p lower_bound is a vectorized version of binary search: for each - * iterator \c v in [values_first, values_last) it attempts to - * find the value *v in an ordered range [first, last). - * Specifically, it returns the index of first position where value could - * be inserted without violating the ordering. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param values_first The beginning of the search values sequence. 
- * \param values_last The end of the search values sequence. - * \param result The beginning of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam InputIterator is a model of Input Iterator. - * and \c InputIterator's \c value_type is LessThanComparable. - * \tparam OutputIterator is a model of Output Iterator. - * and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type. - * - * \pre The ranges [first,last) and [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p lower_bound - * to search for multiple values in a ordered range using the \p thrust::device execution policy for - * parallelization: - * - * \code - * #include - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::device_vector values(6); - * values[0] = 0; - * values[1] = 1; - * values[2] = 2; - * values[3] = 3; - * values[4] = 8; - * values[5] = 9; - * - * thrust::device_vector output(6); - * - * thrust::lower_bound(thrust::device, - * input.begin(), input.end(), - * values.begin(), values.end(), - * output.begin()); - * - * // output is now [0, 1, 1, 2, 4, 5] - * \endcode - * - * \see http://www.sgi.com/tech/stl/lower_bound.html - * \see \p upper_bound - * \see \p equal_range - * \see \p binary_search - */ -template -OutputIterator lower_bound(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator result); - - -/*! \p lower_bound is a vectorized version of binary search: for each - * iterator \c v in [values_first, values_last) it attempts to - * find the value *v in an ordered range [first, last). 
- * Specifically, it returns the index of first position where value could - * be inserted without violating the ordering. - * - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param values_first The beginning of the search values sequence. - * \param values_last The end of the search values sequence. - * \param result The beginning of the output sequence. - * - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam InputIterator is a model of Input Iterator. - * and \c InputIterator's \c value_type is LessThanComparable. - * \tparam OutputIterator is a model of Output Iterator. - * and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type. - * - * \pre The ranges [first,last) and [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p lower_bound - * to search for multiple values in a ordered range. - * - * \code - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::device_vector values(6); - * values[0] = 0; - * values[1] = 1; - * values[2] = 2; - * values[3] = 3; - * values[4] = 8; - * values[5] = 9; - * - * thrust::device_vector output(6); - * - * thrust::lower_bound(input.begin(), input.end(), - * values.begin(), values.end(), - * output.begin()); - * - * // output is now [0, 1, 1, 2, 4, 5] - * \endcode - * - * \see http://www.sgi.com/tech/stl/lower_bound.html - * \see \p upper_bound - * \see \p equal_range - * \see \p binary_search - */ -template -OutputIterator lower_bound(ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator result); - - -/*! 
\p lower_bound is a vectorized version of binary search: for each - * iterator \c v in [values_first, values_last) it attempts to - * find the value *v in an ordered range [first, last). - * Specifically, it returns the index of first position where value could - * be inserted without violating the ordering. This version of - * \p lower_bound uses function object \c comp for comparison. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param values_first The beginning of the search values sequence. - * \param values_last The end of the search values sequence. - * \param result The beginning of the output sequence. - * \param comp The comparison operator. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam InputIterator is a model of Input Iterator. - * and \c InputIterator's \c value_type is comparable to \p ForwardIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator. - * and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * \pre The ranges [first,last) and [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p lower_bound - * to search for multiple values in a ordered range. - * - * \code - * #include - * #include - * #include - * #include - * ... 
- * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::device_vector values(6); - * values[0] = 0; - * values[1] = 1; - * values[2] = 2; - * values[3] = 3; - * values[4] = 8; - * values[5] = 9; - * - * thrust::device_vector output(6); - * - * thrust::lower_bound(input.begin(), input.end(), - * values.begin(), values.end(), - * output.begin(), - * thrust::less()); - * - * // output is now [0, 1, 1, 2, 4, 5] - * \endcode - * - * \see http://www.sgi.com/tech/stl/lower_bound.html - * \see \p upper_bound - * \see \p equal_range - * \see \p binary_search - */ -template -OutputIterator lower_bound(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator result, - StrictWeakOrdering comp); - - -/*! \p lower_bound is a vectorized version of binary search: for each - * iterator \c v in [values_first, values_last) it attempts to - * find the value *v in an ordered range [first, last). - * Specifically, it returns the index of first position where value could - * be inserted without violating the ordering. This version of - * \p lower_bound uses function object \c comp for comparison. - * - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param values_first The beginning of the search values sequence. - * \param values_last The end of the search values sequence. - * \param result The beginning of the output sequence. - * \param comp The comparison operator. - * - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam InputIterator is a model of Input Iterator. - * and \c InputIterator's \c value_type is comparable to \p ForwardIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator. 
- * and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * \pre The ranges [first,last) and [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p lower_bound - * to search for multiple values in a ordered range. - * - * \code - * #include - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::device_vector values(6); - * values[0] = 0; - * values[1] = 1; - * values[2] = 2; - * values[3] = 3; - * values[4] = 8; - * values[5] = 9; - * - * thrust::device_vector output(6); - * - * thrust::lower_bound(input.begin(), input.end(), - * values.begin(), values.end(), - * output.begin(), - * thrust::less()); - * - * // output is now [0, 1, 1, 2, 4, 5] - * \endcode - * - * \see http://www.sgi.com/tech/stl/lower_bound.html - * \see \p upper_bound - * \see \p equal_range - * \see \p binary_search - */ -template -OutputIterator lower_bound(ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator result, - StrictWeakOrdering comp); - - -/*! \p upper_bound is a vectorized version of binary search: for each - * iterator \c v in [values_first, values_last) it attempts to - * find the value *v in an ordered range [first, last). - * Specifically, it returns the index of last position where value could - * be inserted without violating the ordering. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param values_first The beginning of the search values sequence. - * \param values_last The end of the search values sequence. 
- * \param result The beginning of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam InputIterator is a model of Input Iterator. - * and \c InputIterator's \c value_type is LessThanComparable. - * \tparam OutputIterator is a model of Output Iterator. - * and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type. - * - * \pre The ranges [first,last) and [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p lower_bound - * to search for multiple values in a ordered range using the \p thrust::device execution policy for - * parallelization: - * - * \code - * #include - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::device_vector values(6); - * values[0] = 0; - * values[1] = 1; - * values[2] = 2; - * values[3] = 3; - * values[4] = 8; - * values[5] = 9; - * - * thrust::device_vector output(6); - * - * thrust::upper_bound(thrust::device, - * input.begin(), input.end(), - * values.begin(), values.end(), - * output.begin()); - * - * // output is now [1, 1, 2, 2, 5, 5] - * \endcode - * - * \see http://www.sgi.com/tech/stl/upper_bound.html - * \see \p upper_bound - * \see \p equal_range - * \see \p binary_search - */ -template -OutputIterator upper_bound(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator result); - - -/*! \p upper_bound is a vectorized version of binary search: for each - * iterator \c v in [values_first, values_last) it attempts to - * find the value *v in an ordered range [first, last). 
- * Specifically, it returns the index of last position where value could - * be inserted without violating the ordering. - * - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param values_first The beginning of the search values sequence. - * \param values_last The end of the search values sequence. - * \param result The beginning of the output sequence. - * - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam InputIterator is a model of Input Iterator. - * and \c InputIterator's \c value_type is LessThanComparable. - * \tparam OutputIterator is a model of Output Iterator. - * and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type. - * - * \pre The ranges [first,last) and [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p lower_bound - * to search for multiple values in a ordered range. - * - * \code - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::device_vector values(6); - * values[0] = 0; - * values[1] = 1; - * values[2] = 2; - * values[3] = 3; - * values[4] = 8; - * values[5] = 9; - * - * thrust::device_vector output(6); - * - * thrust::upper_bound(input.begin(), input.end(), - * values.begin(), values.end(), - * output.begin()); - * - * // output is now [1, 1, 2, 2, 5, 5] - * \endcode - * - * \see http://www.sgi.com/tech/stl/upper_bound.html - * \see \p upper_bound - * \see \p equal_range - * \see \p binary_search - */ -template -OutputIterator upper_bound(ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator result); - - -/*! 
\p upper_bound is a vectorized version of binary search: for each - * iterator \c v in [values_first, values_last) it attempts to - * find the value *v in an ordered range [first, last). - * Specifically, it returns the index of first position where value could - * be inserted without violating the ordering. This version of - * \p upper_bound uses function object \c comp for comparison. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param values_first The beginning of the search values sequence. - * \param values_last The end of the search values sequence. - * \param result The beginning of the output sequence. - * \param comp The comparison operator. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam InputIterator is a model of Input Iterator. - * and \c InputIterator's \c value_type is comparable to \p ForwardIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator. - * and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * \pre The ranges [first,last) and [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p lower_bound - * to search for multiple values in a ordered range using the \p thrust::device execution policy for - * parallelization: - * - * \code - * #include - * #include - * #include - * #include - * ... 
- * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::device_vector values(6); - * values[0] = 0; - * values[1] = 1; - * values[2] = 2; - * values[3] = 3; - * values[4] = 8; - * values[5] = 9; - * - * thrust::device_vector output(6); - * - * thrust::upper_bound(thrust::device, - * input.begin(), input.end(), - * values.begin(), values.end(), - * output.begin(), - * thrust::less()); - * - * // output is now [1, 1, 2, 2, 5, 5] - * \endcode - * - * \see http://www.sgi.com/tech/stl/upper_bound.html - * \see \p lower_bound - * \see \p equal_range - * \see \p binary_search - */ -template -OutputIterator upper_bound(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator result, - StrictWeakOrdering comp); - - -/*! \p upper_bound is a vectorized version of binary search: for each - * iterator \c v in [values_first, values_last) it attempts to - * find the value *v in an ordered range [first, last). - * Specifically, it returns the index of first position where value could - * be inserted without violating the ordering. This version of - * \p upper_bound uses function object \c comp for comparison. - * - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param values_first The beginning of the search values sequence. - * \param values_last The end of the search values sequence. - * \param result The beginning of the output sequence. - * \param comp The comparison operator. - * - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam InputIterator is a model of Input Iterator. - * and \c InputIterator's \c value_type is comparable to \p ForwardIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator. 
- * and \c ForwardIterator's difference_type is convertible to \c OutputIterator's \c value_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * \pre The ranges [first,last) and [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p lower_bound - * to search for multiple values in a ordered range. - * - * \code - * #include - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::device_vector values(6); - * values[0] = 0; - * values[1] = 1; - * values[2] = 2; - * values[3] = 3; - * values[4] = 8; - * values[5] = 9; - * - * thrust::device_vector output(6); - * - * thrust::upper_bound(input.begin(), input.end(), - * values.begin(), values.end(), - * output.begin(), - * thrust::less()); - * - * // output is now [1, 1, 2, 2, 5, 5] - * \endcode - * - * \see http://www.sgi.com/tech/stl/upper_bound.html - * \see \p lower_bound - * \see \p equal_range - * \see \p binary_search - */ -template -OutputIterator upper_bound(ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator result, - StrictWeakOrdering comp); - - -/*! \p binary_search is a vectorized version of binary search: for each - * iterator \c v in [values_first, values_last) it attempts to - * find the value *v in an ordered range [first, last). - * It returns \c true if an element that is equivalent to \c value - * is present in [first, last) and \c false if no such element - * exists. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param values_first The beginning of the search values sequence. 
- * \param values_last The end of the search values sequence. - * \param result The beginning of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam InputIterator is a model of Input Iterator. - * and \c InputIterator's \c value_type is LessThanComparable. - * \tparam OutputIterator is a model of Output Iterator. - * and bool is convertible to \c OutputIterator's \c value_type. - * - * \pre The ranges [first,last) and [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p binary_search - * to search for multiple values in a ordered range using the \p thrust::device execution policy for - * parallelization: - * - * \code - * #include - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::device_vector values(6); - * values[0] = 0; - * values[1] = 1; - * values[2] = 2; - * values[3] = 3; - * values[4] = 8; - * values[5] = 9; - * - * thrust::device_vector output(6); - * - * thrust::binary_search(thrust::device, - * input.begin(), input.end(), - * values.begin(), values.end(), - * output.begin()); - * - * // output is now [true, false, true, false, true, false] - * \endcode - * - * \see http://www.sgi.com/tech/stl/binary_search.html - * \see \p lower_bound - * \see \p upper_bound - * \see \p equal_range - */ -template -OutputIterator binary_search(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator result); - - -/*! \p binary_search is a vectorized version of binary search: for each - * iterator \c v in [values_first, values_last) it attempts to - * find the value *v in an ordered range [first, last). 
- * It returns \c true if an element that is equivalent to \c value - * is present in [first, last) and \c false if no such element - * exists. - * - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param values_first The beginning of the search values sequence. - * \param values_last The end of the search values sequence. - * \param result The beginning of the output sequence. - * - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam InputIterator is a model of Input Iterator. - * and \c InputIterator's \c value_type is LessThanComparable. - * \tparam OutputIterator is a model of Output Iterator. - * and bool is convertible to \c OutputIterator's \c value_type. - * - * \pre The ranges [first,last) and [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p binary_search - * to search for multiple values in a ordered range. - * - * \code - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::device_vector values(6); - * values[0] = 0; - * values[1] = 1; - * values[2] = 2; - * values[3] = 3; - * values[4] = 8; - * values[5] = 9; - * - * thrust::device_vector output(6); - * - * thrust::binary_search(input.begin(), input.end(), - * values.begin(), values.end(), - * output.begin()); - * - * // output is now [true, false, true, false, true, false] - * \endcode - * - * \see http://www.sgi.com/tech/stl/binary_search.html - * \see \p lower_bound - * \see \p upper_bound - * \see \p equal_range - */ -template -OutputIterator binary_search(ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator result); - - -/*! 
\p binary_search is a vectorized version of binary search: for each - * iterator \c v in [values_first, values_last) it attempts to - * find the value *v in an ordered range [first, last). - * It returns \c true if an element that is equivalent to \c value - * is present in [first, last) and \c false if no such element - * exists. This version of \p binary_search uses function object - * \c comp for comparison. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param values_first The beginning of the search values sequence. - * \param values_last The end of the search values sequence. - * \param result The beginning of the output sequence. - * \param comp The comparison operator. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam InputIterator is a model of Input Iterator. - * and \c InputIterator's \c value_type is LessThanComparable. - * \tparam OutputIterator is a model of Output Iterator. - * and bool is convertible to \c OutputIterator's \c value_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * \pre The ranges [first,last) and [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p binary_search - * to search for multiple values in a ordered range using the \p thrust::device execution policy for - * parallelization: - * - * \code - * #include - * #include - * #include - * #include - * ... 
- * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::device_vector values(6); - * values[0] = 0; - * values[1] = 1; - * values[2] = 2; - * values[3] = 3; - * values[4] = 8; - * values[5] = 9; - * - * thrust::device_vector output(6); - * - * thrust::binary_search(thrust::device, - * input.begin(), input.end(), - * values.begin(), values.end(), - * output.begin(), - * thrust::less()); - * - * // output is now [true, false, true, false, true, false] - * \endcode - * - * \see http://www.sgi.com/tech/stl/binary_search.html - * \see \p lower_bound - * \see \p upper_bound - * \see \p equal_range - */ -template -OutputIterator binary_search(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator result, - StrictWeakOrdering comp); - - -/*! \p binary_search is a vectorized version of binary search: for each - * iterator \c v in [values_first, values_last) it attempts to - * find the value *v in an ordered range [first, last). - * It returns \c true if an element that is equivalent to \c value - * is present in [first, last) and \c false if no such element - * exists. This version of \p binary_search uses function object - * \c comp for comparison. - * - * \param first The beginning of the ordered sequence. - * \param last The end of the ordered sequence. - * \param values_first The beginning of the search values sequence. - * \param values_last The end of the search values sequence. - * \param result The beginning of the output sequence. - * \param comp The comparison operator. - * - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam InputIterator is a model of Input Iterator. - * and \c InputIterator's \c value_type is LessThanComparable. - * \tparam OutputIterator is a model of Output Iterator. 
- * and bool is convertible to \c OutputIterator's \c value_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * \pre The ranges [first,last) and [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p binary_search - * to search for multiple values in a ordered range. - * - * \code - * #include - * #include - * #include - * ... - * thrust::device_vector input(5); - * - * input[0] = 0; - * input[1] = 2; - * input[2] = 5; - * input[3] = 7; - * input[4] = 8; - * - * thrust::device_vector values(6); - * values[0] = 0; - * values[1] = 1; - * values[2] = 2; - * values[3] = 3; - * values[4] = 8; - * values[5] = 9; - * - * thrust::device_vector output(6); - * - * thrust::binary_search(input.begin(), input.end(), - * values.begin(), values.end(), - * output.begin(), - * thrust::less()); - * - * // output is now [true, false, true, false, true, false] - * \endcode - * - * \see http://www.sgi.com/tech/stl/binary_search.html - * \see \p lower_bound - * \see \p upper_bound - * \see \p equal_range - */ -template -OutputIterator binary_search(ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator result, - StrictWeakOrdering comp); - - -/*! \} // end vectorized_binary_search - */ - - -/*! \} // end binary_search - */ - - -/*! \} // end searching - */ - - -} // end namespace thrust - -#include - diff --git a/compat/thrust/copy.h b/compat/thrust/copy.h deleted file mode 100644 index eaa9719459..0000000000 --- a/compat/thrust/copy.h +++ /dev/null @@ -1,505 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file copy.h - * \brief Copies elements from one range to another - */ - -#pragma once - -#include -#include - -namespace thrust -{ - -/*! \addtogroup algorithms - */ - -/*! \addtogroup copying - * \ingroup algorithms - * \{ - */ - - -/*! \p copy copies elements from the range [\p first, \p last) to the range - * [\p result, \p result + (\p last - \p first)). That is, it performs - * the assignments *\p result = *\p first, *(\p result + \c 1) = *(\p first + \c 1), - * and so on. Generally, for every integer \c n from \c 0 to \p last - \p first, \p copy - * performs the assignment *(\p result + \c n) = *(\p first + \c n). Unlike - * \c std::copy, \p copy offers no guarantee on order of operation. As a result, - * calling \p copy with overlapping source and destination ranges has undefined - * behavior. - * - * The return value is \p result + (\p last - \p first). - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence to copy. - * \param last The end of the sequence to copy. - * \param result The destination sequence. - * \return The end of the destination sequence. - * \see http://www.sgi.com/tech/stl/copy.html - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. - * \tparam OutputIterator must be a model of Output Iterator. 
- * - * \pre \p result may be equal to \p first, but \p result shall not be in the range [first, last) otherwise. - * - * The following code snippet demonstrates how to use \p copy - * to copy from one range to another using the \p thrust::device parallelization policy: - * - * \code - * #include - * #include - * #include - * ... - * - * thrust::device_vector vec0(100); - * thrust::device_vector vec1(100); - * ... - * - * thrust::copy(thrust::device, vec0.begin(), vec0.end(), vec1.begin()); - * - * // vec1 is now a copy of vec0 - * \endcode - */ -template - OutputIterator copy(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result); - - -/*! \p copy_n copies elements from the range [first, first + n) to the range - * [result, result + n). That is, it performs the assignments *result = *first, *(result + 1) = *(first + 1), - * and so on. Generally, for every integer \c i from \c 0 to \c n, \p copy - * performs the assignment *(\p result + \c i) = *(\p first + \c i). Unlike - * \c std::copy_n, \p copy_n offers no guarantee on order of operation. As a result, - * calling \p copy_n with overlapping source and destination ranges has undefined - * behavior. - * - * The return value is \p result + \p n. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the range to copy. - * \param n The number of elements to copy. - * \param result The beginning destination range. - * \return The end of the destination range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. - * \tparam Size is an integral type. - * \tparam OutputIterator must be a model of Output Iterator. 
- * - * \pre \p result may be equal to \p first, but \p result shall not be in the range [first, first + n) otherwise. - * - * The following code snippet demonstrates how to use \p copy - * to copy from one range to another using the \p thrust::device parallelization policy: - * - * \code - * #include - * #include - * #include - * ... - * size_t n = 100; - * thrust::device_vector vec0(n); - * thrust::device_vector vec1(n); - * ... - * thrust::copy_n(thrust::device, vec0.begin(), n, vec1.begin()); - * - * // vec1 is now a copy of vec0 - * \endcode - * - * \see http://www.sgi.com/tech/stl/copy_n.html - * \see thrust::copy - */ -template - OutputIterator copy_n(const thrust::detail::execution_policy_base &exec, - InputIterator first, - Size n, - OutputIterator result); - - - -/*! \p copy copies elements from the range [\p first, \p last) to the range - * [\p result, \p result + (\p last - \p first)). That is, it performs - * the assignments *\p result = *\p first, *(\p result + \c 1) = *(\p first + \c 1), - * and so on. Generally, for every integer \c n from \c 0 to \p last - \p first, \p copy - * performs the assignment *(\p result + \c n) = *(\p first + \c n). Unlike - * \c std::copy, \p copy offers no guarantee on order of operation. As a result, - * calling \p copy with overlapping source and destination ranges has undefined - * behavior. - * - * The return value is \p result + (\p last - \p first). - * - * \param first The beginning of the sequence to copy. - * \param last The end of the sequence to copy. - * \param result The destination sequence. - * \return The end of the destination sequence. - * \see http://www.sgi.com/tech/stl/copy.html - * - * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. - * \tparam OutputIterator must be a model of Output Iterator. 
- * - * \pre \p result may be equal to \p first, but \p result shall not be in the range [first, last) otherwise. - * - * The following code snippet demonstrates how to use \p copy - * to copy from one range to another. - * - * \code - * #include - * #include - * ... - * - * thrust::device_vector vec0(100); - * thrust::device_vector vec1(100); - * ... - * - * thrust::copy(vec0.begin(), vec0.end(), - * vec1.begin()); - * - * // vec1 is now a copy of vec0 - * \endcode - */ -template - OutputIterator copy(InputIterator first, - InputIterator last, - OutputIterator result); - -/*! \p copy_n copies elements from the range [first, first + n) to the range - * [result, result + n). That is, it performs the assignments *result = *first, *(result + 1) = *(first + 1), - * and so on. Generally, for every integer \c i from \c 0 to \c n, \p copy - * performs the assignment *(\p result + \c i) = *(\p first + \c i). Unlike - * \c std::copy_n, \p copy_n offers no guarantee on order of operation. As a result, - * calling \p copy_n with overlapping source and destination ranges has undefined - * behavior. - * - * The return value is \p result + \p n. - * - * \param first The beginning of the range to copy. - * \param n The number of elements to copy. - * \param result The beginning destination range. - * \return The end of the destination range. - * - * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. - * \tparam Size is an integral type. - * \tparam OutputIterator must be a model of Output Iterator. - * - * \pre \p result may be equal to \p first, but \p result shall not be in the range [first, first + n) otherwise. - * - * The following code snippet demonstrates how to use \p copy - * to copy from one range to another. - * - * \code - * #include - * #include - * ... - * size_t n = 100; - * thrust::device_vector vec0(n); - * thrust::device_vector vec1(n); - * ... 
- * thrust::copy_n(vec0.begin(), n, vec1.begin()); - * - * // vec1 is now a copy of vec0 - * \endcode - * - * \see http://www.sgi.com/tech/stl/copy_n.html - * \see thrust::copy - */ -template - OutputIterator copy_n(InputIterator first, - Size n, - OutputIterator result); - -/*! \} // end copying - */ - -/*! \addtogroup stream_compaction - * \{ - */ - - -/*! This version of \p copy_if copies elements from the range [first,last) - * to a range beginning at \ presult, except that any element which causes \p pred - * to be \p pred to be \c false is not copied. - * - * More precisely, for every integer \c n such that 0 <= n < last-first, - * \p copy_if performs the assignment *result = *(first+n) and \p result - * is advanced one position if pred(*(first+n)). Otherwise, no assignment - * occurs and \p result is not advanced. - * - * The algorithm's execution is parallelized as determined by \p system. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence from which to copy. - * \param last The end of the sequence from which to copy. - * \param result The beginning of the sequence into which to copy. - * \param pred The predicate to test on every value of the range [first, last). - * \return result + n, where \c n is equal to the number of times \p pred - * evaluated to \c true in the range [first, last). - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. - * - * \pre The ranges [first, last) and [result, result + (last - first)) shall not overlap. 
- * - * The following code snippet demonstrates how to use \p copy_if to perform stream compaction - * to copy even numbers to an output range using the \p thrust::host parallelization policy: - * - * \code - * #include - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int x) - * { - * return (x % 2) == 0; - * } - * }; - * ... - * const int N = 6; - * int V[N] = {-2, 0, -1, 0, 1, 2}; - * int result[4]; - * - * thrust::copy_if(thrust::host, V, V + N, result, is_even()); - * - * // V remains {-2, 0, -1, 0, 1, 2} - * // result is now {-2, 0, 0, 2} - * \endcode - * - * \see \c remove_copy_if - */ -template - OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred); - - - -/*! This version of \p copy_if copies elements from the range [first,last) - * to a range beginning at \ presult, except that any element which causes \p pred - * to be \p pred to be \c false is not copied. - * - * More precisely, for every integer \c n such that 0 <= n < last-first, - * \p copy_if performs the assignment *result = *(first+n) and \p result - * is advanced one position if pred(*(first+n)). Otherwise, no assignment - * occurs and \p result is not advanced. - * - * \param first The beginning of the sequence from which to copy. - * \param last The end of the sequence from which to copy. - * \param result The beginning of the sequence into which to copy. - * \param pred The predicate to test on every value of the range [first, last). - * \return result + n, where \c n is equal to the number of times \p pred - * evaluated to \c true in the range [first, last). - * - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. 
- * - * \pre The ranges [first, last) and [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p copy_if to perform stream compaction - * to copy even numbers to an output range. - * - * \code - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int x) - * { - * return (x % 2) == 0; - * } - * }; - * ... - * const int N = 6; - * int V[N] = {-2, 0, -1, 0, 1, 2}; - * int result[4]; - * - * thrust::copy_if(V, V + N, result, is_even()); - * - * // V remains {-2, 0, -1, 0, 1, 2} - * // result is now {-2, 0, 0, 2} - * \endcode - * - * \see \c remove_copy_if - */ -template - OutputIterator copy_if(InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred); - - -/*! This version of \p copy_if copies elements from the range [first,last) - * to a range beginning at \p result, except that any element whose corresponding stencil - * element causes \p pred to be \c false is not copied. - * - * More precisely, for every integer \c n such that 0 <= n < last-first, - * \p copy_if performs the assignment *result = *(first+n) and \p result - * is advanced one position if pred(*(stencil+n)). Otherwise, no assignment - * occurs and \p result is not advanced. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence from which to copy. - * \param last The end of the sequence from which to copy. - * \param stencil The beginning of the stencil sequence. - * \param result The beginning of the sequence into which to copy. - * \param pred The predicate to test on every value of the range [stencil, stencil + (last-first)). - * \return result + n, where \c n is equal to the number of times \p pred - * evaluated to \c true in the range [stencil, stencil + (last-first)). 
- * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator. - * \tparam InputIterator2 is a model of Input Iterator, - * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. - * - * \pre The ranges [first, last) and [result, result + (last - first)) shall not overlap. - * \pre The ranges [stencil, stencil + (last - first)) and [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p copy_if to perform stream compaction - * to copy numbers to an output range when corresponding stencil elements are even using the \p thrust::host execution policy: - * - * \code - * #include - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int x) - * { - * return (x % 2) == 0; - * } - * }; - * ... - * int N = 6; - * int data[N] = { 0, 1, 2, 3, 4, 5}; - * int stencil[N] = {-2, 0, -1, 0, 1, 2}; - * int result[4]; - * - * thrust::copy_if(thrust::host, data, data + N, stencil, result, is_even()); - * - * // data remains = { 0, 1, 2, 3, 4, 5}; - * // stencil remains = {-2, 0, -1, 0, 1, 2}; - * // result is now { 0, 1, 3, 5} - * \endcode - * - * \see \c remove_copy_if - */ -template - OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred); - - -/*! This version of \p copy_if copies elements from the range [first,last) - * to a range beginning at \p result, except that any element whose corresponding stencil - * element causes \p pred to be \c false is not copied. 
- * - * More precisely, for every integer \c n such that 0 <= n < last-first, - * \p copy_if performs the assignment *result = *(first+n) and \p result - * is advanced one position if pred(*(stencil+n)). Otherwise, no assignment - * occurs and \p result is not advanced. - * - * \param first The beginning of the sequence from which to copy. - * \param last The end of the sequence from which to copy. - * \param stencil The beginning of the stencil sequence. - * \param result The beginning of the sequence into which to copy. - * \param pred The predicate to test on every value of the range [stencil, stencil + (last-first)). - * \return result + n, where \c n is equal to the number of times \p pred - * evaluated to \c true in the range [stencil, stencil + (last-first)). - * - * \tparam InputIterator1 is a model of Input Iterator. - * \tparam InputIterator2 is a model of Input Iterator, - * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. - * - * \pre The ranges [first, last) and [result, result + (last - first)) shall not overlap. - * \pre The ranges [stencil, stencil + (last - first)) and [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p copy_if to perform stream compaction - * to copy numbers to an output range when corresponding stencil elements are even: - * - * \code - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int x) - * { - * return (x % 2) == 0; - * } - * }; - * ... 
- * int N = 6; - * int data[N] = { 0, 1, 2, 3, 4, 5}; - * int stencil[N] = {-2, 0, -1, 0, 1, 2}; - * int result[4]; - * - * thrust::copy_if(data, data + N, stencil, result, is_even()); - * - * // data remains = { 0, 1, 2, 3, 4, 5}; - * // stencil remains = {-2, 0, -1, 0, 1, 2}; - * // result is now { 0, 1, 3, 5} - * \endcode - * - * \see \c remove_copy_if - */ -template - OutputIterator copy_if(InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred); - -/*! \} // end stream_compaction - */ - -} // end namespace thrust - -#include -#include - diff --git a/compat/thrust/count.h b/compat/thrust/count.h deleted file mode 100644 index cddd1dd68b..0000000000 --- a/compat/thrust/count.h +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file count.h - * \brief Counting elements in a range - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ - - -/*! \addtogroup algorithms - */ - -/*! \addtogroup reductions - * \ingroup algorithms - * \{ - */ - -/*! \addtogroup counting - * \ingroup reductions - * \{ - */ - - -/*! \p count finds the number of elements in [first,last) that are equal - * to \p value. More precisely, \p count returns the number of iterators \c i in - * [first, last) such that *i == value. - * - * The algorithm's execution is parallelized as determined by \p exec. 
- * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param value The value to be counted. - * \return The number of elements equal to \p value. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be a model of must be a model of Equality Comparable. - * \tparam EqualityComparable must be a model of Equality Comparable and can be compared for equality with \c InputIterator's \c value_type - * - * The following code snippet demonstrates how to use \p count to - * count the number of instances in a range of a value of interest using the \p thrust::device execution policy: - * - * \code - * #include - * #include - * #include - * ... - * // put 3 1s in a device_vector - * thrust::device_vector vec(5,0); - * vec[1] = 1; - * vec[3] = 1; - * vec[4] = 1; - * - * // count the 1s - * int result = thrust::count(thrust::device, vec.begin(), vec.end(), 1); - * // result == 3 - * \endcode - * - * \see http://www.sgi.com/tech/stl/count.html - */ -template - typename thrust::iterator_traits::difference_type - count(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, const EqualityComparable& value); - - - -/*! \p count finds the number of elements in [first,last) that are equal - * to \p value. More precisely, \p count returns the number of iterators \c i in - * [first, last) such that *i == value. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param value The value to be counted. - * \return The number of elements equal to \p value. - * - * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be a model of must be a model of Equality Comparable. 
- * \tparam EqualityComparable must be a model of Equality Comparable and can be compared for equality with \c InputIterator's \c value_type - * - * The following code snippet demonstrates how to use \p count to - * count the number of instances in a range of a value of interest. - * \code - * #include - * #include - * ... - * // put 3 1s in a device_vector - * thrust::device_vector vec(5,0); - * vec[1] = 1; - * vec[3] = 1; - * vec[4] = 1; - * - * // count the 1s - * int result = thrust::count(vec.begin(), vec.end(), 1); - * // result == 3 - * \endcode - * - * \see http://www.sgi.com/tech/stl/count.html - */ -template - typename thrust::iterator_traits::difference_type - count(InputIterator first, InputIterator last, const EqualityComparable& value); - - -/*! \p count_if finds the number of elements in [first,last) for which - * a predicate is \c true. More precisely, \p count_if returns the number of iterators - * \c i in [first, last) such that pred(*i) == true. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param pred The predicate. - * \return The number of elements where \p pred is \c true. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be convertible to \c Predicate's \c argument_type. - * \tparam Predicate must be a model of Predicate. - * - * The following code snippet demonstrates how to use \p count to - * count the number of odd numbers in a range using the \p thrust::device execution policy: - * - * \code - * #include - * #include - * #include - * ... - * struct is_odd - * { - * __host__ __device__ - * bool operator()(int &x) - * { - * return x & 1; - * } - * }; - * ... 
- * // fill a device_vector with even & odd numbers - * thrust::device_vector vec(5); - * vec[0] = 0; - * vec[1] = 1; - * vec[2] = 2; - * vec[3] = 3; - * vec[4] = 4; - * - * // count the odd elements in vec - * int result = thrust::count_if(thrust::device, vec.begin(), vec.end(), is_odd()); - * // result == 2 - * \endcode - * - * \see http://www.sgi.com/tech/stl/count.html - */ -template - typename thrust::iterator_traits::difference_type - count_if(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred); - - -/*! \p count_if finds the number of elements in [first,last) for which - * a predicate is \c true. More precisely, \p count_if returns the number of iterators - * \c i in [first, last) such that pred(*i) == true. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param pred The predicate. - * \return The number of elements where \p pred is \c true. - * - * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be convertible to \c Predicate's \c argument_type. - * \tparam Predicate must be a model of Predicate. - * - * The following code snippet demonstrates how to use \p count to - * count the number of odd numbers in a range. - * \code - * #include - * #include - * ... - * struct is_odd - * { - * __host__ __device__ - * bool operator()(int &x) - * { - * return x & 1; - * } - * }; - * ... - * // fill a device_vector with even & odd numbers - * thrust::device_vector vec(5); - * vec[0] = 0; - * vec[1] = 1; - * vec[2] = 2; - * vec[3] = 3; - * vec[4] = 4; - * - * // count the odd elements in vec - * int result = thrust::count_if(vec.begin(), vec.end(), is_odd()); - * // result == 2 - * \endcode - * - * \see http://www.sgi.com/tech/stl/count.html - */ -template - typename thrust::iterator_traits::difference_type - count_if(InputIterator first, InputIterator last, Predicate pred); - -/*! 
\} // end counting - * \} // end reductions - */ - -} // end thrust - -#include - diff --git a/compat/thrust/detail/adjacent_difference.inl b/compat/thrust/detail/adjacent_difference.inl deleted file mode 100644 index 6590f9d15d..0000000000 --- a/compat/thrust/detail/adjacent_difference.inl +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file adjacent_difference.inl - * \brief Inline file for adjacent_difference.h - */ - -#include -#include -#include -#include - -namespace thrust -{ - - -template -OutputIterator adjacent_difference(const thrust::detail::execution_policy_base &exec, - InputIterator first, InputIterator last, - OutputIterator result) -{ - using thrust::system::detail::generic::adjacent_difference; - - return adjacent_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result); -} // end adjacent_difference() - - -template -OutputIterator adjacent_difference(const thrust::detail::execution_policy_base &exec, - InputIterator first, InputIterator last, - OutputIterator result, - BinaryFunction binary_op) -{ - using thrust::system::detail::generic::adjacent_difference; - - return adjacent_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, binary_op); -} // end adjacent_difference() - - -template -OutputIterator adjacent_difference(InputIterator first, InputIterator last, - 
OutputIterator result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::adjacent_difference(select_system(system1, system2), first, last, result); -} // end adjacent_difference() - - -template -OutputIterator adjacent_difference(InputIterator first, InputIterator last, - OutputIterator result, - BinaryFunction binary_op) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::adjacent_difference(select_system(system1, system2), first, last, result, binary_op); -} // end adjacent_difference() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/advance.inl b/compat/thrust/detail/advance.inl deleted file mode 100644 index 2907be7534..0000000000 --- a/compat/thrust/detail/advance.inl +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file advance.inl - * \brief Inline file for advance.h - */ - -#include -#include -#include - -namespace thrust -{ - - -template -void advance(InputIterator& i, Distance n) -{ - thrust::system::detail::generic::advance(i, n); -} // end advance() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/allocator/allocator_traits.h b/compat/thrust/detail/allocator/allocator_traits.h deleted file mode 100644 index 6ee99b453f..0000000000 --- a/compat/thrust/detail/allocator/allocator_traits.h +++ /dev/null @@ -1,240 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace allocator_traits_detail -{ - -__THRUST_DEFINE_HAS_NESTED_TYPE(has_pointer, pointer) -__THRUST_DEFINE_HAS_NESTED_TYPE(has_const_pointer, const_pointer) -__THRUST_DEFINE_HAS_NESTED_TYPE(has_reference, reference) -__THRUST_DEFINE_HAS_NESTED_TYPE(has_const_reference, const_reference) -__THRUST_DEFINE_HAS_NESTED_TYPE(has_void_pointer, void_pointer) -__THRUST_DEFINE_HAS_NESTED_TYPE(has_const_void_pointer, const_void_pointer) -__THRUST_DEFINE_HAS_NESTED_TYPE(has_difference_type, difference_type) -__THRUST_DEFINE_HAS_NESTED_TYPE(has_size_type, size_type) -__THRUST_DEFINE_HAS_NESTED_TYPE(has_propagate_on_container_copy_assignment, propagate_on_container_copy_assignment) -__THRUST_DEFINE_HAS_NESTED_TYPE(has_propagate_on_container_move_assignment, propagate_on_container_move_assignment) -__THRUST_DEFINE_HAS_NESTED_TYPE(has_propagate_on_container_swap, propagate_on_container_swap) -__THRUST_DEFINE_HAS_NESTED_TYPE(has_system_type, system_type) - -template - struct nested_pointer -{ - typedef typename T::pointer type; -}; - -template - struct nested_const_pointer -{ - typedef typename T::const_pointer type; -}; - -template - struct nested_reference -{ - typedef typename T::reference type; -}; - -template - struct nested_const_reference -{ - typedef typename T::const_reference type; -}; - -template - struct nested_void_pointer -{ - typedef typename T::void_pointer type; -}; - -template - struct nested_const_void_pointer -{ - typedef typename T::const_void_pointer type; -}; - -template - struct nested_difference_type -{ - typedef typename T::difference_type type; -}; - -template - struct nested_size_type -{ - typedef typename T::size_type type; -}; - -template - struct nested_propagate_on_container_copy_assignment -{ - typedef typename T::propagate_on_container_copy_assignment type; -}; - -template - struct nested_propagate_on_container_move_assignment -{ - typedef 
typename T::propagate_on_container_move_assignment type; -}; - -template - struct nested_propagate_on_container_swap -{ - typedef typename T::propagate_on_container_swap type; -}; - -template - struct nested_system_type -{ - typedef typename T::system_type type; -}; - -} // end allocator_traits_detail - - -template - struct allocator_traits -{ - typedef Alloc allocator_type; - - typedef typename allocator_type::value_type value_type; - - typedef typename eval_if< - allocator_traits_detail::has_pointer::value, - allocator_traits_detail::nested_pointer, - identity_ - >::type pointer; - - private: - template - struct rebind_pointer - { - typedef typename pointer_traits::template rebind::other type; - }; - - public: - - typedef typename eval_if< - allocator_traits_detail::has_const_pointer::value, - allocator_traits_detail::nested_const_pointer, - rebind_pointer - >::type const_pointer; - - typedef typename eval_if< - allocator_traits_detail::has_void_pointer::value, - allocator_traits_detail::nested_void_pointer, - rebind_pointer - >::type void_pointer; - - typedef typename eval_if< - allocator_traits_detail::has_const_void_pointer::value, - allocator_traits_detail::nested_const_void_pointer, - rebind_pointer - >::type const_void_pointer; - - typedef typename eval_if< - allocator_traits_detail::has_difference_type::value, - allocator_traits_detail::nested_difference_type, - pointer_difference - >::type difference_type; - - typedef typename eval_if< - allocator_traits_detail::has_size_type::value, - allocator_traits_detail::nested_size_type, - make_unsigned - >::type size_type; - - typedef typename eval_if< - allocator_traits_detail::has_propagate_on_container_copy_assignment::value, - allocator_traits_detail::nested_propagate_on_container_copy_assignment, - identity_ - >::type propagate_on_container_copy_assignment; - - typedef typename eval_if< - allocator_traits_detail::has_propagate_on_container_move_assignment::value, - 
allocator_traits_detail::nested_propagate_on_container_move_assignment, - identity_ - >::type propagate_on_container_move_assignment; - - typedef typename eval_if< - allocator_traits_detail::has_propagate_on_container_swap::value, - allocator_traits_detail::nested_propagate_on_container_swap, - identity_ - >::type propagate_on_container_swap; - - typedef typename eval_if< - allocator_traits_detail::has_system_type::value, - allocator_traits_detail::nested_system_type, - thrust::iterator_system - >::type system_type; - - // XXX rebind and rebind_traits are alias templates - // and so are omitted while c++11 is unavailable - - inline static pointer allocate(allocator_type &a, size_type n); - - inline static pointer allocate(allocator_type &a, size_type n, const_void_pointer hint); - - inline static void deallocate(allocator_type &a, pointer p, size_type n); - - // XXX should probably change T* to pointer below and then relax later - - template - inline __host__ __device__ static void construct(allocator_type &a, T *p); - - template - inline __host__ __device__ static void construct(allocator_type &a, T *p, const Arg1 &arg1); - - template - inline __host__ __device__ static void destroy(allocator_type &a, T *p); - - inline static size_type max_size(const allocator_type &a); -}; // end allocator_traits - - -// XXX consider moving this non-standard functionality inside allocator_traits -template - struct allocator_system -{ - // the type of the allocator's system - typedef typename eval_if< - allocator_traits_detail::has_system_type::value, - allocator_traits_detail::nested_system_type, - thrust::iterator_system< - typename allocator_traits::pointer - > - >::type type; - - inline static type &get(Alloc &a); -}; - - -} // end detail -} // end thrust - -#include - diff --git a/compat/thrust/detail/allocator/allocator_traits.inl b/compat/thrust/detail/allocator/allocator_traits.inl deleted file mode 100644 index 83193355d5..0000000000 --- 
a/compat/thrust/detail/allocator/allocator_traits.inl +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace allocator_traits_detail -{ - -__THRUST_DEFINE_IS_CALL_POSSIBLE(has_member_allocate_with_hint_impl, allocate) - -template - class has_member_allocate_with_hint -{ - typedef typename allocator_traits::pointer pointer; - typedef typename allocator_traits::size_type size_type; - typedef typename allocator_traits::const_void_pointer const_void_pointer; - - public: - typedef typename has_member_allocate_with_hint_impl::type type; - static const bool value = type::value; -}; - -template - typename enable_if< - has_member_allocate_with_hint::value, - typename allocator_traits::pointer - >::type - allocate(Alloc &a, typename allocator_traits::size_type n, typename allocator_traits::const_void_pointer hint) -{ - return a.allocate(n,hint); -} - -template - typename disable_if< - has_member_allocate_with_hint::value, - typename allocator_traits::pointer - >::type - allocate(Alloc &a, typename allocator_traits::size_type n, typename allocator_traits::const_void_pointer) -{ - return a.allocate(n); -} - - -__THRUST_DEFINE_IS_CALL_POSSIBLE(has_member_construct1_impl, construct) - -template - struct has_member_construct1 - : has_member_construct1_impl -{}; - -template - inline 
__host__ __device__ - typename enable_if< - has_member_construct1::value - >::type - construct(Alloc &a, T *p) -{ - a.construct(p); -} - -template - inline __host__ __device__ - typename disable_if< - has_member_construct1::value - >::type - construct(Alloc &a, T *p) -{ - ::new(static_cast(p)) T(); -} - - -__THRUST_DEFINE_IS_CALL_POSSIBLE(has_member_construct2_impl, construct) - -template - struct has_member_construct2 - : has_member_construct2_impl -{}; - -template - inline __host__ __device__ - typename enable_if< - has_member_construct2::value - >::type - construct(Alloc &a, T *p, const Arg1 &arg1) -{ - a.construct(p,arg1); -} - -template - inline __host__ __device__ - typename disable_if< - has_member_construct2::value - >::type - construct(Alloc &, T *p, const Arg1 &arg1) -{ - ::new(static_cast(p)) T(arg1); -} - - -__THRUST_DEFINE_IS_CALL_POSSIBLE(has_member_destroy_impl, destroy) - -template - struct has_member_destroy - : has_member_destroy_impl -{}; - -template - inline __host__ __device__ - typename enable_if< - has_member_destroy::value - >::type - destroy(Alloc &a, T *p) -{ - a.destroy(p); -} - -template - inline __host__ __device__ - typename disable_if< - has_member_destroy::value - >::type - destroy(Alloc &, T *p) -{ - p->~T(); -} - - -__THRUST_DEFINE_IS_CALL_POSSIBLE(has_member_max_size_impl, max_size) - -template - class has_member_max_size -{ - typedef typename allocator_traits::size_type size_type; - - public: - typedef typename has_member_max_size_impl::type type; - static const bool value = type::value; -}; - -template - typename enable_if< - has_member_max_size::value, - typename allocator_traits::size_type - >::type - max_size(const Alloc &a) -{ - return a.max_size(); -} - -template - typename disable_if< - has_member_max_size::value, - typename allocator_traits::size_type - >::type - max_size(const Alloc &a) -{ - typedef typename allocator_traits::size_type size_type; - return std::numeric_limits::max(); -} - 
-__THRUST_DEFINE_HAS_MEMBER_FUNCTION(has_member_system_impl, system) - -template - class has_member_system -{ - typedef typename allocator_system::type system_type; - - public: - typedef typename has_member_system_impl::type type; - static const bool value = type::value; -}; - -template - typename enable_if< - has_member_system::value, - typename allocator_system::type & - >::type - system(Alloc &a) -{ - return a.system(); -} - -template - typename disable_if< - has_member_system::value, - typename allocator_system::type & - >::type - system(Alloc &a) -{ - // assumes the system is default-constructible - static typename allocator_system::type state; - return state; -} - - -} // end allocator_traits_detail - - -template - typename allocator_traits::pointer - allocator_traits - ::allocate(Alloc &a, typename allocator_traits::size_type n) -{ - return a.allocate(n); -} - -template - typename allocator_traits::pointer - allocator_traits - ::allocate(Alloc &a, typename allocator_traits::size_type n, typename allocator_traits::const_void_pointer hint) -{ - return allocator_traits_detail::allocate(a, n, hint); -} - -template - void allocator_traits - ::deallocate(Alloc &a, typename allocator_traits::pointer p, typename allocator_traits::size_type n) -{ - return a.deallocate(p,n); -} - -template - template - void allocator_traits - ::construct(allocator_type &a, T *p) -{ - return allocator_traits_detail::construct(a,p); -} - -template - template - void allocator_traits - ::construct(allocator_type &a, T *p, const Arg1 &arg1) -{ - return allocator_traits_detail::construct(a,p,arg1); -} - -template - template - void allocator_traits - ::destroy(allocator_type &a, T *p) -{ - return allocator_traits_detail::destroy(a,p); -} - -template - typename allocator_traits::size_type - allocator_traits - ::max_size(const allocator_type &a) -{ - return allocator_traits_detail::max_size(a); -} - -template - typename allocator_system::type & - allocator_system - ::get(Alloc &a) -{ - return 
allocator_traits_detail::system(a); -} - - -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/allocator/copy_construct_range.h b/compat/thrust/detail/allocator/copy_construct_range.h deleted file mode 100644 index 5d99e1fa14..0000000000 --- a/compat/thrust/detail/allocator/copy_construct_range.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace detail -{ - -template - Pointer copy_construct_range(thrust::execution_policy &from_system, - Allocator &a, - InputIterator first, - InputIterator last, - Pointer result); - -template - Pointer copy_construct_range_n(thrust::execution_policy &from_system, - Allocator &a, - InputIterator first, - Size n, - Pointer result); - -} // end detail -} // end thrust - -#include - diff --git a/compat/thrust/detail/allocator/copy_construct_range.inl b/compat/thrust/detail/allocator/copy_construct_range.inl deleted file mode 100644 index 7c5478b65b..0000000000 --- a/compat/thrust/detail/allocator/copy_construct_range.inl +++ /dev/null @@ -1,298 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace allocator_traits_detail -{ - - -template - struct copy_construct_with_allocator -{ - Allocator &a; - - copy_construct_with_allocator(Allocator &a) - : a(a) - {} - - template - inline __host__ __device__ - void operator()(Tuple t) - { - const InputType &in = thrust::get<0>(t); - OutputType &out = thrust::get<1>(t); - - allocator_traits::construct(a, &out, in); - } -}; - - -template - struct needs_copy_construct_via_allocator - : has_member_construct2< - Allocator, - T, - T - > -{}; - - -// we know that std::allocator::construct's only effect is to call T's -// copy constructor, so we needn't use it for copy construction -template - struct needs_copy_construct_via_allocator, T> - : thrust::detail::false_type -{}; - - -// XXX it's regrettable that this implementation is copied almost -// exactly from system::detail::generic::uninitialized_copy -// perhaps generic::uninitialized_copy could call this routine -// with a default allocator -template - typename enable_if_convertible< - FromSystem, - ToSystem, - Pointer - >::type - uninitialized_copy_with_allocator(Allocator &a, - thrust::execution_policy &from_system, - thrust::execution_policy &to_system, - InputIterator first, - InputIterator last, - Pointer result) -{ - // zip up the iterators - typedef thrust::tuple IteratorTuple; - typedef thrust::zip_iterator ZipIterator; - - ZipIterator begin = 
thrust::make_zip_iterator(thrust::make_tuple(first,result)); - ZipIterator end = begin; - - // get a zip_iterator pointing to the end - const typename thrust::iterator_difference::type n = thrust::distance(first,last); - thrust::advance(end,n); - - // create a functor - typedef typename iterator_traits::value_type InputType; - typedef typename iterator_traits::value_type OutputType; - - // do the for_each - // note we use to_system to dispatch the for_each - thrust::for_each(to_system, begin, end, copy_construct_with_allocator(a)); - - // return the end of the output range - return thrust::get<1>(end.get_iterator_tuple()); -} - - -// XXX it's regrettable that this implementation is copied almost -// exactly from system::detail::generic::uninitialized_copy_n -// perhaps generic::uninitialized_copy_n could call this routine -// with a default allocator -template - typename enable_if_convertible< - FromSystem, - ToSystem, - Pointer - >::type - uninitialized_copy_with_allocator_n(Allocator &a, - thrust::execution_policy &from_system, - thrust::execution_policy &to_system, - InputIterator first, - Size n, - Pointer result) -{ - // zip up the iterators - typedef thrust::tuple IteratorTuple; - typedef thrust::zip_iterator ZipIterator; - - ZipIterator begin = thrust::make_zip_iterator(thrust::make_tuple(first,result)); - - // create a functor - typedef typename iterator_traits::value_type InputType; - typedef typename iterator_traits::value_type OutputType; - - // do the for_each_n - // note we use to_system to dispatch the for_each_n - ZipIterator end = thrust::for_each_n(to_system, begin, n, copy_construct_with_allocator(a)); - - // return the end of the output range - return thrust::get<1>(end.get_iterator_tuple()); -} - - -template - typename disable_if_convertible< - FromSystem, - ToSystem, - Pointer - >::type - uninitialized_copy_with_allocator(Allocator &, - thrust::execution_policy &from_system, - thrust::execution_policy &to_system, - InputIterator first, - 
InputIterator last, - Pointer result) -{ - // the systems aren't trivially interoperable - // just call two_system_copy and hope for the best - return thrust::detail::two_system_copy(from_system, to_system, first, last, result); -} // end uninitialized_copy_with_allocator() - - -template - typename disable_if_convertible< - FromSystem, - ToSystem, - Pointer - >::type - uninitialized_copy_with_allocator_n(Allocator &, - thrust::execution_policy &from_system, - thrust::execution_policy &to_system, - InputIterator first, - Size n, - Pointer result) -{ - // the systems aren't trivially interoperable - // just call two_system_copy_n and hope for the best - return thrust::detail::two_system_copy_n(from_system, to_system, first, n, result); -} // end uninitialized_copy_with_allocator_n() - - -template - typename disable_if< - needs_copy_construct_via_allocator< - Allocator, - typename pointer_element::type - >::value, - Pointer - >::type - copy_construct_range(thrust::execution_policy &from_system, - Allocator &a, - InputIterator first, - InputIterator last, - Pointer result) -{ - typename allocator_system::type &to_system = allocator_system::get(a); - - // just call two_system_copy - return thrust::detail::two_system_copy(from_system, to_system, first, last, result); -} - - -template - typename disable_if< - needs_copy_construct_via_allocator< - Allocator, - typename pointer_element::type - >::value, - Pointer - >::type - copy_construct_range_n(thrust::execution_policy &from_system, - Allocator &a, - InputIterator first, - Size n, - Pointer result) -{ - typename allocator_system::type &to_system = allocator_system::get(a); - - // just call two_system_copy_n - return thrust::detail::two_system_copy_n(from_system, to_system, first, n, result); -} - - -template - typename enable_if< - needs_copy_construct_via_allocator< - Allocator, - typename pointer_element::type - >::value, - Pointer - >::type - copy_construct_range(thrust::execution_policy &from_system, - Allocator &a, 
- InputIterator first, - InputIterator last, - Pointer result) -{ - typename allocator_system::type &to_system = allocator_system::get(a); - return uninitialized_copy_with_allocator(a, from_system, to_system, first, last, result); -} - - -template - typename enable_if< - needs_copy_construct_via_allocator< - Allocator, - typename pointer_element::type - >::value, - Pointer - >::type - copy_construct_range_n(thrust::execution_policy &from_system, - Allocator &a, - InputIterator first, - Size n, - Pointer result) -{ - typename allocator_system::type &to_system = allocator_system::get(a); - return uninitialized_copy_with_allocator_n(a, from_system, to_system, first, n, result); -} - - -} // end allocator_traits_detail - - -template - Pointer copy_construct_range(thrust::execution_policy &from_system, - Allocator &a, - InputIterator first, - InputIterator last, - Pointer result) -{ - return allocator_traits_detail::copy_construct_range(from_system, a, first, last, result); -} - - -template - Pointer copy_construct_range_n(thrust::execution_policy &from_system, - Allocator &a, - InputIterator first, - Size n, - Pointer result) -{ - return allocator_traits_detail::copy_construct_range_n(from_system, a, first, n, result); -} - - -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/allocator/default_construct_range.h b/compat/thrust/detail/allocator/default_construct_range.h deleted file mode 100644 index d83cb31f35..0000000000 --- a/compat/thrust/detail/allocator/default_construct_range.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -namespace thrust -{ -namespace detail -{ - - -template -inline void default_construct_range(Allocator &a, Pointer p, Size n); - - -} // end detail -} // end thrust - -#include - - diff --git a/compat/thrust/detail/allocator/default_construct_range.inl b/compat/thrust/detail/allocator/default_construct_range.inl deleted file mode 100644 index 45fe9c69a1..0000000000 --- a/compat/thrust/detail/allocator/default_construct_range.inl +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace allocator_traits_detail -{ - - -template - struct construct1_via_allocator -{ - Allocator &a; - - construct1_via_allocator(Allocator &a) - : a(a) - {} - - template - inline __host__ __device__ - void operator()(T &x) - { - allocator_traits::construct(a, &x); - } -}; - - -template - struct needs_default_construct_via_allocator - : has_member_construct1< - Allocator, - T - > -{}; - - -// we know that std::allocator::construct's only effect is to call T's -// default constructor, so we needn't use it for default construction -template - struct needs_default_construct_via_allocator, T> - : thrust::detail::false_type -{}; - - -template - typename enable_if< - needs_default_construct_via_allocator< - Allocator, - typename pointer_element::type - >::value - >::type - default_construct_range(Allocator &a, Pointer p, Size n) -{ - thrust::for_each_n(allocator_system::get(a), p, n, construct1_via_allocator(a)); -} - - -template - typename disable_if< - needs_default_construct_via_allocator< - Allocator, - typename pointer_element::type - >::value - >::type - default_construct_range(Allocator &a, Pointer p, Size n) -{ - thrust::uninitialized_fill_n(allocator_system::get(a), p, n, typename pointer_element::type()); -} - - -} // end allocator_traits_detail - - -template - void default_construct_range(Allocator &a, Pointer p, Size n) -{ - return allocator_traits_detail::default_construct_range(a,p,n); -} - - -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/allocator/destroy_range.h b/compat/thrust/detail/allocator/destroy_range.h deleted file mode 100644 index d690a60a79..0000000000 --- a/compat/thrust/detail/allocator/destroy_range.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with 
the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -namespace thrust -{ -namespace detail -{ - -template - inline void destroy_range(Allocator &a, Pointer p, Size n); - -} // end detail -} // end thrust - -#include - diff --git a/compat/thrust/detail/allocator/destroy_range.inl b/compat/thrust/detail/allocator/destroy_range.inl deleted file mode 100644 index ace222356f..0000000000 --- a/compat/thrust/detail/allocator/destroy_range.inl +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace allocator_traits_detail -{ - - -// destroy_range has three cases: -// if Allocator has an effectful member function destroy: -// 1. destroy via the allocator -// else -// 2. if T has a non-trivial destructor, destroy the range without using the allocator -// 3. 
if T has a trivial destructor, do a no-op - -template - struct has_effectful_member_destroy - : has_member_destroy -{}; - -// std::allocator::destroy's only effect is to invoke its argument's destructor -template - struct has_effectful_member_destroy, T> - : thrust::detail::false_type -{}; - -// case 1: Allocator has an effectful 1-argument member function "destroy" -template - struct enable_if_destroy_range_case1 - : thrust::detail::enable_if< - has_effectful_member_destroy< - Allocator, - typename pointer_element::type - >::value - > -{}; - -// case 2: Allocator has no member function "destroy", but T has a non-trivial destructor -template - struct enable_if_destroy_range_case2 - : thrust::detail::enable_if< - !has_effectful_member_destroy< - Allocator, - typename pointer_element::type - >::value && - !has_trivial_destructor< - typename pointer_element::type - >::value - > -{}; - -// case 3: Allocator has no member function "destroy", and T has a trivial destructor -template - struct enable_if_destroy_range_case3 - : thrust::detail::enable_if< - !has_effectful_member_destroy< - Allocator, - typename pointer_element::type - >::value && - has_trivial_destructor< - typename pointer_element::type - >::value - > -{}; - - - -template - struct destroy_via_allocator -{ - Allocator &a; - - destroy_via_allocator(Allocator &a) - : a(a) - {} - - template - inline __host__ __device__ - void operator()(T &x) - { - allocator_traits::destroy(a, &x); - } -}; - - -// destroy_range case 1: destroy via allocator -template - typename enable_if_destroy_range_case1::type - destroy_range(Allocator &a, Pointer p, Size n) -{ - thrust::for_each_n(allocator_system::get(a), p, n, destroy_via_allocator(a)); -} - - -// we must prepare for His coming -struct gozer -{ - template - inline __host__ __device__ - void operator()(T &x) - { - x.~T(); - } -}; - -// destroy_range case 2: destroy without the allocator -template - typename enable_if_destroy_range_case2::type - destroy_range(Allocator &a, 
Pointer p, Size n) -{ - thrust::for_each_n(allocator_system::get(a), p, n, gozer()); -} - - -// destroy_range case 3: no-op -template - typename enable_if_destroy_range_case3::type - destroy_range(Allocator &, Pointer, Size) -{ - // no op -} - - -} // end allocator_traits_detail - - -template - void destroy_range(Allocator &a, Pointer p, Size n) -{ - return allocator_traits_detail::destroy_range(a,p,n); -} - - -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/allocator/fill_construct_range.h b/compat/thrust/detail/allocator/fill_construct_range.h deleted file mode 100644 index 66fec416c3..0000000000 --- a/compat/thrust/detail/allocator/fill_construct_range.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -namespace thrust -{ -namespace detail -{ - - -template -inline void fill_construct_range(Allocator &a, Pointer p, Size n, const T &value); - - -} // end detail -} // end thrust - -#include - diff --git a/compat/thrust/detail/allocator/fill_construct_range.inl b/compat/thrust/detail/allocator/fill_construct_range.inl deleted file mode 100644 index e2c9c09c80..0000000000 --- a/compat/thrust/detail/allocator/fill_construct_range.inl +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace allocator_traits_detail -{ - -// fill_construct_range has 2 cases: -// if Allocator has an effectful member function construct: -// 1. construct via the allocator -// else -// 2. 
construct via uninitialized_fill - -template - struct has_effectful_member_construct2 - : has_member_construct2 -{}; - -// std::allocator::construct's only effect is to invoke placement new -template - struct has_effectful_member_construct2,T,Arg1> - : thrust::detail::false_type -{}; - - -template - struct construct2_via_allocator -{ - Allocator &a; - Arg1 arg; - - construct2_via_allocator(Allocator &a, const Arg1 &arg) - : a(a), arg(arg) - {} - - template - inline __host__ __device__ - void operator()(T &x) - { - allocator_traits::construct(a, &x, arg); - } -}; - - -template - typename enable_if< - has_effectful_member_construct2< - Allocator, - typename pointer_element::type, - T - >::value - >::type - fill_construct_range(Allocator &a, Pointer p, Size n, const T &value) -{ - thrust::for_each_n(allocator_system::get(a), p, n, construct2_via_allocator(a, value)); -} - - -template - typename disable_if< - has_effectful_member_construct2< - Allocator, - typename pointer_element::type, - T - >::value - >::type - fill_construct_range(Allocator &a, Pointer p, Size n, const T &value) -{ - thrust::uninitialized_fill_n(allocator_system::get(a), p, n, value); -} - - -} // end allocator_traits_detail - - -template - void fill_construct_range(Alloc &a, Pointer p, Size n, const T &value) -{ - return allocator_traits_detail::fill_construct_range(a,p,n,value); -} - - -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/allocator/malloc_allocator.h b/compat/thrust/detail/allocator/malloc_allocator.h deleted file mode 100644 index cf4567e419..0000000000 --- a/compat/thrust/detail/allocator/malloc_allocator.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - -template - class malloc_allocator - : public thrust::detail::tagged_allocator< - T, System, Pointer - > -{ - private: - typedef thrust::detail::tagged_allocator< - T, System, Pointer - > super_t; - - public: - typedef typename super_t::pointer pointer; - typedef typename super_t::size_type size_type; - - pointer allocate(size_type cnt); - - void deallocate(pointer p, size_type n); -}; - -} // end detail -} // end thrust - -#include - diff --git a/compat/thrust/detail/allocator/malloc_allocator.inl b/compat/thrust/detail/allocator/malloc_allocator.inl deleted file mode 100644 index dd70202a04..0000000000 --- a/compat/thrust/detail/allocator/malloc_allocator.inl +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - - -template - typename malloc_allocator::pointer - malloc_allocator - ::allocate(typename malloc_allocator::size_type cnt) -{ - using thrust::system::detail::generic::select_system; - - // XXX should use a hypothetical thrust::static_pointer_cast here - System system; - - pointer result = thrust::malloc(select_system(system), cnt); - - if(result.get() == 0) - { - throw thrust::system::detail::bad_alloc("malloc_allocator::allocate: malloc failed"); - } // end if - - return result; -} // end malloc_allocator::allocate() - - -template - void malloc_allocator - ::deallocate(typename malloc_allocator::pointer p, typename malloc_allocator::size_type n) -{ - using thrust::system::detail::generic::select_system; - - System system; - thrust::free(select_system(system), p); -} // end malloc_allocator - - -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/allocator/no_throw_allocator.h b/compat/thrust/detail/allocator/no_throw_allocator.h deleted file mode 100644 index ce397dbdb5..0000000000 --- a/compat/thrust/detail/allocator/no_throw_allocator.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -namespace thrust -{ -namespace detail -{ - -template - struct no_throw_allocator : BaseAllocator -{ - private: - typedef BaseAllocator super_t; - - public: - inline no_throw_allocator(const BaseAllocator &other = BaseAllocator()) - : super_t(other) - {} - - template - struct rebind - { - typedef no_throw_allocator::other> other; - }; // end rebind - - void deallocate(typename super_t::pointer p, typename super_t::size_type n) - { - try - { - super_t::deallocate(p, n); - } // end try - catch(...) - { - // catch anything - } // end catch - } // end deallocate() - - inline bool operator==(no_throw_allocator const &other) { return super_t::operator==(other); } - inline bool operator!=(no_throw_allocator const &other) { return super_t::operator!=(other); } -}; // end no_throw_allocator - -} // end detail -} // end thrust - - diff --git a/compat/thrust/detail/allocator/tagged_allocator.h b/compat/thrust/detail/allocator/tagged_allocator.h deleted file mode 100644 index 3cb87a32bf..0000000000 --- a/compat/thrust/detail/allocator/tagged_allocator.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - -template class tagged_allocator; - -template - class tagged_allocator -{ - public: - typedef void value_type; - typedef typename thrust::detail::pointer_traits::template rebind::other pointer; - typedef typename thrust::detail::pointer_traits::template rebind::other const_pointer; - typedef std::size_t size_type; - typedef typename thrust::detail::pointer_traits::difference_type difference_type; - typedef Tag system_type; - - template - struct rebind - { - typedef tagged_allocator other; - }; // end rebind -}; - -template - class tagged_allocator -{ - public: - typedef T value_type; - typedef typename thrust::detail::pointer_traits::template rebind::other pointer; - typedef typename thrust::detail::pointer_traits::template rebind::other const_pointer; - typedef typename thrust::iterator_reference::type reference; - typedef typename thrust::iterator_reference::type const_reference; - typedef std::size_t size_type; - typedef typename thrust::detail::pointer_traits::difference_type difference_type; - typedef Tag system_type; - - template - struct rebind - { - typedef tagged_allocator other; - }; // end rebind - - __host__ __device__ - inline tagged_allocator(); - - __host__ __device__ - inline tagged_allocator(const tagged_allocator &); - - template - __host__ __device__ - inline tagged_allocator(const tagged_allocator &); - - __host__ __device__ - inline ~tagged_allocator(); - - __host__ __device__ - pointer address(reference x) const; - - __host__ __device__ - const_pointer address(const_reference x) const; - - size_type max_size() const; -}; - -template -__host__ __device__ -bool operator==(const tagged_allocator &, const tagged_allocator &); - -template -__host__ __device__ -bool operator!=(const tagged_allocator &, const tagged_allocator &); - -} // end detail -} // end thrust - -#include - diff --git a/compat/thrust/detail/allocator/tagged_allocator.inl 
b/compat/thrust/detail/allocator/tagged_allocator.inl deleted file mode 100644 index cb362a840d..0000000000 --- a/compat/thrust/detail/allocator/tagged_allocator.inl +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - - -template - tagged_allocator - ::tagged_allocator() -{} - - -template - tagged_allocator - ::tagged_allocator(const tagged_allocator &) -{} - - -template - template - tagged_allocator - ::tagged_allocator(const tagged_allocator &) -{} - - -template - tagged_allocator - ::~tagged_allocator() -{} - - -template - typename tagged_allocator::pointer - tagged_allocator - ::address(reference x) const -{ - return &x; -} - - -template - typename tagged_allocator::const_pointer - tagged_allocator - ::address(const_reference x) const -{ - return &x; -} - - -template - typename tagged_allocator::size_type - tagged_allocator - ::max_size() const -{ - return (std::numeric_limits::max)() / sizeof(T); -} - - -template -__host__ __device__ -bool operator==(const tagged_allocator &, const tagged_allocator &) -{ - return true; -} - - -template -__host__ __device__ -bool operator!=(const tagged_allocator &, const tagged_allocator &) -{ - return false; -} - - -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/allocator/temporary_allocator.h 
b/compat/thrust/detail/allocator/temporary_allocator.h deleted file mode 100644 index f0496f9fe1..0000000000 --- a/compat/thrust/detail/allocator/temporary_allocator.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - - -// XXX the pointer parameter given to tagged_allocator should be related to -// the type of the expression get_temporary_buffer(system, n).first -// without decltype, compromise on pointer -template - class temporary_allocator - : public thrust::detail::tagged_allocator< - T, System, thrust::pointer - > -{ - private: - typedef thrust::detail::tagged_allocator< - T, System, thrust::pointer - > super_t; - - System &m_system; - - public: - typedef typename super_t::pointer pointer; - typedef typename super_t::size_type size_type; - - inline explicit temporary_allocator(thrust::execution_policy &system) : - super_t(), - m_system(thrust::detail::derived_cast(system)) - {} - - pointer allocate(size_type cnt); - - void deallocate(pointer p, size_type n); - - inline System &system() - { - return m_system; - } // end system() - - private: - typedef thrust::pair pointer_and_size; -}; // end temporary_allocator - - -} // end detail -} // end thrust - -#include - diff --git a/compat/thrust/detail/allocator/temporary_allocator.inl 
b/compat/thrust/detail/allocator/temporary_allocator.inl deleted file mode 100644 index 63221d57e9..0000000000 --- a/compat/thrust/detail/allocator/temporary_allocator.inl +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - - -template - typename temporary_allocator::pointer - temporary_allocator - ::allocate(typename temporary_allocator::size_type cnt) -{ - pointer_and_size result = thrust::get_temporary_buffer(system(), cnt); - - // handle failure - if(result.second < cnt) - { - // deallocate and throw - // note that we pass cnt to deallocate, not a value derived from result.second - deallocate(result.first, cnt); - - throw thrust::system::detail::bad_alloc("temporary_buffer::allocate: get_temporary_buffer failed"); - } // end if - - return result.first; -} // end temporary_allocator::allocate() - - -template - void temporary_allocator - ::deallocate(typename temporary_allocator::pointer p, typename temporary_allocator::size_type n) -{ - return thrust::return_temporary_buffer(system(), p); -} // end temporary_allocator - - -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/binary_search.inl b/compat/thrust/detail/binary_search.inl deleted file mode 100644 index 0fd799a311..0000000000 --- a/compat/thrust/detail/binary_search.inl +++ /dev/null @@ -1,458 +0,0 @@ -/* - * 
Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file binary_search.inl - * \brief Inline file for binary_search.h. - */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template -ForwardIterator lower_bound(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const LessThanComparable &value) -{ - using thrust::system::detail::generic::lower_bound; - return lower_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value); -} - - -template -ForwardIterator lower_bound(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const T &value, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::lower_bound; - return lower_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value, comp); -} - - -template -ForwardIterator upper_bound(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const LessThanComparable &value) -{ - using thrust::system::detail::generic::upper_bound; - return upper_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value); -} - - -template -ForwardIterator upper_bound(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const T &value, - 
StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::upper_bound; - return upper_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value, comp); -} - - -template -bool binary_search(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const LessThanComparable& value) -{ - using thrust::system::detail::generic::binary_search; - return binary_search(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value); -} - - -template -bool binary_search(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const T& value, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::binary_search; - return binary_search(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value, comp); -} - - -template -thrust::pair -equal_range(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const T& value, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::equal_range; - return equal_range(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value, comp); -} - - -template -thrust::pair -equal_range(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const LessThanComparable& value) -{ - using thrust::system::detail::generic::equal_range; - return equal_range(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value); -} - - -template -OutputIterator lower_bound(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator output) -{ - using thrust::system::detail::generic::lower_bound; - return lower_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, 
values_last, output); -} - - -template -OutputIterator lower_bound(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator output, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::lower_bound; - return lower_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, values_last, output, comp); -} - - -template -OutputIterator upper_bound(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator output) -{ - using thrust::system::detail::generic::upper_bound; - return upper_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, values_last, output); -} - - -template -OutputIterator upper_bound(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator output, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::upper_bound; - return upper_bound(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, values_last, output, comp); -} - - -template -OutputIterator binary_search(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator output) -{ - using thrust::system::detail::generic::binary_search; - return binary_search(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, values_last, output); -} - - -template -OutputIterator binary_search(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - 
OutputIterator output, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::binary_search; - return binary_search(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, values_first, values_last, output, comp); -} - - -////////////////////// -// Scalar Functions // -////////////////////// - -template -ForwardIterator lower_bound(ForwardIterator first, - ForwardIterator last, - const LessThanComparable& value) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::lower_bound(select_system(system), first, last, value); -} - -template -ForwardIterator lower_bound(ForwardIterator first, - ForwardIterator last, - const T& value, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::lower_bound(select_system(system), first, last, value, comp); -} - -template -ForwardIterator upper_bound(ForwardIterator first, - ForwardIterator last, - const LessThanComparable& value) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::upper_bound(select_system(system), first, last, value); -} - -template -ForwardIterator upper_bound(ForwardIterator first, - ForwardIterator last, - const T& value, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::upper_bound(select_system(system), first, last, value, comp); -} - -template -bool binary_search(ForwardIterator first, - ForwardIterator last, - const LessThanComparable& value) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return 
thrust::binary_search(select_system(system), first, last, value); -} - -template -bool binary_search(ForwardIterator first, - ForwardIterator last, - const T& value, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::binary_search(select_system(system), first, last, value, comp); -} - -template -thrust::pair -equal_range(ForwardIterator first, - ForwardIterator last, - const LessThanComparable& value) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::equal_range(select_system(system), first, last, value); -} - -template -thrust::pair -equal_range(ForwardIterator first, - ForwardIterator last, - const T& value, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::equal_range(select_system(system), first, last, value, comp); -} - -////////////////////// -// Vector Functions // -////////////////////// - -template -OutputIterator lower_bound(ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator output) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::lower_bound(select_system(system1,system2,system3), first, last, values_first, values_last, output); -} - -template -OutputIterator lower_bound(ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator output, - StrictWeakOrdering comp) -{ - using 
thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::lower_bound(select_system(system1,system2,system3), first, last, values_first, values_last, output, comp); -} - -template -OutputIterator upper_bound(ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator output) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::upper_bound(select_system(system1,system2,system3), first, last, values_first, values_last, output); -} - -template -OutputIterator upper_bound(ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator output, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::upper_bound(select_system(system1,system2,system3), first, last, values_first, values_last, output, comp); -} - -template -OutputIterator binary_search(ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator output) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename 
thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::binary_search(select_system(system1,system2,system3), first, last, values_first, values_last, output); -} - -template -OutputIterator binary_search(ForwardIterator first, - ForwardIterator last, - InputIterator values_first, - InputIterator values_last, - OutputIterator output, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::binary_search(select_system(system1,system2,system3), first, last, values_first, values_last, output, comp); -} - -} // end namespace thrust - diff --git a/compat/thrust/detail/config.h b/compat/thrust/detail/config.h deleted file mode 100644 index d6b6691089..0000000000 --- a/compat/thrust/detail/config.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/*! \file config.h - * \brief Defines platform configuration. 
- */ - -#pragma once - -#include - diff --git a/compat/thrust/detail/config/compiler.h b/compat/thrust/detail/config/compiler.h deleted file mode 100644 index 90ce911155..0000000000 --- a/compat/thrust/detail/config/compiler.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file compiler.h - * \brief Compiler-specific configuration - */ - -#pragma once - -#ifdef __CUDACC__ - -#include - -// Thrust supports CUDA >= 3.0 -#if CUDA_VERSION < 3000 -#error "CUDA v3.0 or newer is required" -#endif // CUDA_VERSION - -#endif // __CUDACC__ - -// enumerate host compilers we know about -#define THRUST_HOST_COMPILER_UNKNOWN 0 -#define THRUST_HOST_COMPILER_MSVC 1 -#define THRUST_HOST_COMPILER_GCC 2 - -// enumerate host compilers we know about -#define THRUST_DEVICE_COMPILER_UNKNOWN 0 -#define THRUST_DEVICE_COMPILER_MSVC 1 -#define THRUST_DEVICE_COMPILER_GCC 2 -#define THRUST_DEVICE_COMPILER_NVCC 3 - -// figure out which host compiler we're using -// XXX we should move the definition of THRUST_DEPRECATED out of this logic -#if defined(_MSC_VER) -#define THRUST_HOST_COMPILER THRUST_HOST_COMPILER_MSVC -#define THRUST_DEPRECATED __declspec(deprecated) -#elif defined(__GNUC__) -#define THRUST_HOST_COMPILER THRUST_HOST_COMPILER_GCC -#define THRUST_DEPRECATED __attribute__ ((deprecated)) -#define THRUST_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) -#else 
-#define THRUST_HOST_COMPILER THRUST_HOST_COMPILER_UNKNOWN -#define THRUST_DEPRECATED -#endif // THRUST_HOST_COMPILER - -// figure out which device compiler we're using -#if defined(__CUDACC__) -#define THRUST_DEVICE_COMPILER THRUST_DEVICE_COMPILER_NVCC -#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC -#define THRUST_DEVICE_COMPILER THRUST_DEVICE_COMPILER_MSVC -#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC -#define THRUST_DEVICE_COMPILER THRUST_DEVICE_COMPILER_GCC -#else -#define THRUST_DEVICE_COMPILER THRUST_DEVICE_COMPILER_UNKNOWN -#endif - -// is the device compiler capable of compiling omp? -#ifdef _OPENMP -#define THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE THRUST_TRUE -#else -#define THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE THRUST_FALSE -#endif // _OPENMP - -// disable specific MSVC warnings -#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && !defined(__CUDA_ARCH__) -#define __THRUST_DISABLE_MSVC_WARNING_BEGIN(x) \ -__pragma(warning(push)) \ -__pragma(warning(disable : x)) -#define __THRUST_DISABLE_MSVC_WARNING_END(x) \ -__pragma(warning(pop)) -#else -#define __THRUST_DISABLE_MSVC_WARNING_BEGIN(x) -#define __THRUST_DISABLE_MSVC_WARNING_END(x) -#endif -#define __THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING(x) \ -__THRUST_DISABLE_MSVC_WARNING_BEGIN(4244 4267) \ -x;\ -__THRUST_DISABLE_MSVC_WARNING_END(4244 4267) -#define __THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN \ -__THRUST_DISABLE_MSVC_WARNING_BEGIN(4244 4267) -#define __THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END \ -__THRUST_DISABLE_MSVC_WARNING_END(4244 4267) -#define __THRUST_DISABLE_MSVC_FORCING_VALUE_TO_BOOL(x) \ -__THRUST_DISABLE_MSVC_WARNING_BEGIN(4800) \ -x;\ -__THRUST_DISABLE_MSVC_WARNING_END(4800) -#define __THRUST_DISABLE_MSVC_FORCING_VALUE_TO_BOOL_BEGIN \ -__THRUST_DISABLE_MSVC_WARNING_BEGIN(4800) -#define __THRUST_DISABLE_MSVC_FORCING_VALUE_TO_BOOL_END \ -__THRUST_DISABLE_MSVC_WARNING_END(4800) diff --git a/compat/thrust/detail/config/compiler_fence.h 
b/compat/thrust/detail/config/compiler_fence.h deleted file mode 100644 index f5cbf98204..0000000000 --- a/compat/thrust/detail/config/compiler_fence.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// msvc case -#if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC - -#ifndef _DEBUG - -#include -#pragma intrinsic(_ReadWriteBarrier) -#define __thrust_compiler_fence() _ReadWriteBarrier() -#else - -#define __thrust_compiler_fence() do {} while (0) - -#endif // _DEBUG - -// gcc case -#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC - -#if THRUST_GCC_VERSION >= 40200 // atomic built-ins were introduced ~4.2 -#define __thrust_compiler_fence() __sync_synchronize() -#else -// allow the code to compile without any guarantees -#define __thrust_compiler_fence() do {} while (0) -#endif // THRUST_GCC_VERSION - -// unknown case -#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_UNKNOWN - -// allow the code to compile without any guarantees -#define __thrust_compiler_fence() do {} while (0) - -#endif - diff --git a/compat/thrust/detail/config/config.h b/compat/thrust/detail/config/config.h deleted file mode 100644 index f3498acd14..0000000000 --- a/compat/thrust/detail/config/config.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use 
this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file config.h - * \brief Defines platform configuration. - */ - -#pragma once - -// XXX the order of these #includes matters - -#include -#include -// host_system.h & device_system.h must be #included as early as possible -// because other config headers depend on it -#include -#include -#include -#include -#include -#include -#include - diff --git a/compat/thrust/detail/config/debug.h b/compat/thrust/detail/config/debug.h deleted file mode 100644 index 56c1bad207..0000000000 --- a/compat/thrust/detail/config/debug.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#ifndef THRUST_DEBUG -# ifndef NDEBUG -# if (DEBUG || _DEBUG) -# define THRUST_DEBUG 1 -# endif // (DEBUG || _DEBUG) -# endif // NDEBUG -#endif // THRUST_DEBUG - -#if THRUST_DEBUG -# ifndef __THRUST_SYNCHRONOUS -# define __THRUST_SYNCHRONOUS 1 -# endif // __THRUST_SYNCHRONOUS -#endif // THRUST_DEBUG - diff --git a/compat/thrust/detail/config/device_system.h b/compat/thrust/detail/config/device_system.h deleted file mode 100644 index a104906753..0000000000 --- a/compat/thrust/detail/config/device_system.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -// reserve 0 for undefined -#define THRUST_DEVICE_SYSTEM_CUDA 1 -#define THRUST_DEVICE_SYSTEM_OMP 2 -#define THRUST_DEVICE_SYSTEM_TBB 3 -#define THRUST_DEVICE_SYSTEM_CPP 4 - -#ifndef THRUST_DEVICE_SYSTEM -#define THRUST_DEVICE_SYSTEM THRUST_DEVICE_SYSTEM_CUDA -#endif // THRUST_DEVICE_SYSTEM - -// XXX make the use of THRUST_DEVICE_BACKEND an error in Thrust 1.7 -// XXX eliminate the following in Thrust 1.7 - -#define THRUST_DEVICE_BACKEND_CUDA THRUST_DEVICE_SYSTEM_CUDA -#define THRUST_DEVICE_BACKEND_OMP THRUST_DEVICE_SYSTEM_OMP -#define THRUST_DEVICE_BACKEND_TBB THRUST_DEVICE_SYSTEM_TBB - -#ifdef THRUST_DEVICE_BACKEND -# if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC -# pragma message("----------------------------------------------------------------------------------") -# pragma message("| WARNING: THRUST_DEVICE_BACKEND is deprecated; use THRUST_DEVICE_SYSTEM instead |") -# pragma message("----------------------------------------------------------------------------------") -# else -# warning ---------------------------------------------------------------------------------- -# warning | WARNING: THRUST_DEVICE_BACKEND is deprecated; use THRUST_DEVICE_SYSTEM instead | -# warning ---------------------------------------------------------------------------------- -# endif // THRUST_HOST_COMPILER -# undef THRUST_DEVICE_SYSTEM -# define THRUST_DEVICE_SYSTEM THRUST_DEVICE_BACKEND -#endif // THRUST_DEVICE_BACKEND - -#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA -#define __THRUST_DEVICE_SYSTEM_NAMESPACE cuda -#elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_OMP -#define __THRUST_DEVICE_SYSTEM_NAMESPACE omp -#elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_TBB -#define __THRUST_DEVICE_SYSTEM_NAMESPACE tbb -#elif THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CPP -#define __THRUST_DEVICE_SYSTEM_NAMESPACE cpp -#endif - -#define __THRUST_DEVICE_SYSTEM_ROOT thrust/system/__THRUST_DEVICE_SYSTEM_NAMESPACE - diff --git 
a/compat/thrust/detail/config/forceinline.h b/compat/thrust/detail/config/forceinline.h deleted file mode 100644 index 620769b999..0000000000 --- a/compat/thrust/detail/config/forceinline.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file forceinline.h - * \brief Defines __thrust_forceinline__ - */ - -#pragma once - -#include - -#if defined(__CUDACC__) - -#define __thrust_forceinline__ __forceinline__ - -#else - -// TODO add - -#define __thrust_forceinline__ - -#endif - diff --git a/compat/thrust/detail/config/hd_warning_disable.h b/compat/thrust/detail/config/hd_warning_disable.h deleted file mode 100644 index b993ef2828..0000000000 --- a/compat/thrust/detail/config/hd_warning_disable.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! 
\file hd_warning_disable.h - * \brief Defines __thrust_hd_warning_disable__ - */ - -#pragma once - -#include - -#if defined(__CUDACC__) - -#define __thrust_hd_warning_disable__ \ -#pragma hd_warning_disable -#else - -#define __thrust_hd_warning_disable__ - -#endif - - diff --git a/compat/thrust/detail/config/host_device.h b/compat/thrust/detail/config/host_device.h deleted file mode 100644 index 5d0975d106..0000000000 --- a/compat/thrust/detail/config/host_device.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! 
\file host_device.h - * \brief Defines __host__ and __device__ and other CUDA-isms - */ - -#pragma once - -#include - -#if THRUST_DEVICE_SYSTEM == THRUST_DEVICE_SYSTEM_CUDA - -#include - -#else - -// since __host__ & __device__ might have already be defined, only -// #define them if not defined already -// XXX this will break if the client does #include later - -#ifndef __host__ -#define __host__ -#endif // __host__ - -#ifndef __device__ -#define __device__ -#endif // __device__ - -#endif - diff --git a/compat/thrust/detail/config/host_system.h b/compat/thrust/detail/config/host_system.h deleted file mode 100644 index fb8edabc55..0000000000 --- a/compat/thrust/detail/config/host_system.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -// reserve 0 for undefined -#define THRUST_HOST_SYSTEM_CPP 1 -#define THRUST_HOST_SYSTEM_OMP 2 -#define THRUST_HOST_SYSTEM_TBB 3 - -#ifndef THRUST_HOST_SYSTEM -#define THRUST_HOST_SYSTEM THRUST_HOST_SYSTEM_CPP -#endif // THRUST_HOST_SYSTEM - -// XXX make the use of THRUST_HOST_BACKEND an error in Thrust 1.7 -// XXX eliminate the following in Thrust 1.7 - -#define THRUST_HOST_BACKEND_CPP THRUST_HOST_SYSTEM_CPP -#define THRUST_HOST_BACKEND_OMP THRUST_HOST_SYSTEM_OMP -#define THRUST_HOST_BACKEND_TBB THRUST_HOST_SYSTEM_TBB - -#ifdef THRUST_HOST_BACKEND -# if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC -# pragma message("------------------------------------------------------------------------------") -# pragma message("| WARNING: THRUST_HOST_BACKEND is deprecated; use THRUST_HOST_SYSTEM instead |") -# pragma message("------------------------------------------------------------------------------") -# else -# warning ------------------------------------------------------------------------------ -# warning | WARNING: THRUST_HOST_BACKEND is deprecated; use THRUST_HOST_SYSTEM instead | -# warning ------------------------------------------------------------------------------ -# endif // THRUST_HOST_COMPILER -# undef THRUST_HOST_SYSTEM -# define THRUST_HOST_SYSTEM THRUST_HOST_BACKEND -#endif // THRUST_HOST_BACKEND - -#if THRUST_HOST_SYSTEM == THRUST_HOST_SYSTEM_CPP -#define __THRUST_HOST_SYSTEM_NAMESPACE cpp -#elif THRUST_HOST_SYSTEM == THRUST_HOST_SYSTEM_OMP -#define __THRUST_HOST_SYSTEM_NAMESPACE omp -#elif THRUST_HOST_SYSTEM == THRUST_HOST_SYSTEM_TBB -#define __THRUST_HOST_SYSTEM_NAMESPACE tbb -#endif - -#define __THRUST_HOST_SYSTEM_ROOT thrust/system/__THRUST_HOST_SYSTEM_NAMESPACE - diff --git a/compat/thrust/detail/config/simple_defines.h b/compat/thrust/detail/config/simple_defines.h deleted file mode 100644 index f9510ee9ce..0000000000 --- a/compat/thrust/detail/config/simple_defines.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright 
2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file simple_defines.h - * \brief Primitive macros without dependencies. - */ - -#pragma once - -#define THRUST_UNKNOWN 0 -#define THRUST_FALSE 0 -#define THRUST_TRUE 1 - -#define THRUST_PREVENT_MACRO_SUBSTITUTION - diff --git a/compat/thrust/detail/contiguous_storage.h b/compat/thrust/detail/contiguous_storage.h deleted file mode 100644 index fe72bce2d2..0000000000 --- a/compat/thrust/detail/contiguous_storage.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ - -namespace detail -{ - -// XXX parameter T is redundant with parameter Alloc -template - class contiguous_storage -{ - private: - typedef thrust::detail::allocator_traits alloc_traits; - - public: - typedef Alloc allocator_type; - typedef T value_type; - typedef typename alloc_traits::pointer pointer; - typedef typename alloc_traits::const_pointer const_pointer; - typedef typename alloc_traits::size_type size_type; - typedef typename alloc_traits::difference_type difference_type; - - // XXX we should bring reference & const_reference into allocator_traits - // at the moment, it's unclear how -- we have nothing analogous to - // rebind_pointer for references - // we either need to add reference_traits or extend the existing - // pointer_traits to support wrapped references - typedef typename Alloc::reference reference; - typedef typename Alloc::const_reference const_reference; - - typedef thrust::detail::normal_iterator iterator; - typedef thrust::detail::normal_iterator const_iterator; - - explicit contiguous_storage(const allocator_type &alloc = allocator_type()); - - explicit contiguous_storage(size_type n, const allocator_type &alloc = allocator_type()); - - ~contiguous_storage(void); - - size_type size(void) const; - - size_type max_size(void) const; - - iterator begin(void); - - const_iterator begin(void) const; - - iterator end(void); - - const_iterator end(void) const; - - reference operator[](size_type n); - - const_reference operator[](size_type n) const; - - allocator_type get_allocator(void) const; - - // note that allocate does *not* automatically call deallocate - void allocate(size_type n); - - void deallocate(void); - - void swap(contiguous_storage &x); - - void default_construct_n(iterator first, size_type n); - - void uninitialized_fill_n(iterator first, size_type n, const value_type &value); - - template - iterator uninitialized_copy(InputIterator first, InputIterator 
last, iterator result); - - template - iterator uninitialized_copy(thrust::execution_policy &from_system, - InputIterator first, - InputIterator last, - iterator result); - - template - iterator uninitialized_copy_n(InputIterator first, Size n, iterator result); - - template - iterator uninitialized_copy_n(thrust::execution_policy &from_system, - InputIterator first, - Size n, - iterator result); - - void destroy(iterator first, iterator last); - - private: - // XXX we could inherit from this to take advantage of empty base class optimization - allocator_type m_allocator; - - iterator m_begin; - - size_type m_size; - - // disallow assignment - contiguous_storage &operator=(const contiguous_storage &x); -}; // end contiguous_storage - -} // end detail - -template void swap(detail::contiguous_storage &lhs, detail::contiguous_storage &rhs); - -} // end thrust - -#include - diff --git a/compat/thrust/detail/contiguous_storage.inl b/compat/thrust/detail/contiguous_storage.inl deleted file mode 100644 index 7e26c26b1a..0000000000 --- a/compat/thrust/detail/contiguous_storage.inl +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include // for use of std::swap in the WAR below - -namespace thrust -{ - -namespace detail -{ - -template - contiguous_storage - ::contiguous_storage(const Alloc &alloc) - :m_allocator(alloc), - m_begin(pointer(static_cast(0))), - m_size(0) -{ - ; -} // end contiguous_storage::contiguous_storage() - -template - contiguous_storage - ::contiguous_storage(size_type n, const Alloc &alloc) - :m_allocator(alloc), - m_begin(pointer(static_cast(0))), - m_size(0) -{ - allocate(n); -} // end contiguous_storage::contiguous_storage() - -template - contiguous_storage - ::~contiguous_storage(void) -{ - deallocate(); -} // end contiguous_storage::~contiguous_storage() - -template - typename contiguous_storage::size_type - contiguous_storage - ::size(void) const -{ - return m_size; -} // end contiguous_storage::size() - -template - typename contiguous_storage::size_type - contiguous_storage - ::max_size(void) const -{ - return alloc_traits::max_size(m_allocator); -} // end contiguous_storage::max_size() - -template - typename contiguous_storage::iterator - contiguous_storage - ::begin(void) -{ - return m_begin; -} // end contiguous_storage::begin() - -template - typename contiguous_storage::const_iterator - contiguous_storage - ::begin(void) const -{ - return m_begin; -} // end contiguous_storage::begin() - -template - typename contiguous_storage::iterator - contiguous_storage - ::end(void) -{ - return m_begin + size(); -} // end contiguous_storage::end() - -template - typename contiguous_storage::const_iterator - contiguous_storage - ::end(void) const -{ - return m_begin + size(); -} // end contiguous_storage::end() - -template - typename contiguous_storage::reference - contiguous_storage - ::operator[](size_type n) -{ - return m_begin[n]; -} // end contiguous_storage::operator[]() - -template - typename contiguous_storage::const_reference - contiguous_storage - ::operator[](size_type 
n) const -{ - return m_begin[n]; -} // end contiguous_storage::operator[]() - -template - typename contiguous_storage::allocator_type - contiguous_storage - ::get_allocator(void) const -{ - return m_allocator; -} // end contiguous_storage::get_allocator() - -template - void contiguous_storage - ::allocate(size_type n) -{ - if(n > 0) - { - m_begin = iterator(m_allocator.allocate(n)); - m_size = n; - } // end if - else - { - m_begin = iterator(pointer(static_cast(0))); - m_size = 0; - } // end else -} // end contiguous_storage::allocate() - -template - void contiguous_storage - ::deallocate(void) -{ - if(size() > 0) - { - m_allocator.deallocate(m_begin.base(), size()); - m_begin = iterator(pointer(static_cast(0))); - m_size = 0; - } // end if -} // end contiguous_storage::deallocate() - -template - void contiguous_storage - ::swap(contiguous_storage &x) -{ - thrust::swap(m_begin, x.m_begin); - thrust::swap(m_size, x.m_size); - - // XXX WAR nvcc 4.0's "calling a __host__ function from a __host__ __device__ function is not allowed" warning - //thrust::swap(m_allocator, x.m_allocator); - std::swap(m_allocator, x.m_allocator); -} // end contiguous_storage::swap() - -template - void contiguous_storage - ::default_construct_n(iterator first, size_type n) -{ - default_construct_range(m_allocator, first.base(), n); -} // end contiguous_storage::default_construct_n() - -template - void contiguous_storage - ::uninitialized_fill_n(iterator first, size_type n, const value_type &x) -{ - fill_construct_range(m_allocator, first.base(), n, x); -} // end contiguous_storage::uninitialized_fill() - -template - template - typename contiguous_storage::iterator - contiguous_storage - ::uninitialized_copy(thrust::execution_policy &from_system, InputIterator first, InputIterator last, iterator result) -{ - return iterator(copy_construct_range(from_system, m_allocator, first, last, result.base())); -} // end contiguous_storage::uninitialized_copy() - -template - template - typename 
contiguous_storage::iterator - contiguous_storage - ::uninitialized_copy(InputIterator first, InputIterator last, iterator result) -{ - // XXX assumes InputIterator's associated System is default-constructible - typename thrust::iterator_system::type from_system; - - return iterator(copy_construct_range(from_system, m_allocator, first, last, result.base())); -} // end contiguous_storage::uninitialized_copy() - -template - template - typename contiguous_storage::iterator - contiguous_storage - ::uninitialized_copy_n(thrust::execution_policy &from_system, InputIterator first, Size n, iterator result) -{ - return iterator(copy_construct_range_n(from_system, m_allocator, first, n, result.base())); -} // end contiguous_storage::uninitialized_copy_n() - -template - template - typename contiguous_storage::iterator - contiguous_storage - ::uninitialized_copy_n(InputIterator first, Size n, iterator result) -{ - // XXX assumes InputIterator's associated System is default-constructible - typename thrust::iterator_system::type from_system; - - return iterator(copy_construct_range_n(from_system, m_allocator, first, n, result.base())); -} // end contiguous_storage::uninitialized_copy_n() - -template - void contiguous_storage - ::destroy(iterator first, iterator last) -{ - destroy_range(m_allocator, first.base(), last - first); -} // end contiguous_storage::destroy() - -} // end detail - -template - void swap(detail::contiguous_storage &lhs, detail::contiguous_storage &rhs) -{ - lhs.swap(rhs); -} // end swap() - -} // end thrust - diff --git a/compat/thrust/detail/copy.h b/compat/thrust/detail/copy.h deleted file mode 100644 index 8ed3abd219..0000000000 --- a/compat/thrust/detail/copy.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ - -template - OutputIterator copy(const thrust::detail::execution_policy_base &system, - InputIterator first, - InputIterator last, - OutputIterator result); - -template - OutputIterator copy_n(const thrust::detail::execution_policy_base &system, - InputIterator first, - Size n, - OutputIterator result); - -template - OutputIterator copy(InputIterator first, - InputIterator last, - OutputIterator result); - -template - OutputIterator copy_n(InputIterator first, - Size n, - OutputIterator result); - - -namespace detail -{ - - -template - OutputIterator two_system_copy(thrust::execution_policy &from_system, - thrust::execution_policy &two_system, - InputIterator first, - InputIterator last, - OutputIterator result); - - -template - OutputIterator two_system_copy_n(thrust::execution_policy &from_system, - thrust::execution_policy &two_system, - InputIterator first, - Size n, - OutputIterator result); - - -} // end detail -} // end thrust - -#include - diff --git a/compat/thrust/detail/copy.inl b/compat/thrust/detail/copy.inl deleted file mode 100644 index 9ac48074a0..0000000000 --- a/compat/thrust/detail/copy.inl +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - OutputIterator copy(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result) -{ - using thrust::system::detail::generic::copy; - return copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result); -} // end copy() - - -template - OutputIterator copy_n(const thrust::detail::execution_policy_base &exec, - InputIterator first, - Size n, - OutputIterator result) -{ - using thrust::system::detail::generic::copy_n; - return copy_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, result); -} // end copy_n() - - -namespace detail -{ - - -template - OutputIterator two_system_copy(thrust::execution_policy &system1, - thrust::execution_policy &system2, - InputIterator first, - InputIterator last, - OutputIterator result) -{ - using thrust::system::detail::generic::select_system; - - return thrust::copy(select_system(thrust::detail::derived_cast(thrust::detail::strip_const(system1)), thrust::detail::derived_cast(thrust::detail::strip_const(system2))), first, last, result); -} // end two_system_copy() - - -template - OutputIterator two_system_copy_n(thrust::execution_policy &system1, - thrust::execution_policy &system2, - InputIterator first, - Size n, - OutputIterator result) -{ - using thrust::system::detail::generic::select_system; - - return thrust::copy_n(select_system(thrust::detail::derived_cast(thrust::detail::strip_const(system1)), 
thrust::detail::derived_cast(thrust::detail::strip_const(system2))), first, n, result); -} // end two_system_copy_n() - - -} // end detail - - -template - OutputIterator copy(InputIterator first, - InputIterator last, - OutputIterator result) -{ - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::detail::two_system_copy(system1, system2, first, last, result); -} // end copy() - - -template - OutputIterator copy_n(InputIterator first, - Size n, - OutputIterator result) -{ - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::detail::two_system_copy_n(system1, system2, first, n, result); -} // end copy_n() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/copy_if.h b/compat/thrust/detail/copy_if.h deleted file mode 100644 index 54e1ef4027..0000000000 --- a/compat/thrust/detail/copy_if.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ - -template - OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred); - -template - OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred); - -template - OutputIterator copy_if(InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred); - -template - OutputIterator copy_if(InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred); - -} // end thrust - -#include - diff --git a/compat/thrust/detail/copy_if.inl b/compat/thrust/detail/copy_if.inl deleted file mode 100644 index e443bb7d0b..0000000000 --- a/compat/thrust/detail/copy_if.inl +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred) -{ - using thrust::system::detail::generic::copy_if; - return copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, pred); -} // end copy_if() - - -template - OutputIterator copy_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred) -{ - using thrust::system::detail::generic::copy_if; - return copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, result, pred); -} // end copy_if() - - -template - OutputIterator copy_if(InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::copy_if(select_system(system1,system2), first, last, result, pred); -} // end copy_if() - - -template - OutputIterator copy_if(InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::copy_if(select_system(system1,system2,system3), first, last, stencil, result, pred); -} // end copy_if() - - -} // end thrust - diff --git a/compat/thrust/detail/count.inl b/compat/thrust/detail/count.inl deleted file mode 100644 index 
d2856ae1ce..0000000000 --- a/compat/thrust/detail/count.inl +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file count.inl - * \brief Inline file for count.h. - */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - typename thrust::iterator_traits::difference_type - count(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, const EqualityComparable& value) -{ - using thrust::system::detail::generic::count; - return count(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value); -} // end count() - - -template - typename thrust::iterator_traits::difference_type - count_if(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred) -{ - using thrust::system::detail::generic::count_if; - return count_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); -} // end count_if() - - -template -typename thrust::iterator_traits::difference_type -count(InputIterator first, InputIterator last, const EqualityComparable& value) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::count(select_system(system), first, last, value); -} // end count() - - -template -typename 
thrust::iterator_traits::difference_type -count_if(InputIterator first, InputIterator last, Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::count_if(select_system(system), first, last, pred); -} // end count_if() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/cstdint.h b/compat/thrust/detail/cstdint.h deleted file mode 100644 index 25d30fd5b2..0000000000 --- a/compat/thrust/detail/cstdint.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC) -#include -#endif - -namespace thrust -{ -namespace detail -{ - -#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) - -#if (_MSC_VER < 1300) - typedef signed char int8_t; - typedef signed short int16_t; - typedef signed int int32_t; - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; -#else - typedef signed __int8 int8_t; - typedef signed __int16 int16_t; - typedef signed __int32 int32_t; - typedef unsigned __int8 uint8_t; - typedef unsigned __int16 uint16_t; - typedef unsigned __int32 uint32_t; -#endif -typedef signed __int64 int64_t; -typedef unsigned __int64 uint64_t; - -#else - -typedef ::int8_t int8_t; -typedef ::int16_t int16_t; -typedef ::int32_t int32_t; -typedef ::int64_t int64_t; -typedef ::uint8_t uint8_t; -typedef ::uint16_t uint16_t; -typedef ::uint32_t uint32_t; -typedef ::uint64_t uint64_t; - -#endif - - -// an oracle to tell us how to define intptr_t -template struct divine_intptr_t; -template struct divine_uintptr_t; - -// 32b platforms -template<> struct divine_intptr_t<4> { typedef thrust::detail::int32_t type; }; -template<> struct divine_uintptr_t<4> { typedef thrust::detail::uint32_t type; }; - -// 64b platforms -template<> struct divine_intptr_t<8> { typedef thrust::detail::int64_t type; }; -template<> struct divine_uintptr_t<8> { typedef thrust::detail::uint64_t type; }; - -typedef divine_intptr_t<>::type intptr_t; -typedef divine_uintptr_t<>::type uintptr_t; - -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/device_delete.inl b/compat/thrust/detail/device_delete.inl deleted file mode 100644 index dd70d76891..0000000000 --- a/compat/thrust/detail/device_delete.inl +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file device_delete.inl - * \brief Inline file for device_delete.h. - */ - -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - -// define an empty allocator class to use below -struct device_delete_allocator {}; - -} - -template - void device_delete(device_ptr ptr, - const size_t n) -{ - // we can use device_allocator to destroy the range - thrust::detail::device_delete_allocator a; - thrust::detail::destroy_range(a, ptr, n); - thrust::device_free(ptr); -} // end device_delete() - -} // end thrust - diff --git a/compat/thrust/detail/device_free.inl b/compat/thrust/detail/device_free.inl deleted file mode 100644 index ab8db9f09c..0000000000 --- a/compat/thrust/detail/device_free.inl +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file device_free.inl - * \brief Inline file for device_free.h. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - -void device_free(thrust::device_ptr ptr) -{ - using thrust::system::detail::generic::select_system; - - typedef thrust::iterator_system< thrust::device_ptr >::type system; - - // XXX lower to select_system(system) here - system s; - - thrust::free(s, ptr); -} // end device_free() - -} // end thrust - diff --git a/compat/thrust/detail/device_malloc.inl b/compat/thrust/detail/device_malloc.inl deleted file mode 100644 index 76d0029993..0000000000 --- a/compat/thrust/detail/device_malloc.inl +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file device_malloc.inl - * \brief Inline file for device_malloc.h. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -thrust::device_ptr device_malloc(const std::size_t n) -{ - using thrust::system::detail::generic::select_system; - - typedef thrust::iterator_system< thrust::device_ptr >::type system; - - // XXX lower to select_system(system) here - system s; - - return thrust::device_ptr(thrust::malloc(s, n).get()); -} // end device_malloc() - - -template - thrust::device_ptr device_malloc(const std::size_t n) -{ - using thrust::system::detail::generic::select_system; - - typedef thrust::iterator_system< thrust::device_ptr >::type system; - - // XXX lower to select_system(system) here - system s; - - return thrust::device_ptr(thrust::malloc(s,n).get()); -} // end device_malloc() - - -} // end thrust - diff --git a/compat/thrust/detail/device_new.inl b/compat/thrust/detail/device_new.inl deleted file mode 100644 index 1f00a97a94..0000000000 --- a/compat/thrust/detail/device_new.inl +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file device_new.inl - * \brief Inline file for device_new.h. 
- */ - -#include -#include -#include - -namespace thrust -{ - -template - device_ptr device_new(device_ptr p, - const size_t n) -{ - // XXX TODO dispatch n null device constructors at p here - // in the meantime, dispatch 1 null host constructor here - // and dispatch n copy constructors - return device_new(p, T(), n); -} // end device_new() - -template - device_ptr device_new(device_ptr p, - const T &exemplar, - const size_t n) -{ - device_ptr result(reinterpret_cast(p.get())); - - // run copy constructors at p here - thrust::uninitialized_fill(result, result + n, exemplar); - - return result; -} // end device_new() - -template - device_ptr device_new(const size_t n) -{ - // call placement new - return device_new(thrust::device_malloc(n)); -} // end device_new() - -} // thrust - diff --git a/compat/thrust/detail/device_ptr.inl b/compat/thrust/detail/device_ptr.inl deleted file mode 100644 index 0afe8a19c1..0000000000 --- a/compat/thrust/detail/device_ptr.inl +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file device_ptr.inl - * \brief Inline file for device_ptr.h. 
- */ - -#include -#include -#include - -#include -#include - -namespace thrust -{ - -template - device_ptr device_pointer_cast(T *ptr) -{ - return device_ptr(ptr); -} // end device_pointer_cast() - -template - device_ptr device_pointer_cast(const device_ptr &ptr) -{ - return ptr; -} // end device_pointer_cast() - -// output to ostream -template - std::basic_ostream &operator<<(std::basic_ostream &os, const device_ptr &p) -{ - return os << p.get(); -} // end operator<<() - - -namespace detail -{ - -template - struct is_device_ptr< thrust::device_ptr > - : public true_type -{ -}; // end is_device_ptr - -#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400) -// XXX WAR MSVC 2005 problem with correctly implementing -// pointer_raw_pointer for device_ptr by specializing it here -template - struct pointer_raw_pointer< thrust::device_ptr > -{ - typedef typename device_ptr::raw_pointer type; -}; // end pointer_raw_pointer -#endif - - -} // end namespace detail -} // end namespace thrust - diff --git a/compat/thrust/detail/device_reference.inl b/compat/thrust/detail/device_reference.inl deleted file mode 100644 index ad5cb76688..0000000000 --- a/compat/thrust/detail/device_reference.inl +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file device_reference.inl - * \brief Inline file for device_reference.h. 
- */ - -#include -#include - -namespace thrust -{ - -template - template - device_reference & - device_reference - ::operator=(const device_reference &other) -{ - return super_t::operator=(other); -} // end operator=() - -template - device_reference & - device_reference - ::operator=(const value_type &x) -{ - return super_t::operator=(x); -} // end operator=() - -template -__host__ __device__ -void swap(device_reference &a, device_reference &b) -{ - a.swap(b); -} // end swap() - -} // end thrust - diff --git a/compat/thrust/detail/device_vector.inl b/compat/thrust/detail/device_vector.inl deleted file mode 100644 index f6bafbaa5f..0000000000 --- a/compat/thrust/detail/device_vector.inl +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file device_vector.inl - * \brief Inline file for device_vector.h. 
- */ - -#include - -namespace thrust -{ - -template - template - device_vector - ::device_vector(const host_vector &v) - :Parent(v) -{ - ; -} // end device_vector::device_vector() - -} // end namespace thrust - diff --git a/compat/thrust/detail/dispatch/is_trivial_copy.h b/compat/thrust/detail/dispatch/is_trivial_copy.h deleted file mode 100644 index 2bedf1f716..0000000000 --- a/compat/thrust/detail/dispatch/is_trivial_copy.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file trivial_copy.h - * \brief Device implementations for copying memory between host and device. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ - -namespace detail -{ - -namespace dispatch -{ - - -// a trivial copy's iterator's value_types match, -// the iterators themselves are normal_iterators -// and the ToIterator's value_type has_trivial_assign -template - struct is_trivial_copy : - integral_constant< - bool, - is_same< - typename thrust::iterator_value::type, - typename thrust::iterator_value::type - >::value - && is_trivial_iterator::value - && is_trivial_iterator::value - && has_trivial_assign::type>::value - > {}; - -} // end namespace dispatch - -} // end namespace detail - -} // end namespace thrust - diff --git a/compat/thrust/detail/distance.inl b/compat/thrust/detail/distance.inl deleted file mode 100644 index f37595f324..0000000000 --- a/compat/thrust/detail/distance.inl +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file distance.inl - * \brief Inline file for distance.h - */ - -#include -#include -#include - -namespace thrust -{ - - -template - inline typename thrust::iterator_traits::difference_type - distance(InputIterator first, InputIterator last) -{ - return thrust::system::detail::generic::distance(first, last); -} // end distance() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/equal.inl b/compat/thrust/detail/equal.inl deleted file mode 100644 index ca6fecccf1..0000000000 --- a/compat/thrust/detail/equal.inl +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file equal.inl - * \brief Inline file for equal.h. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template -bool equal(const thrust::detail::execution_policy_base &system, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2) -{ - using thrust::system::detail::generic::equal; - return equal(thrust::detail::derived_cast(thrust::detail::strip_const(system)), first1, last1, first2); -} // end equal() - - -template -bool equal(const thrust::detail::execution_policy_base &system, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate binary_pred) -{ - using thrust::system::detail::generic::equal; - return equal(thrust::detail::derived_cast(thrust::detail::strip_const(system)), first1, last1, first2, binary_pred); -} // end equal() - - -template -bool equal(InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::equal(select_system(system1,system2), first1, last1, first2); -} - - -template -bool equal(InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, BinaryPredicate binary_pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::equal(select_system(system1,system2), first1, last1, first2, binary_pred); -} - - -} // end namespace thrust - diff --git a/compat/thrust/detail/execute_with_allocator.h b/compat/thrust/detail/execute_with_allocator.h deleted file mode 100644 index 9d3c1ba29b..0000000000 --- a/compat/thrust/detail/execute_with_allocator.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - 
* you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - -template -__host__ __device__ -ToPointer reinterpret_pointer_cast(FromPointer ptr) -{ - typedef typename thrust::detail::pointer_element::type to_element; - return ToPointer(reinterpret_cast(thrust::raw_pointer_cast(ptr))); -} - - -template class BaseSystem> - struct execute_with_allocator - : BaseSystem > -{ - Allocator &m_alloc; - - execute_with_allocator(Allocator &alloc) - : m_alloc(alloc) - {} - - template - friend thrust::pair - get_temporary_buffer(execute_with_allocator &system, std::ptrdiff_t n) - { - typedef typename thrust::detail::allocator_traits alloc_traits; - typedef typename alloc_traits::void_pointer void_pointer; - typedef typename alloc_traits::size_type size_type; - typedef typename alloc_traits::value_type value_type; - - // how many elements of type value_type do we need to accomodate n elements of type T? 
- size_type num_elements = thrust::detail::util::divide_ri(sizeof(T) * n, sizeof(value_type)); - - // allocate that many - void_pointer ptr = alloc_traits::allocate(system.m_alloc, num_elements); - - // return the pointer and the number of elements of type T allocated - return thrust::make_pair(thrust::detail::reinterpret_pointer_cast(ptr),n); - } - - template - friend void return_temporary_buffer(execute_with_allocator &system, Pointer p) - { - typedef typename thrust::detail::allocator_traits alloc_traits; - typedef typename alloc_traits::pointer pointer; - - // return the pointer to the allocator - pointer to_ptr = thrust::detail::reinterpret_pointer_cast(p); - alloc_traits::deallocate(system.m_alloc, to_ptr, 0); - } -}; - - -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/execution_policy.h b/compat/thrust/detail/execution_policy.h deleted file mode 100644 index 28e77f2e91..0000000000 --- a/compat/thrust/detail/execution_policy.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -namespace thrust -{ -namespace detail -{ - - -// execution_policy_base serves as a guard against -// inifinite recursion in thrust entry points: -// -// template -// void foo(const thrust::detail::execution_policy_base &s) -// { -// using thrust::system::detail::generic::foo; -// -// foo(thrust::detail::derived_cast(thrust::detail::strip_const(s)); -// } -// -// foo is not recursive when -// 1. DerivedPolicy is derived from thrust::execution_policy below -// 2. generic::foo takes thrust::execution_policy as a parameter -template struct execution_policy_base {}; - - -template -__host__ __device__ -inline execution_policy_base &strip_const(const execution_policy_base &x) -{ - return const_cast&>(x); -} - - -template -__host__ __device__ -inline DerivedPolicy &derived_cast(execution_policy_base &x) -{ - return static_cast(x); -} - - -template -__host__ __device__ -inline const DerivedPolicy &derived_cast(const execution_policy_base &x) -{ - return static_cast(x); -} - - -} // end detail - - -template - struct execution_policy - : thrust::detail::execution_policy_base -{}; - - -} // end thrust - diff --git a/compat/thrust/detail/extrema.inl b/compat/thrust/detail/extrema.inl deleted file mode 100644 index 4bcd0bde56..0000000000 --- a/compat/thrust/detail/extrema.inl +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template -ForwardIterator min_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last) -{ - using thrust::system::detail::generic::min_element; - return min_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); -} // end min_element() - - -template -ForwardIterator min_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp) -{ - using thrust::system::detail::generic::min_element; - return min_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp); -} // end min_element() - - -template -ForwardIterator max_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last) -{ - using thrust::system::detail::generic::max_element; - return max_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); -} // end max_element() - - -template -ForwardIterator max_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp) -{ - using thrust::system::detail::generic::max_element; - return max_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp); -} // end max_element() - - -template -thrust::pair minmax_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last) -{ - using thrust::system::detail::generic::minmax_element; - return minmax_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); -} // end minmax_element() - - -template -thrust::pair minmax_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp) -{ - using thrust::system::detail::generic::minmax_element; - return 
minmax_element(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp); -} // end minmax_element() - - -template -ForwardIterator min_element(ForwardIterator first, ForwardIterator last) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::min_element(select_system(system), first, last); -} // end min_element() - - -template -ForwardIterator min_element(ForwardIterator first, ForwardIterator last, - BinaryPredicate comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::min_element(select_system(system), first, last, comp); -} // end min_element() - - -template -ForwardIterator max_element(ForwardIterator first, ForwardIterator last) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::max_element(select_system(system), first, last); -} // end max_element() - - -template -ForwardIterator max_element(ForwardIterator first, ForwardIterator last, - BinaryPredicate comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::max_element(select_system(system), first, last, comp); -} // end max_element() - - -template -thrust::pair -minmax_element(ForwardIterator first, ForwardIterator last) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::minmax_element(select_system(system), first, last); -} // end minmax_element() - - -template -thrust::pair -minmax_element(ForwardIterator first, ForwardIterator last, BinaryPredicate comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type 
System; - - System system; - - return thrust::minmax_element(select_system(system), first, last, comp); -} // end minmax_element() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/fill.inl b/compat/thrust/detail/fill.inl deleted file mode 100644 index c60e4a059e..0000000000 --- a/compat/thrust/detail/fill.inl +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file fill.inl - * \brief Inline file for fill.h. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - void fill(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const T &value) -{ - using thrust::system::detail::generic::fill; - return fill(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value); -} // end fill() - - -template - OutputIterator fill_n(const thrust::detail::execution_policy_base &exec, - OutputIterator first, - Size n, - const T &value) -{ - using thrust::system::detail::generic::fill_n; - return fill_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, value); -} // end fill_n() - - -template - void fill(ForwardIterator first, - ForwardIterator last, - const T &value) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - thrust::fill(select_system(system), first, last, value); -} // end fill() - - -template - OutputIterator fill_n(OutputIterator first, - Size n, - const T &value) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::fill_n(select_system(system), first, n, value); -} // end fill() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/find.inl b/compat/thrust/detail/find.inl deleted file mode 100644 index 465c937395..0000000000 --- a/compat/thrust/detail/find.inl +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file find.inl - * \brief Inline file for find.h - */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template -InputIterator find(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - const T& value) -{ - using thrust::system::detail::generic::find; - return find(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value); -} // end find() - - -template -InputIterator find_if(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - Predicate pred) -{ - using thrust::system::detail::generic::find_if; - return find_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); -} // end find_if() - - -template -InputIterator find_if_not(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - Predicate pred) -{ - using thrust::system::detail::generic::find_if_not; - return find_if_not(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); -} // end find_if_not() - - -template -InputIterator find(InputIterator first, - InputIterator last, - const T& value) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::find(select_system(system), first, last, value); -} - -template -InputIterator find_if(InputIterator first, - InputIterator last, - Predicate pred) -{ - using 
thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::find_if(select_system(system), first, last, pred); -} - -template -InputIterator find_if_not(InputIterator first, - InputIterator last, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::find_if_not(select_system(system), first, last, pred); -} - - -} // end namespace thrust - diff --git a/compat/thrust/detail/for_each.inl b/compat/thrust/detail/for_each.inl deleted file mode 100644 index 7c9dc172e3..0000000000 --- a/compat/thrust/detail/for_each.inl +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file for_each.inl - * \brief Inline file for for_each.h. 
- */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - InputIterator for_each(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - UnaryFunction f) -{ - using thrust::system::detail::generic::for_each; - - return for_each(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, f); -} - - -template -InputIterator for_each(InputIterator first, - InputIterator last, - UnaryFunction f) -{ - using thrust::system::detail::generic::select_system; - typedef typename thrust::iterator_system::type System; - - System system; - return thrust::for_each(select_system(system), first, last, f); -} // end for_each() - - -template - InputIterator for_each_n(const thrust::detail::execution_policy_base &exec, - InputIterator first, - Size n, - UnaryFunction f) -{ - using thrust::system::detail::generic::for_each_n; - - return for_each_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, f); -} // end for_each_n() - - -template -InputIterator for_each_n(InputIterator first, - Size n, - UnaryFunction f) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - return thrust::for_each_n(select_system(system), first, n, f); -} // end for_each_n() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/function.h b/compat/thrust/detail/function.h deleted file mode 100644 index 36b76c286f..0000000000 --- a/compat/thrust/detail/function.h +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace detail -{ - - -template - struct host_function -{ - // mutable because Function::operator() might be const - mutable Function m_f; - - inline host_function() - : m_f() - {} - - inline host_function(const Function &f) - : m_f(f) - {} - - template - inline Result operator()(Argument &x) const - { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x))); - } - - template - inline Result operator()(const Argument &x) const - { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x))); - } - - template - inline Result operator()(Argument1 &x, Argument2 &y) const - { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); - } - - template - inline Result operator()(const Argument1 &x, Argument2 &y) const - { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); - } - - template - inline Result operator()(const Argument1 &x, const Argument2 &y) const - { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return 
static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); - } - - template - inline Result operator()(Argument1 &x, const Argument2 &y) const - { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); - } -}; // end host_function - - -template - struct device_function -{ - // mutable because Function::operator() might be const - mutable Function m_f; - - inline __device__ device_function() - : m_f() - {} - - inline __device__ device_function(const Function &f) - : m_f(f) - {} - - template - inline __device__ Result operator()(Argument &x) const - { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x))); - } - - template - inline __device__ Result operator()(const Argument &x) const - { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x))); - } - - template - inline __device__ Result operator()(Argument1 &x, Argument2 &y) const - { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); - } - - template - inline __device__ Result operator()(const Argument1 &x, Argument2 &y) const - { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); - } - - template - inline __device__ Result operator()(const Argument1 &x, const Argument2 &y) const - { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x), 
thrust::raw_reference_cast(y))); - } - - template - inline __device__ Result operator()(Argument1 &x, const Argument2 &y) const - { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); - } -}; // end device_function - - -template - struct host_device_function -{ - // mutable because Function::operator() might be const - mutable Function m_f; - - inline __host__ __device__ - host_device_function() - : m_f() - {} - - inline __host__ __device__ - host_device_function(const Function &f) - : m_f(f) - {} - - __thrust_hd_warning_disable__ - template - inline __host__ __device__ - Result operator()(Argument &x) const - { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x))); - } - - template - inline __host__ __device__ Result operator()(const Argument &x) const - { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x))); - } - - template - inline __host__ __device__ Result operator()(Argument1 &x, Argument2 &y) const - { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); - } - - template - inline __host__ __device__ Result operator()(const Argument1 &x, Argument2 &y) const - { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); - } - - template - inline __host__ __device__ Result operator()(const Argument1 &x, const Argument2 &y) const - { - // we static cast to Result to handle void Result without error - // in case 
Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); - } - - template - inline __host__ __device__ Result operator()(Argument1 &x, const Argument2 &y) const - { - // we static cast to Result to handle void Result without error - // in case Function's result is non-void - return static_cast(m_f(thrust::raw_reference_cast(x), thrust::raw_reference_cast(y))); - } -}; // end host_device_function - - -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/functional.inl b/compat/thrust/detail/functional.inl deleted file mode 100644 index 4024585935..0000000000 --- a/compat/thrust/detail/functional.inl +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -namespace thrust -{ - -namespace detail -{ - -template - struct unary_traits_imp; - -template - struct unary_traits_imp -{ - typedef Operation function_type; - typedef const function_type & param_type; - typedef typename Operation::result_type result_type; - typedef typename Operation::argument_type argument_type; -}; // end unary_traits_imp - -template - struct unary_traits_imp -{ - typedef Result (*function_type)(Argument); - typedef Result (*param_type)(Argument); - typedef Result result_type; - typedef Argument argument_type; -}; // end unary_traits_imp - -template - struct binary_traits_imp; - -template - struct binary_traits_imp -{ - typedef Operation function_type; - typedef const function_type & param_type; - typedef typename Operation::result_type result_type; - typedef typename Operation::first_argument_type first_argument_type; - typedef typename Operation::second_argument_type second_argument_type; -}; // end binary_traits_imp - -template - struct binary_traits_imp -{ - typedef Result (*function_type)(Argument1, Argument2); - typedef Result (*param_type)(Argument1, Argument2); - typedef Result result_type; - typedef Argument1 first_argument_type; - typedef Argument2 second_argument_type; -}; // end binary_traits_imp - -} // end detail - -template - struct unary_traits -{ - typedef typename detail::unary_traits_imp::function_type function_type; - typedef typename detail::unary_traits_imp::param_type param_type; - typedef typename detail::unary_traits_imp::result_type result_type; - typedef typename detail::unary_traits_imp::argument_type argument_type; -}; // end unary_traits - -template - struct unary_traits -{ - typedef Result (*function_type)(Argument); - typedef Result (*param_type)(Argument); - typedef Result result_type; - typedef Argument argument_type; -}; // end unary_traits - -template - struct binary_traits -{ - typedef typename detail::binary_traits_imp::function_type function_type; - typedef typename 
detail::binary_traits_imp::param_type param_type; - typedef typename detail::binary_traits_imp::result_type result_type; - typedef typename detail::binary_traits_imp::first_argument_type first_argument_type; - typedef typename detail::binary_traits_imp::second_argument_type second_argument_type; -}; // end binary_traits - -template - struct binary_traits -{ - typedef Result (*function_type)(Argument1, Argument2); - typedef Result (*param_type)(Argument1, Argument2); - typedef Result result_type; - typedef Argument1 first_argument_type; - typedef Argument2 second_argument_type; -}; // end binary_traits - -template - unary_negate not1(const Predicate &pred) -{ - return unary_negate(pred); -} // end not1() - -template - binary_negate not2(const BinaryPredicate &pred) -{ - return binary_negate(pred); -} // end not2() - -} // end thrust - diff --git a/compat/thrust/detail/functional/actor.h b/compat/thrust/detail/functional/actor.h deleted file mode 100644 index 0b95a6b894..0000000000 --- a/compat/thrust/detail/functional/actor.h +++ /dev/null @@ -1,192 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -// Portions of this code are derived from -// -// Manjunath Kudlur's Carbon library -// -// and -// -// Based on Boost.Phoenix v1.2 -// Copyright (c) 2001-2002 Joel de Guzman - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace functional -{ - -template - struct apply_actor -{ - typedef typename Action::template result::type type; -}; - -template - struct actor - : Eval -{ - typedef Eval eval_type; - - __host__ __device__ - actor(void); - - __host__ __device__ - actor(const Eval &base); - - __host__ __device__ - typename apply_actor::type - operator()(void) const; - - template - __host__ __device__ - typename apply_actor >::type - operator()(T0 &_0) const; - - template - __host__ __device__ - typename apply_actor >::type - operator()(T0 &_0, T1 &_1) const; - - template - __host__ __device__ - typename apply_actor >::type - operator()(T0 &_0, T1 &_1, T2 &_2) const; - - template - __host__ __device__ - typename apply_actor >::type - operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3) const; - - template - __host__ __device__ - typename apply_actor >::type - operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4) const; - - template - __host__ __device__ - typename apply_actor >::type - operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5) const; - - template - __host__ __device__ - typename apply_actor >::type - operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6) const; - - template - __host__ __device__ - typename apply_actor >::type - operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7) const; - - template - __host__ __device__ - typename apply_actor >::type - operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7, T8 &_8) const; - - template - __host__ __device__ - typename apply_actor >::type - operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7, T8 &_8, T9 &_9) const; - - template - __host__ __device__ - 
typename assign_result::type - operator=(const T &_1) const; -}; // end actor - -// in general, as_actor should turn things into values -template - struct as_actor -{ - typedef value type; - - static inline __host__ __device__ type convert(const T &x) - { - return val(x); - } // end convert() -}; // end as_actor - -// specialization for things which are already actors -template - struct as_actor > -{ - typedef actor type; - - static inline __host__ __device__ const type &convert(const actor &x) - { - return x; - } // end convert() -}; // end as_actor - -template - typename as_actor::type - __host__ __device__ - make_actor(const T &x) -{ - return as_actor::convert(x); -} // end make_actor() - -} // end functional - -// provide specializations for result_of for nullary, unary, and binary invocations of actor -template - struct result_of< - thrust::detail::functional::actor() - > -{ - typedef typename thrust::detail::functional::apply_actor< - thrust::detail::functional::actor, - thrust::null_type - >::type type; -}; // end result_of - -template - struct result_of< - thrust::detail::functional::actor(Arg1) - > -{ - typedef typename thrust::detail::functional::apply_actor< - thrust::detail::functional::actor, - thrust::tuple - >::type type; -}; // end result_of - -template - struct result_of< - thrust::detail::functional::actor(Arg1,Arg2) - > -{ - typedef typename thrust::detail::functional::apply_actor< - thrust::detail::functional::actor, - thrust::tuple - >::type type; -}; // end result_of - -} // end detail -} // end thrust - -#include - diff --git a/compat/thrust/detail/functional/actor.inl b/compat/thrust/detail/functional/actor.inl deleted file mode 100644 index 84347be7b8..0000000000 --- a/compat/thrust/detail/functional/actor.inl +++ /dev/null @@ -1,194 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Portions of this code are derived from -// -// Manjunath Kudlur's Carbon library -// -// and -// -// Based on Boost.Phoenix v1.2 -// Copyright (c) 2001-2002 Joel de Guzman - -#include -#include -#include -#include - -namespace thrust -{ - -namespace detail -{ -namespace functional -{ - -template - actor - ::actor(void) - : eval_type() -{} - -template - actor - ::actor(const Eval &base) - : eval_type(base) -{} - -template - typename apply_actor< - typename actor::eval_type, - typename thrust::null_type - >::type - actor - ::operator()(void) const -{ - return eval_type::eval(thrust::null_type()); -} // end basic_environment::operator() - -template - template - typename apply_actor< - typename actor::eval_type, - typename thrust::tuple - >::type - actor - ::operator()(T0 &_0) const -{ - return eval_type::eval(thrust::tie(_0)); -} // end basic_environment::operator() - -template - template - typename apply_actor< - typename actor::eval_type, - typename thrust::tuple - >::type - actor - ::operator()(T0 &_0, T1 &_1) const -{ - return eval_type::eval(thrust::tie(_0,_1)); -} // end basic_environment::operator() - -template - template - typename apply_actor< - typename actor::eval_type, - typename thrust::tuple - >::type - actor - ::operator()(T0 &_0, T1 &_1, T2 &_2) const -{ - return eval_type::eval(thrust::tie(_0,_1,_2)); -} // end basic_environment::operator() - -template - template - typename apply_actor< - typename actor::eval_type, - typename thrust::tuple - >::type - actor - ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3) const -{ - return 
eval_type::eval(thrust::tie(_0,_1,_2,_3)); -} // end basic_environment::operator() - -template - template - typename apply_actor< - typename actor::eval_type, - typename thrust::tuple - >::type - actor - ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4) const -{ - return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4)); -} // end basic_environment::operator() - -template - template - typename apply_actor< - typename actor::eval_type, - typename thrust::tuple - >::type - actor - ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5) const -{ - return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4,_5)); -} // end basic_environment::operator() - -template - template - typename apply_actor< - typename actor::eval_type, - typename thrust::tuple - >::type - actor - ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6) const -{ - return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4,_5,_6)); -} // end basic_environment::operator() - -template - template - typename apply_actor< - typename actor::eval_type, - typename thrust::tuple - >::type - actor - ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7) const -{ - return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4,_5,_6,_7)); -} // end basic_environment::operator() - -template - template - typename apply_actor< - typename actor::eval_type, - typename thrust::tuple - >::type - actor - ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7, T8 &_8) const -{ - return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4,_5,_6,_7,_8)); -} // end basic_environment::operator() - -template - template - typename apply_actor< - typename actor::eval_type, - typename thrust::tuple - >::type - actor - ::operator()(T0 &_0, T1 &_1, T2 &_2, T3 &_3, T4 &_4, T5 &_5, T6 &_6, T7 &_7, T8 &_8, T9 &_9) const -{ - return eval_type::eval(thrust::tie(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9)); -} // end basic_environment::operator() - -template - template - typename assign_result::type - actor - ::operator=(const 
T& _1) const -{ - return do_assign(*this,_1); -} // end actor::operator=() - -} // end functional -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/functional/argument.h b/compat/thrust/detail/functional/argument.h deleted file mode 100644 index 96a20bed1f..0000000000 --- a/compat/thrust/detail/functional/argument.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Portions of this code are derived from -// -// Manjunath Kudlur's Carbon library -// -// and -// -// Based on Boost.Phoenix v1.2 -// Copyright (c) 2001-2002 Joel de Guzman - -#pragma once - -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace functional -{ - -template - struct argument_helper -{ - typedef typename thrust::tuple_element::type type; -}; - -template - struct argument_helper -{ - typedef thrust::null_type type; -}; - - -template - class argument -{ - public: - template - struct result - : argument_helper - { - }; - - __host__ __device__ - argument(void){} - - template - __host__ __device__ - typename result::type eval(const Env &e) const - { - return thrust::get(e); - } // end eval() -}; // end argument - -} // end functional -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/functional/composite.h b/compat/thrust/detail/functional/composite.h deleted file mode 100644 index 1d5fde3152..0000000000 --- 
a/compat/thrust/detail/functional/composite.h +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Portions of this code are derived from -// -// Manjunath Kudlur's Carbon library -// -// and -// -// Based on Boost.Phoenix v1.2 -// Copyright (c) 2001-2002 Joel de Guzman - -#pragma once - -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace functional -{ - -// XXX we should just take a single EvalTuple -template - class composite; - -template - class composite< - Eval0, - Eval1, - thrust::null_type, - thrust::null_type, - thrust::null_type, - thrust::null_type, - thrust::null_type, - thrust::null_type, - thrust::null_type, - thrust::null_type - > -{ - public: - template - struct result - { - typedef typename Eval0::template result< - thrust::tuple< - typename Eval1::template result::type - > - >::type type; - }; - - __host__ __device__ - composite(const Eval0 &e0, const Eval1 &e1) - : m_eval0(e0), - m_eval1(e1) - {} - - template - __host__ __device__ - typename result::type - eval(const Env &x) const - { - typename Eval1::template result::type result1 = m_eval1.eval(x); - return m_eval0.eval(thrust::tie(result1)); - } - - private: - Eval0 m_eval0; - Eval1 m_eval1; -}; // end composite - -template - class composite< - Eval0, - Eval1, - Eval2, - thrust::null_type, - thrust::null_type, - thrust::null_type, - thrust::null_type, - thrust::null_type, - 
thrust::null_type, - thrust::null_type - > -{ - public: - template - struct result - { - typedef typename Eval0::template result< - thrust::tuple< - typename Eval1::template result::type, - typename Eval2::template result::type - > - >::type type; - }; - - __host__ __device__ - composite(const Eval0 &e0, const Eval1 &e1, const Eval2 &e2) - : m_eval0(e0), - m_eval1(e1), - m_eval2(e2) - {} - - template - __host__ __device__ - typename result::type - eval(const Env &x) const - { - typename Eval1::template result::type result1 = m_eval1.eval(x); - typename Eval2::template result::type result2 = m_eval2.eval(x); - return m_eval0.eval(thrust::tie(result1,result2)); - } - - private: - Eval0 m_eval0; - Eval1 m_eval1; - Eval2 m_eval2; -}; // end composite - -template -__host__ __device__ - actor > compose(const Eval0 &e0, const Eval1 &e1) -{ - return actor >(composite(e0,e1)); -} - -template -__host__ __device__ - actor > compose(const Eval0 &e0, const Eval1 &e1, const Eval2 &e2) -{ - return actor >(composite(e0,e1,e2)); -} - -} // end functional -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/functional/operators.h b/compat/thrust/detail/functional/operators.h deleted file mode 100644 index 0fc3539cb9..0000000000 --- a/compat/thrust/detail/functional/operators.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include - diff --git a/compat/thrust/detail/functional/operators/arithmetic_operators.h b/compat/thrust/detail/functional/operators/arithmetic_operators.h deleted file mode 100644 index a11e7acdd1..0000000000 --- a/compat/thrust/detail/functional/operators/arithmetic_operators.h +++ /dev/null @@ -1,394 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace functional -{ - -template -__host__ __device__ -actor< - composite< - unary_operator, - actor - > -> -__host__ __device__ -operator-(const actor &_1) -{ - return compose(unary_operator(), _1); -} // end operator-() - -// there's no standard unary_plus functional, so roll an ad hoc one here -template - struct unary_plus - : public thrust::unary_function -{ - __host__ __device__ T operator()(const T &x) const {return +x;} -}; // end unary_plus - -template -__host__ __device__ -actor< - composite< - unary_operator, - actor - > -> -operator+(const actor &_1) -{ - return compose(unary_operator(), _1); -} // end operator+() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator+(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end 
operator+() - -template -__host__ __device__ -actor< - composite< - binary_operator, - typename as_actor::type, - actor - > -> -operator+(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator+() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator+(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator+() - -template -__host__ __device__ -actor< - composite< - binary_operator, - typename as_actor::type, - actor - > -> -operator-(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator-() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator-(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator-() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator-(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator-() - -template -__host__ __device__ -actor< - composite< - binary_operator, - typename as_actor::type, - actor - > -> -operator*(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator*() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator*(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator*() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator*(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end 
operator*() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator/(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator/() - -template -__host__ __device__ -actor< - composite< - binary_operator, - typename as_actor::type, - actor - > -> -operator/(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator/() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator/(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator/() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator%(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator%() - -template -__host__ __device__ -actor< - composite< - binary_operator, - typename as_actor::type, - actor - > -> -operator%(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator%() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator%(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator%() - -// there's no standard prefix_increment functional, so roll an ad hoc one here -template - struct prefix_increment - : public thrust::unary_function -{ - __host__ __device__ T& operator()(T &x) const { return ++x; } -}; // end prefix_increment - -template -__host__ __device__ -actor< - composite< - unary_operator, - actor - > -> -operator++(const actor &_1) -{ - return compose(unary_operator(), _1); -} // end operator++() - -// there's no standard suffix_increment functional, so 
roll an ad hoc one here -template - struct suffix_increment - : public thrust::unary_function -{ - __host__ __device__ T operator()(T &x) const { return x++; } -}; // end suffix_increment - -template -__host__ __device__ -actor< - composite< - unary_operator, - actor - > -> -operator++(const actor &_1, int) -{ - return compose(unary_operator(), _1); -} // end operator++() - -// there's no standard prefix_decrement functional, so roll an ad hoc one here -template - struct prefix_decrement - : public thrust::unary_function -{ - __host__ __device__ T& operator()(T &x) const { return --x; } -}; // end prefix_decrement - -template -__host__ __device__ -actor< - composite< - unary_operator, - actor - > -> -operator--(const actor &_1) -{ - return compose(unary_operator(), _1); -} // end operator--() - -// there's no standard suffix_decrement functional, so roll an ad hoc one here -template - struct suffix_decrement - : public thrust::unary_function -{ - __host__ __device__ T operator()(T &x) const { return x--; } -}; // end suffix_decrement - -template -__host__ __device__ -actor< - composite< - unary_operator, - actor - > -> -operator--(const actor &_1, int) -{ - return compose(unary_operator(), _1); -} // end operator--() - -} // end functional -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/functional/operators/assignment_operator.h b/compat/thrust/detail/functional/operators/assignment_operator.h deleted file mode 100644 index e5d66202bf..0000000000 --- a/compat/thrust/detail/functional/operators/assignment_operator.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ - -// XXX WAR circular inclusion with this forward declaration -template struct binary_function; - -namespace detail -{ -namespace functional -{ - -// XXX WAR circular inclusion with this forward declaration -template struct as_actor; - -// there's no standard assign functional, so roll an ad hoc one here -template - struct assign - : thrust::binary_function -{ - __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs = rhs; } -}; // end assign - -template - struct assign_result -{ - typedef actor< - composite< - binary_operator, - actor, - typename as_actor::type - > - > type; -}; // end assign_result - -template - __host__ __device__ - typename assign_result::type - do_assign(const actor &_1, const T &_2) -{ - return compose(binary_operator(), - _1, - as_actor::convert(_2)); -} // end do_assign() - -} // end functional -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/functional/operators/bitwise_operators.h b/compat/thrust/detail/functional/operators/bitwise_operators.h deleted file mode 100644 index c89c5d4f83..0000000000 --- a/compat/thrust/detail/functional/operators/bitwise_operators.h +++ /dev/null @@ -1,313 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace functional -{ - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator&(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator&() - -template -__host__ __device__ -actor< - composite< - binary_operator, - typename as_actor::type, - actor - > -> -operator&(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator&() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator&(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator&() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator|(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator|() - -template -__host__ __device__ -actor< - composite< - binary_operator, - typename as_actor::type, - actor - > -> -operator|(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator|() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator|(const actor &_1, const actor &_2) -{ - return 
compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator|() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator^(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator^() - -template -__host__ __device__ -actor< - composite< - binary_operator, - typename as_actor::type, - actor - > -> -operator^(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator^() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator^(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator^() - -// there's no standard bit_not functional, so roll an ad hoc one here -template - struct bit_not - : public thrust::unary_function -{ - __host__ __device__ T operator()(const T &x) const {return ~x;} -}; // end bit_not - -template -__host__ __device__ -actor< - composite< - unary_operator, - actor - > -> -__host__ __device__ -operator~(const actor &_1) -{ - return compose(unary_operator(), _1); -} // end operator~() - -// there's no standard bit_lshift functional, so roll an ad hoc one here -template - struct bit_lshift - : public thrust::binary_function -{ - __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs << rhs;} -}; // end bit_lshift - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator<<(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator<<() - -template -__host__ __device__ -actor< - composite< - binary_operator, - typename as_actor::type, - actor - > -> -operator<<(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - 
make_actor(_2)); -} // end operator<<() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator<<(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator<<() - -// there's no standard bit_rshift functional, so roll an ad hoc one here -template - struct bit_rshift - : public thrust::binary_function -{ - __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs >> rhs;} -}; // end bit_rshift - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator>>(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator>>() - -template -__host__ __device__ -actor< - composite< - binary_operator, - typename as_actor::type, - actor - > -> -operator>>(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator>>() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator>>(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator>>() - -} // end functional -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/functional/operators/compound_assignment_operators.h b/compat/thrust/detail/functional/operators/compound_assignment_operators.h deleted file mode 100644 index ef7389b55c..0000000000 --- a/compat/thrust/detail/functional/operators/compound_assignment_operators.h +++ /dev/null @@ -1,424 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace functional -{ - -template - struct plus_equal - : public thrust::binary_function -{ - __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs += rhs; } -}; // end plus_equal - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator+=(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator+=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator+=(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator+=() - -template - struct minus_equal - : public thrust::binary_function -{ - __host__ __device__ T& operator()(T &lhs, const T &rhs) const { return lhs -= rhs; } -}; // end minus_equal - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator-=(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator-=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator-=(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator-=() - -template - struct multiplies_equal - : public thrust::binary_function -{ - 
__host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs *= rhs; } -}; // end multiplies_equal - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator*=(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator*=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator*=(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator*=() - -template - struct divides_equal - : public thrust::binary_function -{ - __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs /= rhs; } -}; // end divides_equal - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator/=(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator/=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator/=(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator/=() - -template - struct modulus_equal - : public thrust::binary_function -{ - __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs %= rhs; } -}; // end modulus_equal - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator%=(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator%=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator%=(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator%=() - -template - struct 
bit_and_equal - : public thrust::binary_function -{ - __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs &= rhs; } -}; // end bit_and_equal - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator&=(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator&=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator&=(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator&=() - -template - struct bit_or_equal - : public thrust::binary_function -{ - __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs |= rhs; } -}; // end bit_or_equal - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator|=(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator|=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator|=(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator|=() - -template - struct bit_xor_equal - : public thrust::binary_function -{ - __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs ^= rhs; } -}; // end bit_xor_equal - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator^=(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator|=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator^=(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - 
make_actor(_2)); -} // end operator|=() - -template - struct bit_lshift_equal - : public thrust::binary_function -{ - __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs <<= rhs; } -}; // end bit_lshift_equal - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator<<=(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator<<=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator<<=(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator<<=() - -template - struct bit_rshift_equal - : public thrust::binary_function -{ - __host__ __device__ T& operator()(T &lhs, const T&rhs) const { return lhs >>= rhs; } -}; // end bit_rshift_equal - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator>>=(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator>>=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator>>=(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator>>=() - -} // end functional -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/functional/operators/logical_operators.h b/compat/thrust/detail/functional/operators/logical_operators.h deleted file mode 100644 index 9c952620db..0000000000 --- a/compat/thrust/detail/functional/operators/logical_operators.h +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace functional -{ - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator&&(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator&&() - -template -__host__ __device__ -actor< - composite< - binary_operator, - typename as_actor::type, - actor - > -> -operator&&(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator&&() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator&&(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator&&() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator||(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator&&() - -template -__host__ __device__ -actor< - composite< - binary_operator, - typename as_actor::type, - actor - > -> -operator||(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator&&() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator||(const actor &_1, const actor &_2) -{ - return 
compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator&&() - -template -__host__ __device__ -actor< - composite< - unary_operator, - actor - > -> -operator!(const actor &_1) -{ - return compose(unary_operator(), _1); -} // end operator!() - -} // end functional -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/functional/operators/operator_adaptors.h b/compat/thrust/detail/functional/operators/operator_adaptors.h deleted file mode 100644 index d35fe9726b..0000000000 --- a/compat/thrust/detail/functional/operators/operator_adaptors.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace functional -{ - -// this thing (which models Eval) is an adaptor for the unary -// functors inside functional.h -template class UnaryOperator> - struct unary_operator -{ - template - struct argument - : thrust::detail::eval_if< - (thrust::tuple_size::value == 0), - thrust::detail::identity_, - thrust::tuple_element<0,Env> - > - { - }; - - template - struct operator_type - { - typedef UnaryOperator< - typename thrust::detail::remove_reference< - typename argument::type - >::type - > type; - }; - - template - struct result - { - typedef typename operator_type::type op_type; - typedef typename op_type::result_type type; - }; - - template - __host__ __device__ - typename result::type eval(const Env &e) const - { - typename operator_type::type op; - return op(thrust::get<0>(e)); - } // end eval() -}; // end unary_operator - -// this thing (which models Eval) is an adaptor for the binary -// functors inside functional.h -template class BinaryOperator> - struct binary_operator -{ - template - struct first_argument - : thrust::detail::eval_if< - (thrust::tuple_size::value == 0), - thrust::detail::identity_, - thrust::tuple_element<0,Env> - > - { - }; - - template - struct operator_type - { - typedef BinaryOperator< - typename thrust::detail::remove_reference< - typename first_argument::type - >::type - > type; - }; - - template - struct result - { - typedef typename operator_type::type op_type; - typedef typename op_type::result_type type; - }; - - template - __host__ __device__ - typename result::type eval(const Env &e) const - { - typename operator_type::type op; - return op(thrust::get<0>(e), thrust::get<1>(e)); - } // end eval() -}; // end binary_operator - -} // end functional -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/functional/operators/relational_operators.h b/compat/thrust/detail/functional/operators/relational_operators.h deleted 
file mode 100644 index 6b26534430..0000000000 --- a/compat/thrust/detail/functional/operators/relational_operators.h +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace functional -{ - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator==(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator==() - -template -__host__ __device__ -actor< - composite< - binary_operator, - typename as_actor::type, - actor - > -> -operator==(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator==() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator==(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator==() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator!=(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator!=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - 
typename as_actor::type, - actor - > -> -operator!=(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator!=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator!=(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator!=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator>(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator>() - -template -__host__ __device__ -actor< - composite< - binary_operator, - typename as_actor::type, - actor - > -> -operator>(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator>() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator>(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator>() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator<(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator<() - -template -__host__ __device__ -actor< - composite< - binary_operator, - typename as_actor::type, - actor - > -> -operator<(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator<() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator<(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator<() - -template -__host__ __device__ -actor< - composite< - binary_operator, - 
actor, - typename as_actor::type - > -> -operator>=(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator>=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - typename as_actor::type, - actor - > -> -operator>=(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator>=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator>=(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator>=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - typename as_actor::type - > -> -operator<=(const actor &_1, const T2 &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator<=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - typename as_actor::type, - actor - > -> -operator<=(const T1 &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator<=() - -template -__host__ __device__ -actor< - composite< - binary_operator, - actor, - actor - > -> -operator<=(const actor &_1, const actor &_2) -{ - return compose(binary_operator(), - make_actor(_1), - make_actor(_2)); -} // end operator<=() - -} // end functional -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/functional/placeholder.h b/compat/thrust/detail/functional/placeholder.h deleted file mode 100644 index 9acf6da803..0000000000 --- a/compat/thrust/detail/functional/placeholder.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace functional -{ - -template - struct placeholder -{ - typedef actor > type; -}; - -} // end functional -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/functional/value.h b/compat/thrust/detail/functional/value.h deleted file mode 100644 index 27e2802e3a..0000000000 --- a/compat/thrust/detail/functional/value.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -// Portions of this code are derived from -// -// Manjunath Kudlur's Carbon library -// -// and -// -// Based on Boost.Phoenix v1.2 -// Copyright (c) 2001-2002 Joel de Guzman - -#pragma once - -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace functional -{ - - -template struct actor; - - -template - class value -{ - public: - - template - struct result - { - typedef T type; - }; - - __host__ __device__ - value(const T &arg) - : m_val(arg) - {} - - template - __host__ __device__ - T eval(const Env &) const - { - return m_val; - } - - private: - T m_val; -}; // end value - -template -__host__ __device__ -actor > val(const T &x) -{ - return value(x); -} // end val() - - -} // end functional -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/gather.inl b/compat/thrust/detail/gather.inl deleted file mode 100644 index 4edecd038a..0000000000 --- a/compat/thrust/detail/gather.inl +++ /dev/null @@ -1,160 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file gather.inl - * \brief Inline file for gather.h. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - OutputIterator gather(const thrust::detail::execution_policy_base &exec, - InputIterator map_first, - InputIterator map_last, - RandomAccessIterator input_first, - OutputIterator result) -{ - using thrust::system::detail::generic::gather; - return gather(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), map_first, map_last, input_first, result); -} // end gather() - - -template - OutputIterator gather_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 map_first, - InputIterator1 map_last, - InputIterator2 stencil, - RandomAccessIterator input_first, - OutputIterator result) -{ - using thrust::system::detail::generic::gather_if; - return gather_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), map_first, map_last, stencil, input_first, result); -} // end gather_if() - - -template - OutputIterator gather_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 map_first, - InputIterator1 map_last, - InputIterator2 stencil, - RandomAccessIterator input_first, - OutputIterator result, - Predicate pred) -{ - using thrust::system::detail::generic::gather_if; - return gather_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), map_first, map_last, stencil, input_first, result, pred); -} // end gather_if() - - -template - OutputIterator gather(InputIterator map_first, - InputIterator map_last, - RandomAccessIterator input_first, - OutputIterator result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::gather(select_system(system1,system2,system3), map_first, map_last, input_first, result); -} // end gather() - - -template - OutputIterator 
gather_if(InputIterator1 map_first, - InputIterator1 map_last, - InputIterator2 stencil, - RandomAccessIterator input_first, - OutputIterator result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - - return thrust::gather_if(select_system(system1,system2,system3,system4), map_first, map_last, stencil, input_first, result); -} // end gather_if() - - -template - OutputIterator gather_if(InputIterator1 map_first, - InputIterator1 map_last, - InputIterator2 stencil, - RandomAccessIterator input_first, - OutputIterator result, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - - return thrust::gather_if(select_system(system1,system2,system3,system4), map_first, map_last, stencil, input_first, result, pred); -} // end gather_if() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/generate.inl b/compat/thrust/detail/generate.inl deleted file mode 100644 index c12580452e..0000000000 --- a/compat/thrust/detail/generate.inl +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file generate.inl - * \author Jared Hoberock - * \brief Inline file for generate.h. - */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - void generate(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - Generator gen) -{ - using thrust::system::detail::generic::generate; - return generate(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, gen); -} // end generate() - - -template - OutputIterator generate_n(const thrust::detail::execution_policy_base &exec, - OutputIterator first, - Size n, - Generator gen) -{ - using thrust::system::detail::generic::generate_n; - return generate_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, gen); -} // end generate_n() - - -template - void generate(ForwardIterator first, - ForwardIterator last, - Generator gen) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::generate(select_system(system), first, last, gen); -} // end generate() - - -template - OutputIterator generate_n(OutputIterator first, - Size n, - Generator gen) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::generate_n(select_system(system), first, n, gen); -} // end generate_n() - - -} // end thrust - diff --git a/compat/thrust/detail/host_vector.inl 
b/compat/thrust/detail/host_vector.inl deleted file mode 100644 index e5c60ab973..0000000000 --- a/compat/thrust/detail/host_vector.inl +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file host_vector.inl - * \brief Inline file for host_vector.h. - */ - -#include - -namespace thrust -{ - -template - template - host_vector - ::host_vector(const device_vector &v) - :Parent(v) -{ - ; -} // end host_vector::host_vector() - -} // end namespace thrust - diff --git a/compat/thrust/detail/inner_product.inl b/compat/thrust/detail/inner_product.inl deleted file mode 100644 index f7773d8d2c..0000000000 --- a/compat/thrust/detail/inner_product.inl +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file inner_product.inl - * \brief Inline file for inner_product.h. 
- */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template -OutputType inner_product(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputType init) -{ - using thrust::system::detail::generic::inner_product; - return inner_product(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, init); -} // end inner_product() - - -template -OutputType inner_product(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputType init, - BinaryFunction1 binary_op1, - BinaryFunction2 binary_op2) -{ - using thrust::system::detail::generic::inner_product; - return inner_product(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, init, binary_op1, binary_op2); -} // end inner_product() - - -template -OutputType -inner_product(InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, OutputType init) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::inner_product(select_system(system1,system2), first1, last1, first2, init); -} // end inner_product() - - -template -OutputType -inner_product(InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, OutputType init, - BinaryFunction1 binary_op1, BinaryFunction2 binary_op2) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::inner_product(select_system(system1,system2), first1, last1, first2, init, binary_op1, binary_op2); -} // end inner_product() - - -} // end namespace thrust - diff 
--git a/compat/thrust/detail/integer_traits.h b/compat/thrust/detail/integer_traits.h deleted file mode 100644 index e4cf5d159f..0000000000 --- a/compat/thrust/detail/integer_traits.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ - -namespace detail -{ - -template - class integer_traits -{ - public: - static const bool is_integral = false; -}; - -template - class integer_traits_base -{ - public: - static const bool is_integral = true; - static const T const_min = min_val; - static const T const_max = max_val; -}; - - -template<> - class integer_traits - : public std::numeric_limits, - public integer_traits_base -{}; - - -template<> - class integer_traits - : public std::numeric_limits, - public integer_traits_base -{}; - - -template<> - class integer_traits - : public std::numeric_limits, - public integer_traits_base -{}; - - -template<> - class integer_traits - : public std::numeric_limits, - public integer_traits_base -{}; - - -template<> - class integer_traits - : public std::numeric_limits, - public integer_traits_base -{}; - - -template<> - class integer_traits - : public std::numeric_limits, - public integer_traits_base -{}; - - -template<> - class integer_traits - : public std::numeric_limits, - public integer_traits_base -{}; - - -template<> - class integer_traits - : public std::numeric_limits, 
- public integer_traits_base -{}; - - -template<> - class integer_traits - : public std::numeric_limits, - public integer_traits_base -{}; - - -template<> - class integer_traits - : public std::numeric_limits, - public integer_traits_base -{}; - - -template<> - class integer_traits - : public std::numeric_limits, - public integer_traits_base -{}; - - -template<> - class integer_traits - : public std::numeric_limits, - public integer_traits_base -{}; - -} // end detail - -} // end thrust - diff --git a/compat/thrust/detail/internal_functional.h b/compat/thrust/detail/internal_functional.h deleted file mode 100644 index 6d5264ae35..0000000000 --- a/compat/thrust/detail/internal_functional.h +++ /dev/null @@ -1,678 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file internal_functional.inl - * \brief Non-public functionals used to implement algorithm internals. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include // for ::new - -namespace thrust -{ -namespace detail -{ - -// unary_negate does not need to know argument_type -template -struct unary_negate -{ - typedef bool result_type; - - Predicate pred; - - __host__ __device__ - explicit unary_negate(const Predicate& pred) : pred(pred) {} - - template - __host__ __device__ - bool operator()(const T& x) - { - return !bool(pred(x)); - } -}; - -// binary_negate does not need to know first_argument_type or second_argument_type -template -struct binary_negate -{ - typedef bool result_type; - - Predicate pred; - - __host__ __device__ - explicit binary_negate(const Predicate& pred) : pred(pred) {} - - template - __host__ __device__ - bool operator()(const T1& x, const T2& y) - { - return !bool(pred(x,y)); - } -}; - -template - __host__ __device__ - thrust::detail::unary_negate not1(const Predicate &pred) -{ - return thrust::detail::unary_negate(pred); -} - -template - __host__ __device__ - thrust::detail::binary_negate not2(const Predicate &pred) -{ - return thrust::detail::binary_negate(pred); -} - - -// convert a predicate to a 0 or 1 integral value -template -struct predicate_to_integral -{ - Predicate pred; - - __host__ __device__ - explicit predicate_to_integral(const Predicate& pred) : pred(pred) {} - - template - __host__ __device__ - bool operator()(const T& x) - { - return pred(x) ? 
IntegralType(1) : IntegralType(0); - } -}; - - -// note that detail::equal_to does not force conversion from T2 -> T1 as equal_to does -template -struct equal_to -{ - typedef bool result_type; - - template - __host__ __device__ - bool operator()(const T1& lhs, const T2& rhs) const - { - return lhs == rhs; - } -}; - -// note that equal_to_value does not force conversion from T2 -> T1 as equal_to does -template -struct equal_to_value -{ - T2 rhs; - - equal_to_value(const T2& rhs) : rhs(rhs) {} - - template - __host__ __device__ - bool operator()(const T1& lhs) const - { - return lhs == rhs; - } -}; - -template -struct tuple_binary_predicate -{ - typedef bool result_type; - - __host__ __device__ - tuple_binary_predicate(const Predicate& p) : pred(p) {} - - template - __host__ __device__ - bool operator()(const Tuple& t) const - { - return pred(thrust::get<0>(t), thrust::get<1>(t)); - } - - Predicate pred; -}; - -template -struct tuple_not_binary_predicate -{ - typedef bool result_type; - - __host__ __device__ - tuple_not_binary_predicate(const Predicate& p) : pred(p) {} - - template - __host__ __device__ - bool operator()(const Tuple& t) const - { - return !pred(thrust::get<0>(t), thrust::get<1>(t)); - } - - Predicate pred; -}; - -template - struct host_generate_functor -{ - typedef void result_type; - - __host__ __device__ - host_generate_functor(Generator g) - : gen(g) {} - - // operator() does not take an lvalue reference because some iterators - // produce temporary proxy references when dereferenced. for example, - // consider the temporary tuple of references produced by zip_iterator. - // such temporaries cannot bind to an lvalue reference. - // - // to WAR this, accept a const reference (which is bindable to a temporary), - // and const_cast in the implementation. 
- // - // XXX change to an rvalue reference upon c++0x (which either a named variable - // or temporary can bind to) - template - __host__ - void operator()(const T &x) - { - // we have to be naughty and const_cast this to get it to work - T &lvalue = const_cast(x); - - // this assigns correctly whether x is a true reference or proxy - lvalue = gen(); - } - - Generator gen; -}; - -template - struct device_generate_functor -{ - typedef void result_type; - - __host__ __device__ - device_generate_functor(Generator g) - : gen(g) {} - - // operator() does not take an lvalue reference because some iterators - // produce temporary proxy references when dereferenced. for example, - // consider the temporary tuple of references produced by zip_iterator. - // such temporaries cannot bind to an lvalue reference. - // - // to WAR this, accept a const reference (which is bindable to a temporary), - // and const_cast in the implementation. - // - // XXX change to an rvalue reference upon c++0x (which either a named variable - // or temporary can bind to) - template - __host__ __device__ - void operator()(const T &x) - { - // we have to be naughty and const_cast this to get it to work - T &lvalue = const_cast(x); - - // this assigns correctly whether x is a true reference or proxy - lvalue = gen(); - } - - Generator gen; -}; - -template - struct generate_functor - : thrust::detail::eval_if< - thrust::detail::is_convertible::value, - thrust::detail::identity_ >, - thrust::detail::identity_ > - > -{}; - - -template - struct zipped_binary_op -{ - typedef ResultType result_type; - - __host__ __device__ - zipped_binary_op(BinaryFunction binary_op) - : m_binary_op(binary_op) {} - - template - __host__ __device__ - inline result_type operator()(Tuple t) - { - return m_binary_op(thrust::get<0>(t), thrust::get<1>(t)); - } - - BinaryFunction m_binary_op; -}; - - -template - struct is_non_const_reference - : thrust::detail::and_< - thrust::detail::not_ >, - thrust::detail::is_reference - > 
-{}; - -template struct is_tuple_of_iterator_references : thrust::detail::false_type {}; - -template - struct is_tuple_of_iterator_references< - thrust::detail::tuple_of_iterator_references< - T1,T2,T3,T4,T5,T6,T7,T8,T9,T10 - > - > - : thrust::detail::true_type -{}; - -// use this enable_if to avoid assigning to temporaries in the transform functors below -// XXX revisit this problem with c++11 perfect forwarding -template - struct enable_if_non_const_reference_or_tuple_of_iterator_references - : thrust::detail::enable_if< - is_non_const_reference::value || is_tuple_of_iterator_references::value - > -{}; - - -template - struct host_unary_transform_functor -{ - typedef void result_type; - - UnaryFunction f; - - host_unary_transform_functor(UnaryFunction f_) - :f(f_) {} - - template - inline __host__ - typename enable_if_non_const_reference_or_tuple_of_iterator_references< - typename thrust::tuple_element<1,Tuple>::type - >::type - operator()(Tuple t) - { - thrust::get<1>(t) = f(thrust::get<0>(t)); - } -}; - -template - struct device_unary_transform_functor -{ - typedef void result_type; - - UnaryFunction f; - - device_unary_transform_functor(UnaryFunction f_) - :f(f_) {} - - // add __host__ to allow the omp backend compile with nvcc - template - inline __host__ __device__ - typename enable_if_non_const_reference_or_tuple_of_iterator_references< - typename thrust::tuple_element<1,Tuple>::type - >::type - operator()(Tuple t) - { - thrust::get<1>(t) = f(thrust::get<0>(t)); - } -}; - - -template - struct unary_transform_functor - : thrust::detail::eval_if< - thrust::detail::is_convertible::value, - thrust::detail::identity_ >, - thrust::detail::identity_ > - > -{}; - - -template - struct host_binary_transform_functor -{ - BinaryFunction f; - - host_binary_transform_functor(BinaryFunction f_) - :f(f_) - {} - - template - __host__ - void operator()(Tuple t) - { - thrust::get<2>(t) = f(thrust::get<0>(t), thrust::get<1>(t)); - } -}; // end binary_transform_functor - - 
-template - struct device_binary_transform_functor -{ - BinaryFunction f; - - device_binary_transform_functor(BinaryFunction f_) - :f(f_) - {} - - // add __host__ to allow the omp backend compile with nvcc - template - inline __host__ __device__ - typename enable_if_non_const_reference_or_tuple_of_iterator_references< - typename thrust::tuple_element<2,Tuple>::type - >::type - operator()(Tuple t) - { - thrust::get<2>(t) = f(thrust::get<0>(t), thrust::get<1>(t)); - } -}; // end binary_transform_functor - - -template - struct binary_transform_functor - : thrust::detail::eval_if< - thrust::detail::is_convertible::value, - thrust::detail::identity_ >, - thrust::detail::identity_ > - > -{}; - - -template -struct host_unary_transform_if_functor -{ - UnaryFunction unary_op; - Predicate pred; - - host_unary_transform_if_functor(UnaryFunction unary_op_, Predicate pred_) - : unary_op(unary_op_), pred(pred_) {} - - template - inline __host__ - typename enable_if_non_const_reference_or_tuple_of_iterator_references< - typename thrust::tuple_element<1,Tuple>::type - >::type - operator()(Tuple t) - { - if(pred(thrust::get<0>(t))) - { - thrust::get<1>(t) = unary_op(thrust::get<0>(t)); - } - } -}; // end host_unary_transform_if_functor - - -template -struct device_unary_transform_if_functor -{ - UnaryFunction unary_op; - Predicate pred; - - device_unary_transform_if_functor(UnaryFunction unary_op_, Predicate pred_) - : unary_op(unary_op_), pred(pred_) {} - - template - inline __host__ __device__ - typename enable_if_non_const_reference_or_tuple_of_iterator_references< - typename thrust::tuple_element<1,Tuple>::type - >::type - operator()(Tuple t) - { - if(pred(thrust::get<0>(t))) - { - thrust::get<1>(t) = unary_op(thrust::get<0>(t)); - } - } -}; // end device_unary_transform_if_functor - - -template - struct unary_transform_if_functor - : thrust::detail::eval_if< - thrust::detail::is_convertible::value, - thrust::detail::identity_ >, - thrust::detail::identity_ > - > -{}; - - 
-template -struct host_unary_transform_if_with_stencil_functor -{ - UnaryFunction unary_op; - Predicate pred; - - host_unary_transform_if_with_stencil_functor(UnaryFunction _unary_op, Predicate _pred) - : unary_op(_unary_op), pred(_pred) {} - - template - inline __host__ - typename enable_if_non_const_reference_or_tuple_of_iterator_references< - typename thrust::tuple_element<2,Tuple>::type - >::type - operator()(Tuple t) - { - if(pred(thrust::get<1>(t))) - thrust::get<2>(t) = unary_op(thrust::get<0>(t)); - } -}; // end host_unary_transform_if_with_stencil_functor - - -template -struct device_unary_transform_if_with_stencil_functor -{ - UnaryFunction unary_op; - Predicate pred; - - device_unary_transform_if_with_stencil_functor(UnaryFunction _unary_op, Predicate _pred) - : unary_op(_unary_op), pred(_pred) {} - - // add __host__ to allow the omp backend compile with nvcc - template - inline __host__ __device__ - typename enable_if_non_const_reference_or_tuple_of_iterator_references< - typename thrust::tuple_element<2,Tuple>::type - >::type - operator()(Tuple t) - { - if(pred(thrust::get<1>(t))) - thrust::get<2>(t) = unary_op(thrust::get<0>(t)); - } -}; // end device_unary_transform_if_with_stencil_functor - - -template - struct unary_transform_if_with_stencil_functor - : thrust::detail::eval_if< - thrust::detail::is_convertible::value, - thrust::detail::identity_ >, - thrust::detail::identity_ > - > -{}; - - -template -struct host_binary_transform_if_functor -{ - BinaryFunction binary_op; - Predicate pred; - - host_binary_transform_if_functor(BinaryFunction _binary_op, Predicate _pred) - : binary_op(_binary_op), pred(_pred) {} - - template - inline __host__ - typename enable_if_non_const_reference_or_tuple_of_iterator_references< - typename thrust::tuple_element<3,Tuple>::type - >::type - operator()(Tuple t) - { - if(pred(thrust::get<2>(t))) - thrust::get<3>(t) = binary_op(thrust::get<0>(t), thrust::get<1>(t)); - } -}; // end host_binary_transform_if_functor - - 
-template -struct device_binary_transform_if_functor -{ - BinaryFunction binary_op; - Predicate pred; - - device_binary_transform_if_functor(BinaryFunction _binary_op, Predicate _pred) - : binary_op(_binary_op), pred(_pred) {} - - // add __host__ to allow the omp backend compile with nvcc - template - inline __host__ __device__ - typename enable_if_non_const_reference_or_tuple_of_iterator_references< - typename thrust::tuple_element<3,Tuple>::type - >::type - operator()(Tuple t) - { - if(pred(thrust::get<2>(t))) - thrust::get<3>(t) = binary_op(thrust::get<0>(t), thrust::get<1>(t)); - } -}; // end device_binary_transform_if_functor - - -template - struct binary_transform_if_functor - : thrust::detail::eval_if< - thrust::detail::is_convertible::value, - thrust::detail::identity_ >, - thrust::detail::identity_ > - > -{}; - - -template - struct host_destroy_functor -{ - __host__ - void operator()(T &x) const - { - x.~T(); - } // end operator()() -}; // end host_destroy_functor - - -template - struct device_destroy_functor -{ - // add __host__ to allow the omp backend to compile with nvcc - __host__ __device__ - void operator()(T &x) const - { - x.~T(); - } // end operator()() -}; // end device_destroy_functor - - -template - struct destroy_functor - : thrust::detail::eval_if< - thrust::detail::is_convertible::value, - thrust::detail::identity_ >, - thrust::detail::identity_ > - > -{}; - - -template -struct fill_functor -{ - const T exemplar; - - fill_functor(const T& _exemplar) - : exemplar(_exemplar) {} - - __host__ __device__ - T operator()(void) const - { - return exemplar; - } -}; - - -template - struct uninitialized_fill_functor -{ - T exemplar; - - uninitialized_fill_functor(T x):exemplar(x){} - - __host__ __device__ - void operator()(T &x) - { - ::new(static_cast(&x)) T(exemplar); - } // end operator()() -}; // end uninitialized_fill_functor - - -// this predicate tests two two-element tuples -// we first use a Compare for the first element -// if the first 
elements are equivalent, we use -// < for the second elements -template - struct compare_first_less_second -{ - compare_first_less_second(Compare c) - : comp(c) {} - - template - __host__ __device__ - bool operator()(T1 lhs, T2 rhs) - { - return comp(thrust::get<0>(lhs), thrust::get<0>(rhs)) || (!comp(thrust::get<0>(rhs), thrust::get<0>(lhs)) && thrust::get<1>(lhs) < thrust::get<1>(rhs)); - } - - Compare comp; -}; // end compare_first_less_second - - -template - struct compare_first -{ - Compare comp; - - compare_first(Compare comp) - : comp(comp) - {} - - template - __host__ __device__ - bool operator()(const Tuple1 &x, const Tuple2 &y) - { - return comp(thrust::raw_reference_cast(thrust::get<0>(x)), thrust::raw_reference_cast(thrust::get<0>(y))); - } -}; // end compare_first - - -} // end namespace detail -} // end namespace thrust - diff --git a/compat/thrust/detail/logical.inl b/compat/thrust/detail/logical.inl deleted file mode 100644 index 126a3e3fb1..0000000000 --- a/compat/thrust/detail/logical.inl +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file logical.inl - * \brief Inline file for logical.h. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template -bool all_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred) -{ - using thrust::system::detail::generic::all_of; - return all_of(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); -} // end all_of() - - -template -bool any_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred) -{ - using thrust::system::detail::generic::any_of; - return any_of(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); -} // end any_of() - - -template -bool none_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred) -{ - using thrust::system::detail::generic::none_of; - return none_of(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); -} // end none_of() - - -template -bool all_of(InputIterator first, InputIterator last, Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::all_of(select_system(system), first, last, pred); -} - - -template -bool any_of(InputIterator first, InputIterator last, Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::any_of(select_system(system), first, last, pred); -} - - -template -bool none_of(InputIterator first, InputIterator last, Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::none_of(select_system(system), first, last, pred); -} - - -} // end namespace thrust - diff --git a/compat/thrust/detail/malloc_and_free.h 
b/compat/thrust/detail/malloc_and_free.h deleted file mode 100644 index 57b1685476..0000000000 --- a/compat/thrust/detail/malloc_and_free.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - -template -pointer malloc(const thrust::detail::execution_policy_base &exec, std::size_t n) -{ - using thrust::system::detail::generic::malloc; - - // XXX should use a hypothetical thrust::static_pointer_cast here - void *raw_ptr = static_cast(thrust::raw_pointer_cast(malloc(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), n))); - - return pointer(raw_ptr); -} - -template -pointer malloc(const thrust::detail::execution_policy_base &exec, std::size_t n) -{ - using thrust::system::detail::generic::malloc; - - T *raw_ptr = static_cast(thrust::raw_pointer_cast(malloc(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), n))); - - return pointer(raw_ptr); -} - - -// XXX WAR nvbug 992955 -#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC -#if CUDA_VERSION < 5000 - -// cudafe generates unqualified calls to free(int *volatile) -// which get confused with thrust::free -// spoof a thrust::free which simply maps to ::free -inline __host__ __device__ -void free(int *volatile ptr) -{ - ::free(ptr); -} - -#endif // CUDA_VERSION -#endif // THRUST_DEVICE_COMPILER - 
-template -void free(const thrust::detail::execution_policy_base &exec, Pointer ptr) -{ - using thrust::system::detail::generic::free; - - free(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), ptr); -} - -// XXX consider another form of free which does not take a system argument and -// instead infers the system from the pointer - -} // end namespace thrust - diff --git a/compat/thrust/detail/merge.inl b/compat/thrust/detail/merge.inl deleted file mode 100644 index 77f09f5bed..0000000000 --- a/compat/thrust/detail/merge.inl +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file merge.inl - * \brief Inline file for merge.h. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - OutputIterator merge(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result) -{ - using thrust::system::detail::generic::merge; - return merge(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result); -} // end merge() - - -template - OutputIterator merge(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakCompare comp) -{ - using thrust::system::detail::generic::merge; - return merge(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result, comp); -} // end merge() - - -template - thrust::pair - merge_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, InputIterator1 keys_last1, - InputIterator2 keys_first2, InputIterator2 keys_last2, - InputIterator3 values_first1, InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result) -{ - using thrust::system::detail::generic::merge_by_key; - return merge_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result); -} // end merge_by_key() - - -template - thrust::pair - merge_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, InputIterator1 keys_last1, - InputIterator2 keys_first2, InputIterator2 keys_last2, - InputIterator3 values_first1, InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - Compare comp) -{ - using thrust::system::detail::generic::merge_by_key; - return 
merge_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); -} // end merge_by_key() - - -template - OutputIterator merge(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::merge(select_system(system1,system2,system3), first1, last1, first2, last2, result, comp); -} // end merge() - - -template - OutputIterator merge(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::merge(select_system(system1,system2,system3), first1, last1, first2, last2, result); -} // end merge() - - -template - thrust::pair - merge_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type 
System4; - typedef typename thrust::iterator_system::type System5; - typedef typename thrust::iterator_system::type System6; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - System5 system5; - System6 system6; - - return thrust::merge_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); -} // end merge_by_key() - - -template - thrust::pair - merge_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - typedef typename thrust::iterator_system::type System5; - typedef typename thrust::iterator_system::type System6; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - System5 system5; - System6 system6; - - return thrust::merge_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result); -} // end merge_by_key() - - -} // end thrust - diff --git a/compat/thrust/detail/minmax.h b/compat/thrust/detail/minmax.h deleted file mode 100644 index a560ea1fe5..0000000000 --- a/compat/thrust/detail/minmax.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -namespace thrust -{ - - -template -__host__ __device__ - T min THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs, BinaryPredicate comp) -{ - return comp(rhs, lhs) ? rhs : lhs; -} // end min() - -template -__host__ __device__ - T min THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs) -{ - return rhs < lhs ? rhs : lhs; -} // end min() - -template -__host__ __device__ - T max THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs, BinaryPredicate comp) -{ - return comp(lhs,rhs) ? rhs : lhs; -} // end max() - -template -__host__ __device__ - T max THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs) -{ - return lhs < rhs ? rhs : lhs; -} // end max() - - -} // end thrust - diff --git a/compat/thrust/detail/mismatch.inl b/compat/thrust/detail/mismatch.inl deleted file mode 100644 index 37ac663bbe..0000000000 --- a/compat/thrust/detail/mismatch.inl +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -/*! \file mismatch.inl - * \brief Inline file for mismatch.h - */ - - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template -thrust::pair mismatch(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2) -{ - using thrust::system::detail::generic::mismatch; - return mismatch(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2); -} // end mismatch() - - -template -thrust::pair mismatch(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - BinaryPredicate pred) -{ - using thrust::system::detail::generic::mismatch; - return mismatch(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, pred); -} // end mismatch() - - -template -thrust::pair mismatch(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::mismatch(select_system(system1,system2), first1, last1, first2); -} // end mismatch() - - -template -thrust::pair mismatch(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - BinaryPredicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::mismatch(select_system(system1,system2), first1, last1, first2, pred); -} // end mismatch() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/mpl/math.h b/compat/thrust/detail/mpl/math.h deleted file mode 100644 index 80adfc1e88..0000000000 --- a/compat/thrust/detail/mpl/math.h +++ /dev/null @@ 
-1,174 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file math.h - * \brief Math-related metaprogramming functionality. - */ - - -#pragma once - -namespace thrust -{ - -namespace detail -{ - -namespace mpl -{ - -namespace math -{ - -namespace detail -{ - -// compute the log base-2 of an integer at compile time -template -struct log2 -{ - static const unsigned int value = log2::value; -}; - -template -struct log2<1, Cur> -{ - static const unsigned int value = Cur; -}; - -template -struct log2<0, Cur> -{ - // undefined -}; - -} // end namespace detail - - -template -struct log2 -{ - static const unsigned int value = detail::log2::value; -}; - - -template -struct min -{ - static const T value = (lhs < rhs) ? lhs : rhs; -}; - - -template -struct max -{ - static const T value = (!(lhs < rhs)) ? 
lhs : rhs; -}; - - -template - struct mul -{ - static const result_type value = x * y; -}; - - -template - struct mod -{ - static const result_type value = x % y; -}; - - -template - struct div -{ - static const result_type value = x / y; -}; - - -template - struct geq -{ - static const bool value = x >= y; -}; - - -template - struct lt -{ - static const bool value = x < y; -}; - - -template - struct gt -{ - static const bool value = x > y; -}; - - -template - struct or_ -{ - static const bool value = (x || y); -}; - - -template - struct bit_and -{ - static const result_type value = x & y; -}; - - -template - struct plus -{ - static const result_type value = x + y; -}; - - -template - struct minus -{ - static const result_type value = x - y; -}; - - -template - struct equal -{ - static const bool value = x == y; -}; - - -template - struct is_odd -{ - static const bool value = x & 1; -}; - - -} // end namespace math - -} // end namespace mpl - -} // end namespace detail - -} // end namespace thrust - diff --git a/compat/thrust/detail/numeric_traits.h b/compat/thrust/detail/numeric_traits.h deleted file mode 100644 index a3bc56c211..0000000000 --- a/compat/thrust/detail/numeric_traits.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -//#include // for intmax_t (not provided on MSVS 2005) - -namespace thrust -{ - -namespace detail -{ - -// XXX good enough for the platforms we care about -typedef long long intmax_t; - -template - struct is_signed - : integral_constant::is_signed> -{}; // end is_signed - - -template - struct num_digits - : eval_if< - std::numeric_limits::is_specialized, - integral_constant< - int, - std::numeric_limits::digits - >, - integral_constant< - int, - sizeof(T) * std::numeric_limits::digits - (is_signed::value ? 1 : 0) - > - >::type -{}; // end num_digits - - -template - struct integer_difference - //: eval_if< - // sizeof(Integer) >= sizeof(intmax_t), - // eval_if< - // is_signed::value, - // identity_, - // identity_ - // >, - // eval_if< - // sizeof(Integer) < sizeof(std::ptrdiff_t), - // identity_, - // identity_ - // > - // > -{ - private: - // XXX workaround a pedantic warning in old versions of g++ - // which complains about &&ing with a constant value - template - struct and_ - { - static const bool value = false; - }; - - template - struct and_ - { - static const bool value = y; - }; - - public: - typedef typename - eval_if< - and_< - std::numeric_limits::is_signed, - // digits is the number of no-sign bits - (!std::numeric_limits::is_bounded || (int(std::numeric_limits::digits) + 1 >= num_digits::value)) - >::value, - identity_, - eval_if< - int(std::numeric_limits::digits) + 1 < num_digits::value, - identity_, - eval_if< - int(std::numeric_limits::digits) + 1 < num_digits::value, - identity_, - identity_ - > - > - >::type type; -}; // end integer_difference - - -template - struct numeric_difference - : eval_if< - is_integral::value, - integer_difference, - identity_ - > -{}; // end numeric_difference - - -template -__host__ __device__ -typename numeric_difference::type -numeric_distance(Number x, Number y) -{ - typedef typename numeric_difference::type difference_type; - return difference_type(y) - 
difference_type(x); -} // end numeric_distance - -} // end detail - -} // end thrust - diff --git a/compat/thrust/detail/overlapped_copy.h b/compat/thrust/detail/overlapped_copy.h deleted file mode 100644 index a5540b8643..0000000000 --- a/compat/thrust/detail/overlapped_copy.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - - -template - OutputIterator sequential_copy(InputIterator first, - InputIterator last, - OutputIterator result) -{ - for(; first != last; ++first, ++result) - { - *result = *first; - } // end for - - return result; -} // end sequential_copy() - - -template - BidirectionalIterator2 sequential_copy_backward(BidirectionalIterator1 first, - BidirectionalIterator1 last, - BidirectionalIterator2 result) -{ - // yes, we preincrement - // the ranges are open on the right, i.e. 
[first, last) - while(first != last) - { - *--result = *--last; - } // end while - - return result; -} // end sequential_copy_backward() - - -namespace dispatch -{ - - -template - RandomAccessIterator2 overlapped_copy(thrust::system::cpp::detail::execution_policy &, - RandomAccessIterator1 first, - RandomAccessIterator1 last, - RandomAccessIterator2 result) -{ - if(first < last && first <= result && result < last) - { - // result lies in [first, last) - // it's safe to use std::copy_backward here - thrust::detail::sequential_copy_backward(first, last, result + (last - first)); - result += (last - first); - } // end if - else - { - // result + (last - first) lies in [first, last) - // it's safe to use sequential_copy here - result = thrust::detail::sequential_copy(first, last, result); - } // end else - - return result; -} // end overlapped_copy() - - -template - RandomAccessIterator2 overlapped_copy(thrust::execution_policy &exec, - RandomAccessIterator1 first, - RandomAccessIterator1 last, - RandomAccessIterator2 result) -{ - typedef typename thrust::iterator_value::type value_type; - - // make a temporary copy of [first,last), and copy into it first - thrust::detail::temporary_array temp(exec, first, last); - return thrust::copy(exec, temp.begin(), temp.end(), result); -} // end overlapped_copy() - -} // end dispatch - - -template - RandomAccessIterator2 overlapped_copy(RandomAccessIterator1 first, - RandomAccessIterator1 last, - RandomAccessIterator2 result) -{ - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - typedef typename thrust::detail::minimum_system::type System; - - // XXX presumes System is default constructible - System system; - - return thrust::detail::dispatch::overlapped_copy(system, first, last, result); -} // end overlapped_copy() - -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/pair.inl b/compat/thrust/detail/pair.inl deleted file mode 100644 index 
776bdc2315..0000000000 --- a/compat/thrust/detail/pair.inl +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -namespace thrust -{ - -template - pair - ::pair(void) - :first(),second() -{ - ; -} // end pair::pair() - - -template - pair - ::pair(const T1 &x, const T2 &y) - :first(x),second(y) -{ - ; -} // end pair::pair() - - -template - template - pair - ::pair(const pair &p) - :first(p.first),second(p.second) -{ - ; -} // end pair::pair() - - -template - template - pair - ::pair(const std::pair &p) - :first(p.first),second(p.second) -{ - ; -} // end pair::pair() - - -template - inline __host__ __device__ - void pair - ::swap(thrust::pair &p) -{ - using thrust::swap; - - swap(first, p.first); - swap(second, p.second); -} // end pair::swap() - - -template - inline __host__ __device__ - bool operator==(const pair &x, const pair &y) -{ - return x.first == y.first && x.second == y.second; -} // end operator==() - - -template - inline __host__ __device__ - bool operator<(const pair &x, const pair &y) -{ - return x.first < y.first || (!(y.first < x.first) && x.second < y.second); -} // end operator<() - - -template - inline __host__ __device__ - bool operator!=(const pair &x, const pair &y) -{ - return !(x == y); -} // end operator==() - - -template - inline __host__ __device__ - bool operator>(const pair &x, const pair &y) -{ - return y < x; -} // end 
operator<() - - -template - inline __host__ __device__ - bool operator<=(const pair &x, const pair &y) -{ - return !(y < x); -} // end operator<=() - - -template - inline __host__ __device__ - bool operator>=(const pair &x, const pair &y) -{ - return !(x < y); -} // end operator>=() - - -template - inline __host__ __device__ - void swap(pair &x, pair &y) -{ - return x.swap(y); -} // end swap() - - -template - inline __host__ __device__ - pair make_pair(T1 x, T2 y) -{ - return pair(x,y); -} // end make_pair() - - -// specializations of tuple_element for pair -template - struct tuple_element<0, pair > -{ - typedef T1 type; -}; // end tuple_element - -template - struct tuple_element<1, pair > -{ - typedef T2 type; -}; // end tuple_element - - -// specialization of tuple_size for pair -template - struct tuple_size< pair > -{ - static const unsigned int value = 2; -}; // end tuple_size - - - -namespace detail -{ - - -template struct pair_get {}; - -template - struct pair_get<0, Pair> -{ - inline __host__ __device__ - const typename tuple_element<0, Pair>::type & - operator()(const Pair &p) const - { - return p.first; - } // end operator()() - - inline __host__ __device__ - typename tuple_element<0, Pair>::type & - operator()(Pair &p) const - { - return p.first; - } // end operator()() -}; // end pair_get - - -template - struct pair_get<1, Pair> -{ - inline __host__ __device__ - const typename tuple_element<1, Pair>::type & - operator()(const Pair &p) const - { - return p.second; - } // end operator()() - - inline __host__ __device__ - typename tuple_element<1, Pair>::type & - operator()(Pair &p) const - { - return p.second; - } // end operator()() -}; // end pair_get - -} // end detail - - - -template - inline __host__ __device__ - typename tuple_element >::type & - get(pair &p) -{ - return detail::pair_get >()(p); -} // end get() - -template - inline __host__ __device__ - const typename tuple_element >::type & - get(const pair &p) -{ - return detail::pair_get >()(p); 
-} // end get() - - -} // end thrust - diff --git a/compat/thrust/detail/partition.inl b/compat/thrust/detail/partition.inl deleted file mode 100644 index 19ef08a73c..0000000000 --- a/compat/thrust/detail/partition.inl +++ /dev/null @@ -1,398 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file partition.inl - * \brief Inline file for partition.h. - */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - ForwardIterator partition(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - using thrust::system::detail::generic::partition; - return partition(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); -} // end partition() - - -template - ForwardIterator partition(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred) -{ - using thrust::system::detail::generic::partition; - return partition(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, pred); -} // end partition() - - -template - thrust::pair - partition_copy(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - using 
thrust::system::detail::generic::partition_copy; - return partition_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, out_true, out_false, pred); -} // end partition_copy() - - -template - thrust::pair - partition_copy(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - using thrust::system::detail::generic::partition_copy; - return partition_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, out_true, out_false, pred); -} // end partition_copy() - - -template - ForwardIterator stable_partition(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - using thrust::system::detail::generic::stable_partition; - return stable_partition(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); -} // end stable_partition() - - -template - ForwardIterator stable_partition(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred) -{ - using thrust::system::detail::generic::stable_partition; - return stable_partition(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, pred); -} // end stable_partition() - - -template - thrust::pair - stable_partition_copy(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - using thrust::system::detail::generic::stable_partition_copy; - return stable_partition_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, out_true, out_false, pred); -} // end stable_partition_copy() - - -template - thrust::pair - stable_partition_copy(const 
thrust::detail::execution_policy_base &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - using thrust::system::detail::generic::stable_partition_copy; - return stable_partition_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, out_true, out_false, pred); -} // end stable_partition_copy() - - -template - ForwardIterator partition_point(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - using thrust::system::detail::generic::partition_point; - return partition_point(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); -} // end partition_point() - - -template - bool is_partitioned(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - Predicate pred) -{ - using thrust::system::detail::generic::is_partitioned; - return is_partitioned(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); -} // end is_partitioned() - - -template - ForwardIterator partition(ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::partition(select_system(system), first, last, pred); -} // end partition() - - -template - ForwardIterator partition(ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::partition(select_system(system1,system2), first, last, stencil, pred); -} // end partition() - - -template - ForwardIterator 
stable_partition(ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::stable_partition(select_system(system), first, last, pred); -} // end stable_partition() - - -template - ForwardIterator stable_partition(ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::stable_partition(select_system(system1,system2), first, last, stencil, pred); -} // end stable_partition() - - -template - thrust::pair - partition_copy(InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::partition_copy(select_system(system1,system2,system3), first, last, out_true, out_false, pred); -} // end partition_copy() - - -template - thrust::pair - partition_copy(InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - - return 
thrust::partition_copy(select_system(system1,system2,system3,system4), first, last, stencil, out_true, out_false, pred); -} // end partition_copy() - - -template - thrust::pair - stable_partition_copy(InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::stable_partition_copy(select_system(system1,system2,system3), first, last, out_true, out_false, pred); -} // end stable_partition_copy() - - -template - thrust::pair - stable_partition_copy(InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - - return thrust::stable_partition_copy(select_system(system1,system2,system3,system4), first, last, stencil, out_true, out_false, pred); -} // end stable_partition_copy() - - -template - ForwardIterator partition_point(ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::partition_point(select_system(system), first, last, pred); -} // end partition_point() - - -template - bool is_partitioned(InputIterator first, - InputIterator last, - Predicate pred) -{ - using 
thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::is_partitioned(select_system(system), first, last, pred); -} // end is_partitioned() - - -} // end thrust - diff --git a/compat/thrust/detail/pointer.h b/compat/thrust/detail/pointer.h deleted file mode 100644 index bc97939c77..0000000000 --- a/compat/thrust/detail/pointer.h +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - -// declare pointer with default values of template parameters -template class pointer; - -} // end thrust - - -// specialize std::iterator_traits to avoid problems with the name of -// pointer's constructor shadowing its nested pointer type -// do this before pointer is defined so the specialization is correctly -// used inside the definition -namespace std -{ - -template - struct iterator_traits > -{ - private: - typedef thrust::pointer ptr; - - public: - typedef typename ptr::iterator_category iterator_category; - typedef typename ptr::value_type value_type; - typedef typename ptr::difference_type difference_type; - // XXX implement this type (the result of operator->) later - typedef void pointer; - typedef typename ptr::reference reference; -}; // end iterator_traits - -} // end std - - -namespace thrust -{ - -namespace detail -{ - -// this metafunction computes the type of iterator_adaptor thrust::pointer should inherit from -template - struct pointer_base -{ - // void pointers should have no element type - // note that we remove_cv from the Element type to get the value_type - typedef typename thrust::detail::eval_if< - thrust::detail::is_void::type>::value, - thrust::detail::identity_, - thrust::detail::remove_cv - >::type value_type; - - // if no Derived type is given, just use pointer - typedef typename thrust::detail::eval_if< - thrust::detail::is_same::value, - thrust::detail::identity_ >, - thrust::detail::identity_ - >::type derived_type; - - // void pointers should have no reference type - // if no Reference type is given, just use reference - typedef typename thrust::detail::eval_if< - thrust::detail::is_void::type>::value, - thrust::detail::identity_, - thrust::detail::eval_if< - thrust::detail::is_same::value, - thrust::detail::identity_ >, - thrust::detail::identity_ - > - >::type reference_arg; - - typedef thrust::iterator_adaptor< - 
derived_type, // pass along the type of our Derived class to iterator_adaptor - Element *, // we adapt a raw pointer - value_type, // the value type - Tag, // system tag - thrust::random_access_traversal_tag, // pointers have random access traversal - reference_arg, // pass along our Reference type - std::ptrdiff_t - > type; -}; // end pointer_base - - -} // end detail - - -// the base type for all of thrust's tagged pointers. -// for reasonable pointer-like semantics, derived types should reimplement the following: -// 1. no-argument constructor -// 2. constructor from OtherElement * -// 3. constructor from OtherPointer related by convertibility -// 4. assignment from OtherPointer related by convertibility -// These should just call the corresponding members of pointer. -template - class pointer - : public thrust::detail::pointer_base::type -{ - private: - typedef typename thrust::detail::pointer_base::type super_t; - - typedef typename thrust::detail::pointer_base::derived_type derived_type; - - // friend iterator_core_access to give it access to dereference - friend class thrust::iterator_core_access; - - __host__ __device__ - typename super_t::reference dereference() const; - - // don't provide access to this part of super_t's interface - using super_t::base; - using typename super_t::base_type; - - public: - typedef typename super_t::base_type raw_pointer; - - // constructors - - __host__ __device__ - pointer(); - - // OtherValue shall be convertible to Value - // XXX consider making the pointer implementation a template parameter which defaults to Element * - template - __host__ __device__ - explicit pointer(OtherElement *ptr); - - // OtherPointer's element_type shall be convertible to Element - // OtherPointer's system shall be convertible to Tag - template - __host__ __device__ - pointer(const OtherPointer &other, - typename thrust::detail::enable_if_pointer_is_convertible< - OtherPointer, - pointer - >::type * = 0); - - // assignment - - // OtherPointer's 
element_type shall be convertible to Element - // OtherPointer's system shall be convertible to Tag - template - __host__ __device__ - typename thrust::detail::enable_if_pointer_is_convertible< - OtherPointer, - pointer, - derived_type & - >::type - operator=(const OtherPointer &other); - - // observers - - __host__ __device__ - Element *get() const; -}; // end pointer - -} // end thrust - -#include - diff --git a/compat/thrust/detail/pointer.inl b/compat/thrust/detail/pointer.inl deleted file mode 100644 index 1d066b041c..0000000000 --- a/compat/thrust/detail/pointer.inl +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - - -namespace thrust -{ - - -template - pointer - ::pointer() - : super_t(static_cast(0)) -{} // end pointer::pointer - - -template - template - pointer - ::pointer(OtherElement *other) - : super_t(other) -{} // end pointer::pointer - - -template - template - pointer - ::pointer(const OtherPointer &other, - typename thrust::detail::enable_if_pointer_is_convertible< - OtherPointer, - pointer - >::type *) - : super_t(thrust::detail::pointer_traits::get(other)) -{} // end pointer::pointer - - -template - template - typename thrust::detail::enable_if_pointer_is_convertible< - OtherPointer, - pointer, - typename pointer::derived_type & - >::type - pointer - ::operator=(const OtherPointer &other) -{ - super_t::base_reference() = thrust::detail::pointer_traits::get(other); - return static_cast(*this); -} // end pointer::operator= - - -template - typename pointer::super_t::reference - pointer - ::dereference() const -{ - return typename super_t::reference(static_cast(*this)); -} // end pointer::dereference - - -template - Element *pointer - ::get() const -{ - return super_t::base(); -} // end pointer::get - - -namespace detail -{ - -#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400) -// XXX WAR MSVC 2005 problem with correctly implementing -// pointer_raw_pointer for pointer by specializing it here -template - struct pointer_raw_pointer< thrust::pointer > -{ - typedef typename pointer::raw_pointer type; -}; // end pointer_raw_pointer -#endif - - -#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC) && (THRUST_GCC_VERSION < 40200) -// XXX WAR g++-4.1 problem with correctly implementing -// pointer_element for pointer by specializing it here -template - struct pointer_element< thrust::pointer > -{ - typedef Element type; -}; // end pointer_element - -template - struct pointer_element< thrust::pointer > - : pointer_element< thrust::pointer > -{}; // end pointer_element - -template - struct pointer_element< 
thrust::pointer > - : pointer_element< thrust::pointer > -{}; // end pointer_element - - - -// XXX WAR g++-4.1 problem with correctly implementing -// rebind_pointer for pointer by specializing it here -template - struct rebind_pointer, NewElement> -{ - // XXX note we don't attempt to rebind the pointer's Reference type (or Derived) - typedef thrust::pointer type; -}; - -template - struct rebind_pointer, NewElement> - : rebind_pointer, NewElement> -{}; - -template - struct rebind_pointer, NewElement> - : rebind_pointer, NewElement> -{}; -#endif - -} // end namespace detail - - -} // end thrust - diff --git a/compat/thrust/detail/range/tail_flags.h b/compat/thrust/detail/range/tail_flags.h deleted file mode 100644 index 06fd9f8a00..0000000000 --- a/compat/thrust/detail/range/tail_flags.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - - -template::type>, - typename IndexType = typename thrust::iterator_difference::type> - class tail_flags -{ - private: - struct tail_flag_functor - { - BinaryPredicate binary_pred; // this must be the first member for performance reasons - IndexType n; - - typedef bool result_type; - - tail_flag_functor(IndexType n) - : binary_pred(), n(n) - {} - - tail_flag_functor(IndexType n, BinaryPredicate binary_pred) - : binary_pred(binary_pred), n(n) - {} - - template - __host__ __device__ __thrust_forceinline__ - result_type operator()(const Tuple &t) - { - const IndexType i = thrust::get<0>(t); - - // note that we do not dereference the tuple's 2nd element when i >= n - // and therefore do not dereference a bad location at the boundary - return (i == (n - 1) || !binary_pred(thrust::get<1>(t), thrust::get<2>(t))); - } - }; - - typedef thrust::counting_iterator counting_iterator; - - public: - typedef thrust::transform_iterator< - tail_flag_functor, - thrust::zip_iterator > - > iterator; - - tail_flags(RandomAccessIterator first, RandomAccessIterator last) - : m_begin(thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator(0), first, first + 1)), - tail_flag_functor(last - first))), - m_end(m_begin + (last - first)) - {} - - tail_flags(RandomAccessIterator first, RandomAccessIterator last, BinaryPredicate binary_pred) - : m_begin(thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator(0), first, first + 1)), - tail_flag_functor(last - first, binary_pred))), - m_end(m_begin + (last - first)) - {} - - iterator begin() const - { - return m_begin; - } - - iterator end() const - { - return m_end; - } - - template - typename iterator::reference operator[](OtherIndex i) - { - return *(begin() + i); - } - - private: - iterator m_begin, m_end; -}; - - -template - 
tail_flags - make_tail_flags(RandomAccessIterator first, RandomAccessIterator last, BinaryPredicate binary_pred) -{ - return tail_flags(first, last, binary_pred); -} - - -template - tail_flags - make_tail_flags(RandomAccessIterator first, RandomAccessIterator last) -{ - return tail_flags(first, last); -} - - -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/raw_pointer_cast.h b/compat/thrust/detail/raw_pointer_cast.h deleted file mode 100644 index 05e1e6bc45..0000000000 --- a/compat/thrust/detail/raw_pointer_cast.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ - -template - inline __host__ __device__ typename thrust::detail::pointer_traits::raw_pointer - raw_pointer_cast(const Pointer &ptr) -{ - return thrust::detail::pointer_traits::get(ptr); -} // end raw_pointer_cast() - -} // end thrust - diff --git a/compat/thrust/detail/raw_reference_cast.h b/compat/thrust/detail/raw_reference_cast.h deleted file mode 100644 index 1ffd7e5701..0000000000 --- a/compat/thrust/detail/raw_reference_cast.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - - -__THRUST_DEFINE_HAS_NESTED_TYPE(is_wrapped_reference, wrapped_reference_hint) - -namespace raw_reference_detail -{ - -template - struct raw_reference - : add_reference -{}; - - -// XXX consider making raw_reference an error - - -template - struct raw_reference< - T, - typename thrust::detail::enable_if< - is_wrapped_reference< - typename remove_cv::type - >::value - >::type - > -{ - typedef typename add_reference< - typename pointer_element::type - >::type type; -}; - -} // end raw_reference_ns - -template - struct raw_reference : - raw_reference_detail::raw_reference -{}; - - -// wrapped reference-like things which aren't strictly wrapped references -// (e.g. 
tuples of wrapped references) are considered unwrappable -template - struct is_unwrappable - : is_wrapped_reference -{}; - - -template - struct enable_if_unwrappable - : enable_if< - is_unwrappable::value, - Result - > -{}; - - -} // end detail - - -template - inline __host__ __device__ typename detail::raw_reference::type raw_reference_cast(T &ref) -{ - return *thrust::raw_pointer_cast(&ref); -} // end raw_reference_cast - - -template - inline __host__ __device__ typename detail::raw_reference::type raw_reference_cast(const T &ref) -{ - return *thrust::raw_pointer_cast(&ref); -} // end raw_reference_cast - - -template< - typename T0, typename T1, typename T2, - typename T3, typename T4, typename T5, - typename T6, typename T7, typename T8, - typename T9 -> -inline __host__ __device__ -typename detail::enable_if_unwrappable< - thrust::detail::tuple_of_iterator_references, - typename detail::raw_reference< - thrust::detail::tuple_of_iterator_references - >::type ->::type -raw_reference_cast(detail::tuple_of_iterator_references t); - - -} // end thrust - -#include - diff --git a/compat/thrust/detail/raw_reference_cast.inl b/compat/thrust/detail/raw_reference_cast.inl deleted file mode 100644 index ea619ec028..0000000000 --- a/compat/thrust/detail/raw_reference_cast.inl +++ /dev/null @@ -1,277 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - -// specialize is_unwrappable -// a tuple is_unwrappable if any of its elements is_unwrappable -template< - typename T0, typename T1, typename T2, - typename T3, typename T4, typename T5, - typename T6, typename T7, typename T8, - typename T9 -> - struct is_unwrappable< - thrust::tuple - > - : or_< - is_unwrappable, - is_unwrappable, - is_unwrappable, - is_unwrappable, - is_unwrappable, - is_unwrappable, - is_unwrappable, - is_unwrappable, - is_unwrappable, - is_unwrappable - > -{}; - - -// specialize is_unwrappable -// a tuple_of_iterator_references is_unwrappable if any of its elements is_unwrappable -template< - typename T0, typename T1, typename T2, - typename T3, typename T4, typename T5, - typename T6, typename T7, typename T8, - typename T9 -> - struct is_unwrappable< - thrust::detail::tuple_of_iterator_references - > - : or_< - is_unwrappable, - is_unwrappable, - is_unwrappable, - is_unwrappable, - is_unwrappable, - is_unwrappable, - is_unwrappable, - is_unwrappable, - is_unwrappable, - is_unwrappable - > -{}; - - -namespace raw_reference_detail -{ - -// unlike raw_reference, -// raw_reference_tuple_helper needs to return a value -// when it encounters one, rather than a reference -// upon encountering tuple, recurse -// -// we want the following behavior: -// 1. T -> T -// 2. T& -> T& -// 3. null_type -> null_type -// 4. reference -> T& -// 5. 
tuple_of_iterator_references -> tuple_of_iterator_references::type> - - -// wrapped references are unwrapped using raw_reference, otherwise, return T -template - struct raw_reference_tuple_helper - : eval_if< - is_unwrappable< - typename remove_cv::type - >::value, - raw_reference, - identity_ - > -{}; - - -// recurse on tuples -template < - typename T0, typename T1, typename T2, - typename T3, typename T4, typename T5, - typename T6, typename T7, typename T8, - typename T9 -> - struct raw_reference_tuple_helper< - thrust::tuple - > -{ - typedef thrust::tuple< - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type - > type; -}; - - -template < - typename T0, typename T1, typename T2, - typename T3, typename T4, typename T5, - typename T6, typename T7, typename T8, - typename T9 -> - struct raw_reference_tuple_helper< - thrust::detail::tuple_of_iterator_references - > -{ - typedef thrust::detail::tuple_of_iterator_references< - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type, - typename raw_reference_tuple_helper::type - > type; -}; - - -} // end raw_reference_detail - - -// if a tuple "tuple_type" is_unwrappable, -// then the raw_reference of tuple_type is a tuple of its members' raw_references -// else the raw_reference of 
tuple_type is tuple_type & -template < - typename T0, typename T1, typename T2, - typename T3, typename T4, typename T5, - typename T6, typename T7, typename T8, - typename T9 -> - struct raw_reference< - thrust::tuple - > -{ - private: - typedef thrust::tuple tuple_type; - - public: - typedef typename eval_if< - is_unwrappable::value, - raw_reference_detail::raw_reference_tuple_helper, - add_reference - >::type type; -}; - - -template < - typename T0, typename T1, typename T2, - typename T3, typename T4, typename T5, - typename T6, typename T7, typename T8, - typename T9 -> - struct raw_reference< - thrust::detail::tuple_of_iterator_references - > -{ - private: - typedef detail::tuple_of_iterator_references tuple_type; - - public: - typedef typename raw_reference_detail::raw_reference_tuple_helper::type type; - - // XXX figure out why is_unwrappable seems to be broken for tuple_of_iterator_references - //typedef typename eval_if< - // is_unwrappable::value, - // raw_reference_detail::raw_reference_tuple_helper, - // add_reference - //>::type type; -}; - - -struct raw_reference_caster -{ - template - __host__ __device__ - typename detail::raw_reference::type operator()(T &ref) - { - return thrust::raw_reference_cast(ref); - } - - template - __host__ __device__ - typename detail::raw_reference::type operator()(const T &ref) - { - return thrust::raw_reference_cast(ref); - } - - template< - typename T0, typename T1, typename T2, - typename T3, typename T4, typename T5, - typename T6, typename T7, typename T8, - typename T9 - > - __host__ __device__ - typename detail::raw_reference< - thrust::detail::tuple_of_iterator_references - >::type - operator()(thrust::detail::tuple_of_iterator_references t, - typename enable_if< - is_unwrappable >::value - >::type * = 0) - { - return thrust::raw_reference_cast(t); - } -}; // end raw_reference_caster - - -} // end detail - - -template< - typename T0, typename T1, typename T2, - typename T3, typename T4, typename T5, - typename 
T6, typename T7, typename T8, - typename T9 -> -__host__ __device__ -typename detail::enable_if_unwrappable< - thrust::detail::tuple_of_iterator_references, - typename detail::raw_reference< - thrust::detail::tuple_of_iterator_references - >::type ->::type -raw_reference_cast(thrust::detail::tuple_of_iterator_references t) -{ - thrust::detail::raw_reference_caster f; - - // note that we pass raw_reference_tuple_helper, not raw_reference as the unary metafunction - // the subtle difference is important - return thrust::detail::tuple_host_device_transform(t, f); -} // end raw_reference_cast - - -} // end thrust - diff --git a/compat/thrust/detail/reduce.inl b/compat/thrust/detail/reduce.inl deleted file mode 100644 index ba84423475..0000000000 --- a/compat/thrust/detail/reduce.inl +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file reduce.inl - * \brief Inline file for reduce.h. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - typename thrust::iterator_traits::value_type - reduce(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last) -{ - using thrust::system::detail::generic::reduce; - return reduce(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); -} // end reduce() - - -template - T reduce(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - T init) -{ - using thrust::system::detail::generic::reduce; - return reduce(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, init); -} // end reduce() - - -template - T reduce(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - T init, - BinaryFunction binary_op) -{ - using thrust::system::detail::generic::reduce; - return reduce(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, init, binary_op); -} // end reduce() - - -template - thrust::pair - reduce_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output) -{ - using thrust::system::detail::generic::reduce_by_key; - return reduce_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, keys_output, values_output); -} // end reduce_by_key() - - -template - thrust::pair - reduce_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred) -{ - using thrust::system::detail::generic::reduce_by_key; - return reduce_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, 
keys_last, values_first, keys_output, values_output, binary_pred); -} // end reduce_by_key() - - -template - thrust::pair - reduce_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred, - BinaryFunction binary_op) -{ - using thrust::system::detail::generic::reduce_by_key; - return reduce_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op); -} // end reduce_by_key() - - -template -typename thrust::iterator_traits::value_type - reduce(InputIterator first, - InputIterator last) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::reduce(select_system(system), first, last); -} - -template - T reduce(InputIterator first, - InputIterator last, - T init) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::reduce(select_system(system), first, last, init); -} - - -template - T reduce(InputIterator first, - InputIterator last, - T init, - BinaryFunction binary_op) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::reduce(select_system(system), first, last, init, binary_op); -} - -template - thrust::pair - reduce_by_key(InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename 
thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - - return thrust::reduce_by_key(select_system(system1,system2,system3,system4), keys_first, keys_last, values_first, keys_output, values_output); -} - -template - thrust::pair - reduce_by_key(InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - - return thrust::reduce_by_key(select_system(system1,system2,system3,system4), keys_first, keys_last, values_first, keys_output, values_output, binary_pred); -} - -template - thrust::pair - reduce_by_key(InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred, - BinaryFunction binary_op) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - - return thrust::reduce_by_key(select_system(system1,system2,system3,system4), keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op); -} - -} // end namespace thrust - diff --git a/compat/thrust/detail/reference.h b/compat/thrust/detail/reference.h deleted 
file mode 100644 index 8c0b06186f..0000000000 --- a/compat/thrust/detail/reference.h +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include - - -namespace thrust -{ -namespace detail -{ - -template struct is_wrapped_reference; - -} - -// the base type for all of thrust's system-annotated references. -// for reasonable reference-like semantics, derived types must reimplement the following: -// 1. constructor from pointer -// 2. copy constructor -// 3. templated copy constructor from other reference -// 4. templated assignment from other reference -// 5. 
assignment from value_type -template - class reference -{ - private: - typedef typename thrust::detail::eval_if< - thrust::detail::is_same::value, - thrust::detail::identity_, - thrust::detail::identity_ - >::type derived_type; - - // hint for is_wrapped_reference lets it know that this type (or a derived type) - // is a wrapped reference - struct wrapped_reference_hint {}; - template friend struct thrust::detail::is_wrapped_reference; - - public: - typedef Pointer pointer; - typedef typename thrust::detail::remove_const::type value_type; - - __host__ __device__ - explicit reference(const pointer &ptr); - - template - __host__ __device__ - reference(const reference &other, - typename thrust::detail::enable_if_convertible< - typename reference::pointer, - pointer - >::type * = 0); - - __host__ __device__ - derived_type &operator=(const reference &other); - - // XXX this may need an enable_if - template - __host__ __device__ - derived_type &operator=(const reference &other); - - __host__ __device__ - derived_type &operator=(const value_type &x); - - __host__ __device__ - pointer operator&() const; - - __host__ __device__ - operator value_type () const; - - __host__ __device__ - void swap(derived_type &other); - - derived_type &operator++(); - - value_type operator++(int); - - // XXX parameterize the type of rhs - derived_type &operator+=(const value_type &rhs); - - derived_type &operator--(); - - value_type operator--(int); - - // XXX parameterize the type of rhs - derived_type &operator-=(const value_type &rhs); - - // XXX parameterize the type of rhs - derived_type &operator*=(const value_type &rhs); - - // XXX parameterize the type of rhs - derived_type &operator/=(const value_type &rhs); - - // XXX parameterize the type of rhs - derived_type &operator%=(const value_type &rhs); - - // XXX parameterize the type of rhs - derived_type &operator<<=(const value_type &rhs); - - // XXX parameterize the type of rhs - derived_type &operator>>=(const value_type &rhs); - - 
// XXX parameterize the type of rhs - derived_type &operator&=(const value_type &rhs); - - // XXX parameterize the type of rhs - derived_type &operator|=(const value_type &rhs); - - // XXX parameterize the type of rhs - derived_type &operator^=(const value_type &rhs); - - private: - const pointer m_ptr; - - // allow access to m_ptr for other references - template friend class reference; - - template - __host__ __device__ - inline value_type strip_const_get_value(const System &system) const; - - template - __host__ __device__ - inline void assign_from(OtherPointer src); - - // XXX this helper exists only to avoid warnings about null references from the other assign_from - template - inline __host__ __device__ - void assign_from(System1 *system1, System2 *system2, OtherPointer src); - - template - __host__ __device__ - inline void strip_const_assign_value(const System &system, OtherPointer src); - - // XXX this helper exists only to avoid warnings about null references from the other swap - template - inline __host__ __device__ - void swap(System *system, derived_type &other); - - // XXX this helper exists only to avoid warnings about null references from operator value_type () - template - inline __host__ __device__ - value_type convert_to_value_type(System *system) const; -}; // end reference - - -} // end thrust - -#include - diff --git a/compat/thrust/detail/reference.inl b/compat/thrust/detail/reference.inl deleted file mode 100644 index 8b55edb712..0000000000 --- a/compat/thrust/detail/reference.inl +++ /dev/null @@ -1,361 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace thrust -{ - - -template - template - reference - ::reference(const reference &other, - typename thrust::detail::enable_if_convertible< - typename reference::pointer, - pointer - >::type *) - : m_ptr(other.m_ptr) -{} - - -template - reference - ::reference(const pointer &ptr) - : m_ptr(ptr) -{} - - -template - typename reference::pointer - reference - ::operator&() const -{ - return m_ptr; -} // end reference::operator&() - - -template - typename reference::derived_type & - reference - ::operator=(const value_type &v) -{ - assign_from(&v); - return static_cast(*this); -} // end reference::operator=() - - -template - typename reference::derived_type & - reference - ::operator=(const reference &other) -{ - assign_from(&other); - return static_cast(*this); -} // end reference::operator=() - - -template - template - typename reference::derived_type & - reference - ::operator=(const reference &other) -{ - assign_from(&other); - return static_cast(*this); -} // end reference::operator=() - - -template - template - typename reference::value_type - reference - ::convert_to_value_type(System *system) const -{ - using thrust::system::detail::generic::select_system; - return strip_const_get_value(select_system(*system)); -} // end convert_to_value_type() - - -template - reference - ::operator typename reference::value_type () const -{ - typedef typename thrust::iterator_system::type System; - - // XXX avoid default-constructing a system - // XXX use 
null a reference for dispatching - // XXX this assumes that the eventual invocation of - // XXX get_value will not access system state - System *system = 0; - - return convert_to_value_type(system); -} // end reference::operator value_type () - - -template - template - typename reference::value_type - reference - ::strip_const_get_value(const System &system) const -{ - System &non_const_system = const_cast(system); - - using thrust::system::detail::generic::get_value; - - return get_value(thrust::detail::derived_cast(non_const_system), m_ptr); -} // end reference::strip_const_get_value() - - -template - template - void reference - ::assign_from(System1 *system1, System2 *system2, OtherPointer src) -{ - using thrust::system::detail::generic::select_system; - - strip_const_assign_value(select_system(*system1, *system2), src); -} // end assign_from() - - -template - template - void reference - ::assign_from(OtherPointer src) -{ - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - // XXX avoid default-constructing a system - // XXX use null references for dispatching - // XXX this assumes that the eventual invocation of - // XXX assign_value will not access system state - System1 *system1 = 0; - System2 *system2 = 0; - - assign_from(system1, system2, src); -} // end assign_from() - - -template - template - void reference - ::strip_const_assign_value(const System &system, OtherPointer src) -{ - System &non_const_system = const_cast(system); - - using thrust::system::detail::generic::assign_value; - - assign_value(thrust::detail::derived_cast(non_const_system), m_ptr, src); -} // end strip_const_assign_value() - - -template - template - void reference - ::swap(System *system, derived_type &other) -{ - using thrust::system::detail::generic::select_system; - using thrust::system::detail::generic::iter_swap; - - iter_swap(select_system(*system, *system), m_ptr, other.m_ptr); -} // end reference::swap() - - 
-template - void reference - ::swap(derived_type &other) -{ - typedef typename thrust::iterator_system::type System; - - // XXX avoid default-constructing a system - // XXX use null references for dispatching - // XXX this assumes that the eventual invocation - // XXX of iter_swap will not access system state - System *system = 0; - - swap(system, other); -} // end reference::swap() - - -template - typename reference::derived_type & - reference - ::operator++(void) -{ - value_type temp = *this; - ++temp; - *this = temp; - return static_cast(*this); -} // end reference::operator++() - - -template - typename reference::value_type - reference - ::operator++(int) -{ - value_type temp = *this; - value_type result = temp++; - *this = temp; - return result; -} // end reference::operator++() - - -template - typename reference::derived_type & - reference - ::operator+=(const value_type &rhs) -{ - value_type temp = *this; - temp += rhs; - *this = temp; - return static_cast(*this); -} // end reference::operator+=() - -template - typename reference::derived_type & - reference - ::operator--(void) -{ - value_type temp = *this; - --temp; - *this = temp; - return static_cast(*this); -} // end reference::operator--() - -template - typename reference::value_type - reference - ::operator--(int) -{ - value_type temp = *this; - value_type result = temp--; - *this = temp; - return result; -} // end reference::operator--() - -template - typename reference::derived_type & - reference - ::operator-=(const value_type &rhs) -{ - value_type temp = *this; - temp -= rhs; - *this = temp; - return static_cast(*this); -} // end reference::operator-=() - -template - typename reference::derived_type & - reference - ::operator*=(const value_type &rhs) -{ - value_type temp = *this; - temp *= rhs; - *this = temp; - return static_cast(*this); -} // end reference::operator*=() - -template - typename reference::derived_type & - reference - ::operator/=(const value_type &rhs) -{ - value_type temp = *this; 
- temp /= rhs; - *this = temp; - return static_cast(*this); -} // end reference::operator/=() - -template - typename reference::derived_type & - reference - ::operator%=(const value_type &rhs) -{ - value_type temp = *this; - temp %= rhs; - *this = temp; - return static_cast(*this); -} // end reference::operator%=() - -template - typename reference::derived_type & - reference - ::operator<<=(const value_type &rhs) -{ - value_type temp = *this; - temp <<= rhs; - *this = temp; - return static_cast(*this); -} // end reference::operator<<=() - -template - typename reference::derived_type & - reference - ::operator>>=(const value_type &rhs) -{ - value_type temp = *this; - temp >>= rhs; - *this = temp; - return static_cast(*this); -} // end reference::operator>>=() - -template - typename reference::derived_type & - reference - ::operator&=(const value_type &rhs) -{ - value_type temp = *this; - temp &= rhs; - *this = temp; - return static_cast(*this); -} // end reference::operator&=() - -template - typename reference::derived_type & - reference - ::operator|=(const value_type &rhs) -{ - value_type temp = *this; - temp |= rhs; - *this = temp; - return static_cast(*this); -} // end reference::operator|=() - -template - typename reference::derived_type & - reference - ::operator^=(const value_type &rhs) -{ - value_type temp = *this; - temp ^= rhs; - *this = temp; - return static_cast(*this); -} // end reference::operator^=() - - -} // end thrust - diff --git a/compat/thrust/detail/reference_forward_declaration.h b/compat/thrust/detail/reference_forward_declaration.h deleted file mode 100644 index 60524d3ec2..0000000000 --- a/compat/thrust/detail/reference_forward_declaration.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ - -template class reference; - -} // end thrust - diff --git a/compat/thrust/detail/remove.inl b/compat/thrust/detail/remove.inl deleted file mode 100644 index 5675243fbd..0000000000 --- a/compat/thrust/detail/remove.inl +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file remove.inl - * \brief Inline file for remove.h. 
- */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - ForwardIterator remove(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const T &value) -{ - using thrust::system::detail::generic::remove; - return remove(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value); -} // end remove() - - -template - OutputIterator remove_copy(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - const T &value) -{ - using thrust::system::detail::generic::remove_copy; - return remove_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, value); -} // end remove_copy() - - -template - ForwardIterator remove_if(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - using thrust::system::detail::generic::remove_if; - return remove_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred); -} // end remove_if() - - -template - OutputIterator remove_copy_if(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred) -{ - using thrust::system::detail::generic::remove_copy_if; - return remove_copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, pred); -} // end remove_copy_if() - - -template - ForwardIterator remove_if(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred) -{ - using thrust::system::detail::generic::remove_if; - return remove_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, pred); -} // end remove_if() - - -template - OutputIterator remove_copy_if(const thrust::detail::execution_policy_base &exec, 
- InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred) -{ - using thrust::system::detail::generic::remove_copy_if; - return remove_copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, result, pred); -} // end remove_copy_if() - - -template - ForwardIterator remove(ForwardIterator first, - ForwardIterator last, - const T &value) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::remove(select_system(system), first, last, value); -} // end remove() - - -template - OutputIterator remove_copy(InputIterator first, - InputIterator last, - OutputIterator result, - const T &value) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::remove_copy(select_system(system1,system2), first, last, result, value); -} // end remove_copy() - - -template - ForwardIterator remove_if(ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::remove_if(select_system(system), first, last, pred); -} // end remove_if() - - -template - ForwardIterator remove_if(ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::remove_if(select_system(system1,system2), first, last, stencil, pred); -} // end remove_if() - - -template - OutputIterator remove_copy_if(InputIterator first, - 
InputIterator last, - OutputIterator result, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::remove_copy_if(select_system(system1,system2), first, last, result, pred); -} // end remove_copy_if() - - -template - OutputIterator remove_copy_if(InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::remove_copy_if(select_system(system1,system2,system3), first, last, stencil, result, pred); -} // end remove_copy_if() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/replace.inl b/compat/thrust/detail/replace.inl deleted file mode 100644 index 1eaf24d621..0000000000 --- a/compat/thrust/detail/replace.inl +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file replace.inl - * \brief Inline file for replace.h. 
- */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - void replace(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, ForwardIterator last, - const T &old_value, - const T &new_value) -{ - using thrust::system::detail::generic::replace; - return replace(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, old_value, new_value); -} // end replace() - - -template - void replace_if(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, ForwardIterator last, - Predicate pred, - const T &new_value) -{ - using thrust::system::detail::generic::replace_if; - return replace_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, pred, new_value); -} // end replace_if() - - -template - void replace_if(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, ForwardIterator last, - InputIterator stencil, - Predicate pred, - const T &new_value) -{ - using thrust::system::detail::generic::replace_if; - return replace_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, pred, new_value); -} // end replace_if() - - -template - OutputIterator replace_copy(const thrust::detail::execution_policy_base &exec, - InputIterator first, InputIterator last, - OutputIterator result, - const T &old_value, - const T &new_value) -{ - using thrust::system::detail::generic::replace_copy; - return replace_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, old_value, new_value); -} // end replace_copy() - - -template - OutputIterator replace_copy_if(const thrust::detail::execution_policy_base &exec, - InputIterator first, InputIterator last, - OutputIterator result, - Predicate pred, - const T &new_value) -{ - using thrust::system::detail::generic::replace_copy_if; - return replace_copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), 
first, last, result, pred, new_value); -} // end replace_copy_if() - - -template - OutputIterator replace_copy_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred, - const T &new_value) -{ - using thrust::system::detail::generic::replace_copy_if; - return replace_copy_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, result, pred, new_value); -} // end replace_copy_if() - - -template - OutputIterator replace_copy_if(InputIterator first, InputIterator last, - OutputIterator result, - Predicate pred, - const T &new_value) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::replace_copy_if(select_system(system1,system2), first, last, result, pred, new_value); -} // end replace_copy_if() - - -template - OutputIterator replace_copy_if(InputIterator1 first, InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred, - const T &new_value) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::replace_copy_if(select_system(system1,system2,system3), first, last, stencil, result, pred, new_value); -} // end replace_copy_if() - - -template - OutputIterator replace_copy(InputIterator first, InputIterator last, - OutputIterator result, - const T &old_value, - const T &new_value) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - 
System1 system1; - System2 system2; - - return thrust::replace_copy(select_system(system1,system2), first, last, result, old_value, new_value); -} // end replace_copy() - - -template - void replace_if(ForwardIterator first, ForwardIterator last, - Predicate pred, - const T &new_value) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::replace_if(select_system(system), first, last, pred, new_value); -} // end replace_if() - - -template - void replace_if(ForwardIterator first, ForwardIterator last, - InputIterator stencil, - Predicate pred, - const T &new_value) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::replace_if(select_system(system1,system2), first, last, stencil, pred, new_value); -} // end replace_if() - - -template - void replace(ForwardIterator first, ForwardIterator last, - const T &old_value, - const T &new_value) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::replace(select_system(system), first, last, old_value, new_value); -} // end replace() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/reverse.inl b/compat/thrust/detail/reverse.inl deleted file mode 100644 index 18c26c00e6..0000000000 --- a/compat/thrust/detail/reverse.inl +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file reverse.inl - * \brief Inline file for reverse.h. - */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - void reverse(const thrust::detail::execution_policy_base &exec, - BidirectionalIterator first, - BidirectionalIterator last) -{ - using thrust::system::detail::generic::reverse; - return reverse(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); -} // end reverse() - - -template - OutputIterator reverse_copy(const thrust::detail::execution_policy_base &exec, - BidirectionalIterator first, - BidirectionalIterator last, - OutputIterator result) -{ - using thrust::system::detail::generic::reverse_copy; - return reverse_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result); -} // end reverse_copy() - - -template - void reverse(BidirectionalIterator first, - BidirectionalIterator last) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::reverse(select_system(system), first, last); -} // end reverse() - - -template - OutputIterator reverse_copy(BidirectionalIterator first, - BidirectionalIterator last, - OutputIterator result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::reverse_copy(select_system(system1,system2), first, last, 
result); -} // end reverse_copy() - - -} // end thrust - diff --git a/compat/thrust/detail/scan.inl b/compat/thrust/detail/scan.inl deleted file mode 100644 index 3e5fd9b4f7..0000000000 --- a/compat/thrust/detail/scan.inl +++ /dev/null @@ -1,502 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file scan.inl - * \brief Inline file for scan.h. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - OutputIterator inclusive_scan(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result) -{ - using thrust::system::detail::generic::inclusive_scan; - return inclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result); -} // end inclusive_scan() - - -template - OutputIterator inclusive_scan(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - AssociativeOperator binary_op) -{ - using thrust::system::detail::generic::inclusive_scan; - return inclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, binary_op); -} // end inclusive_scan() - - -template - OutputIterator exclusive_scan(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result) -{ - using 
thrust::system::detail::generic::exclusive_scan; - return exclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result); -} // end exclusive_scan() - - -template - OutputIterator exclusive_scan(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - T init) -{ - using thrust::system::detail::generic::exclusive_scan; - return exclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, init); -} // end exclusive_scan() - - -template - OutputIterator exclusive_scan(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - T init, - AssociativeOperator binary_op) -{ - using thrust::system::detail::generic::exclusive_scan; - return exclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, init, binary_op); -} // end exclusive_scan() - - -template - OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result) -{ - using thrust::system::detail::generic::inclusive_scan_by_key; - return inclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result); -} // end inclusive_scan_by_key() - - -template - OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryPredicate binary_pred) -{ - using thrust::system::detail::generic::inclusive_scan_by_key; - return inclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, binary_pred); -} // end inclusive_scan_by_key() - - -template - OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base 
&exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryPredicate binary_pred, - AssociativeOperator binary_op) -{ - using thrust::system::detail::generic::inclusive_scan_by_key; - return inclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, binary_pred, binary_op); -} // end inclusive_scan_by_key() - - -template - OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result) -{ - using thrust::system::detail::generic::exclusive_scan_by_key; - return exclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result); -} // end exclusive_scan_by_key() - - -template - OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init) -{ - using thrust::system::detail::generic::exclusive_scan_by_key; - return exclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, init); -} // end exclusive_scan_by_key() - - -template - OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init, - BinaryPredicate binary_pred) -{ - using thrust::system::detail::generic::exclusive_scan_by_key; - return exclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, init, binary_pred); -} // end exclusive_scan_by_key() - - -template - OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - 
T init, - BinaryPredicate binary_pred, - AssociativeOperator binary_op) -{ - using thrust::system::detail::generic::exclusive_scan_by_key; - return exclusive_scan_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, init, binary_pred, binary_op); -} // end exclusive_scan_by_key() - - -template - OutputIterator inclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::inclusive_scan(select_system(system1,system2), first, last, result); -} // end inclusive_scan() - - -template - OutputIterator inclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result, - BinaryFunction binary_op) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::inclusive_scan(select_system(system1,system2), first, last, result, binary_op); -} // end inclusive_scan() - - -template - OutputIterator exclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::exclusive_scan(select_system(system1,system2), first, last, result); -} // end exclusive_scan() - - -template - OutputIterator exclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result, - T init) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type 
System2; - - System1 system1; - System2 system2; - - return thrust::exclusive_scan(select_system(system1,system2), first, last, result, init); -} // end exclusive_scan() - - -template - OutputIterator exclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result, - T init, - BinaryFunction binary_op) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::exclusive_scan(select_system(system1,system2), first, last, result, init, binary_op); -} // end exclusive_scan() - - -template - OutputIterator inclusive_scan_by_key(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::inclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result); -} - - -template - OutputIterator inclusive_scan_by_key(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryPredicate binary_pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::inclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result, binary_pred); -} - - -template - OutputIterator inclusive_scan_by_key(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - 
BinaryPredicate binary_pred, - AssociativeOperator binary_op) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::inclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result, binary_pred, binary_op); -} - - -template - OutputIterator exclusive_scan_by_key(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::exclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result); -} - - -template - OutputIterator exclusive_scan_by_key(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::exclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result, init); -} - - -template - OutputIterator exclusive_scan_by_key(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init, - BinaryPredicate binary_pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; 
- typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::exclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result, init, binary_pred); -} - - -template - OutputIterator exclusive_scan_by_key(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init, - BinaryPredicate binary_pred, - AssociativeOperator binary_op) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::exclusive_scan_by_key(select_system(system1,system2,system3), first1, last1, first2, result, init, binary_pred, binary_op); -} - - -} // end namespace thrust - diff --git a/compat/thrust/detail/scatter.inl b/compat/thrust/detail/scatter.inl deleted file mode 100644 index 934addb727..0000000000 --- a/compat/thrust/detail/scatter.inl +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file scatter.inl - * \brief Inline file for scatter.h. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - void scatter(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - RandomAccessIterator output) -{ - using thrust::system::detail::generic::scatter; - return scatter(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, map, output); -} // end scatter() - - -template - void scatter_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - InputIterator3 stencil, - RandomAccessIterator output) -{ - using thrust::system::detail::generic::scatter_if; - return scatter_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, map, stencil, output); -} // end scatter_if() - - -template - void scatter_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - InputIterator3 stencil, - RandomAccessIterator output, - Predicate pred) -{ - using thrust::system::detail::generic::scatter_if; - return scatter_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, map, stencil, output, pred); -} // end scatter_if() - - -template - void scatter(InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - RandomAccessIterator output) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::scatter(select_system(system1,system2,system3), first, last, map, output); -} // end scatter() - - -template - void scatter_if(InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - InputIterator3 stencil, - RandomAccessIterator output) -{ - using 
thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - - return thrust::scatter_if(select_system(system1,system2,system3,system4), first, last, map, stencil, output); -} // end scatter_if() - - -template - void scatter_if(InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - InputIterator3 stencil, - RandomAccessIterator output, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - - return thrust::scatter_if(select_system(system1,system2,system3,system4), first, last, map, stencil, output, pred); -} // end scatter_if() - -} // end namespace thrust - diff --git a/compat/thrust/detail/sequence.inl b/compat/thrust/detail/sequence.inl deleted file mode 100644 index f1741877f8..0000000000 --- a/compat/thrust/detail/sequence.inl +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file sequence.inl - * \brief Inline file for sequence.h. - */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - void sequence(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last) -{ - using thrust::system::detail::generic::sequence; - return sequence(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); -} // end sequence() - - -template - void sequence(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - T init) -{ - using thrust::system::detail::generic::sequence; - return sequence(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, init); -} // end sequence() - - -template - void sequence(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - T init, - T step) -{ - using thrust::system::detail::generic::sequence; - return sequence(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, init, step); -} // end sequence() - - -template - void sequence(ForwardIterator first, - ForwardIterator last) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::sequence(select_system(system), first, last); -} // end sequence() - - -template - void sequence(ForwardIterator first, - ForwardIterator last, - T init) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::sequence(select_system(system), first, last, init); -} // end sequence() - - -template - void sequence(ForwardIterator first, - ForwardIterator last, - T init, - T step) -{ - using thrust::system::detail::generic::select_system; - - 
typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::sequence(select_system(system), first, last, init, step); -} // end sequence() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/set_operations.inl b/compat/thrust/detail/set_operations.inl deleted file mode 100644 index daec46156d..0000000000 --- a/compat/thrust/detail/set_operations.inl +++ /dev/null @@ -1,836 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file set_operations.inl - * \brief Inline file for set_operations.h. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - OutputIterator set_difference(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result) -{ - using thrust::system::detail::generic::set_difference; - return set_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result); -} // end set_difference() - - -template - OutputIterator set_difference(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakCompare comp) -{ - using thrust::system::detail::generic::set_difference; - return set_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result, comp); -} // end set_difference() - - -template - thrust::pair - set_difference_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result) -{ - using thrust::system::detail::generic::set_difference_by_key; - return set_difference_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result); -} // end set_difference_by_key() - - -template - thrust::pair - set_difference_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - 
StrictWeakCompare comp) -{ - using thrust::system::detail::generic::set_difference_by_key; - return set_difference_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); -} // end set_difference_by_key() - - -template - OutputIterator set_intersection(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result) -{ - using thrust::system::detail::generic::set_intersection; - return set_intersection(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result); -} // end set_intersection() - - -template - OutputIterator set_intersection(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakCompare comp) -{ - using thrust::system::detail::generic::set_intersection; - return set_intersection(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result, comp); -} // end set_intersection() - - -template - thrust::pair - set_intersection_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - OutputIterator1 keys_result, - OutputIterator2 values_result) -{ - using thrust::system::detail::generic::set_intersection_by_key; - return set_intersection_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, keys_result, values_result); -} // end set_intersection_by_key() - - -template - thrust::pair - set_intersection_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 
keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakCompare comp) -{ - using thrust::system::detail::generic::set_intersection_by_key; - return set_intersection_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, keys_result, values_result, comp); -} // end set_intersection_by_key() - - -template - OutputIterator set_symmetric_difference(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result) -{ - using thrust::system::detail::generic::set_symmetric_difference; - return set_symmetric_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result); -} // end set_symmetric_difference() - - -template - OutputIterator set_symmetric_difference(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakCompare comp) -{ - using thrust::system::detail::generic::set_symmetric_difference; - return set_symmetric_difference(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result, comp); -} // end set_symmetric_difference() - - -template - thrust::pair - set_symmetric_difference_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result) -{ - using thrust::system::detail::generic::set_symmetric_difference_by_key; - return 
set_symmetric_difference_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result); -} // end set_symmetric_difference_by_key() - - -template - thrust::pair - set_symmetric_difference_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakCompare comp) -{ - using thrust::system::detail::generic::set_symmetric_difference_by_key; - return set_symmetric_difference_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); -} // end set_symmetric_difference_by_key() - - -template - OutputIterator set_union(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result) -{ - using thrust::system::detail::generic::set_union; - return set_union(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result); -} // end set_union() - - -template - OutputIterator set_union(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakCompare comp) -{ - using thrust::system::detail::generic::set_union; - return set_union(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, last2, result, comp); -} // end set_union() - - -template - thrust::pair - set_union_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - 
InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result) -{ - using thrust::system::detail::generic::set_union_by_key; - return set_union_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result); -} // end set_union_by_key() - - -template - thrust::pair - set_union_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakCompare comp) -{ - using thrust::system::detail::generic::set_union_by_key; - return set_union_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); -} // end set_union_by_key() - - -template - OutputIterator set_difference(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::set_difference(select_system(system1,system2,system3), first1, last1, first2, last2, result, comp); -} // end set_difference() - - -template - OutputIterator set_difference(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result) -{ - using 
thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::set_difference(select_system(system1,system2,system3), first1, last1, first2, last2, result); -} // end set_difference() - - -template - thrust::pair - set_difference_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - typedef typename thrust::iterator_system::type System5; - typedef typename thrust::iterator_system::type System6; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - System5 system5; - System6 system6; - - return thrust::set_difference_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); -} // end set_difference_by_key() - - -template - thrust::pair - set_difference_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; 
- typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - typedef typename thrust::iterator_system::type System5; - typedef typename thrust::iterator_system::type System6; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - System5 system5; - System6 system6; - - return thrust::set_difference_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result); -} // end set_difference_by_key() - - -template - OutputIterator set_intersection(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::set_intersection(select_system(system1,system2,system3), first1, last1, first2, last2, result, comp); -} // end set_intersection() - - -template - OutputIterator set_intersection(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::set_intersection(select_system(system1,system2,system3), first1, last1, first2, last2, result); -} // end set_intersection() - - -template - thrust::pair - set_intersection_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - 
InputIterator2 keys_last2, - InputIterator3 values_first1, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - typedef typename thrust::iterator_system::type System5; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - System5 system5; - - return thrust::set_intersection_by_key(select_system(system1,system2,system3,system4,system5), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, keys_result, values_result, comp); -} // end set_intersection_by_key() - - -template - thrust::pair - set_intersection_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - OutputIterator1 keys_result, - OutputIterator2 values_result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - typedef typename thrust::iterator_system::type System5; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - System5 system5; - - return thrust::set_intersection_by_key(select_system(system1,system2,system3,system4,system5), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, keys_result, values_result); -} // end set_intersection_by_key() - - -template - OutputIterator set_symmetric_difference(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - using 
thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::set_symmetric_difference(select_system(system1,system2,system3), first1, last1, first2, last2, result, comp); -} // end set_symmetric_difference() - - -template - OutputIterator set_symmetric_difference(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::set_symmetric_difference(select_system(system1,system2,system3), first1, last1, first2, last2, result); -} // end set_symmetric_difference() - - -template - thrust::pair - set_symmetric_difference_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - typedef typename thrust::iterator_system::type System5; - typedef typename thrust::iterator_system::type System6; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - System5 system5; - System6 system6; - - return 
thrust::set_symmetric_difference_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); -} // end set_symmetric_difference_by_key() - - -template - thrust::pair - set_symmetric_difference_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - typedef typename thrust::iterator_system::type System5; - typedef typename thrust::iterator_system::type System6; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - System5 system5; - System6 system6; - - return thrust::set_symmetric_difference_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result); -} // end set_symmetric_difference_by_key() - - -template - OutputIterator set_union(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::set_union(select_system(system1,system2,system3), first1, last1, first2, last2, result, comp); -} // end set_union() - - -template - 
OutputIterator set_union(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::set_union(select_system(system1,system2,system3), first1, last1, first2, last2, result); -} // end set_union() - - -template - thrust::pair - set_union_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - typedef typename thrust::iterator_system::type System5; - typedef typename thrust::iterator_system::type System6; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - System5 system5; - System6 system6; - - return thrust::set_union_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); -} // end set_union_by_key() - - -template - thrust::pair - set_union_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result) -{ - using 
thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - typedef typename thrust::iterator_system::type System5; - typedef typename thrust::iterator_system::type System6; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - System5 system5; - System6 system6; - - return thrust::set_union_by_key(select_system(system1,system2,system3,system4,system5,system6), keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result); -} // end set_union_by_key() - - -} // end thrust - diff --git a/compat/thrust/detail/sort.inl b/compat/thrust/detail/sort.inl deleted file mode 100644 index 08be55a8ce..0000000000 --- a/compat/thrust/detail/sort.inl +++ /dev/null @@ -1,383 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file sort.inl - * \brief Inline file for sort.h. 
- */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - void sort(const thrust::detail::execution_policy_base &exec, - RandomAccessIterator first, - RandomAccessIterator last) -{ - using thrust::system::detail::generic::sort; - return sort(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); -} // end sort() - - -template - void sort(const thrust::detail::execution_policy_base &exec, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::sort; - return sort(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp); -} // end sort() - - -template - void stable_sort(const thrust::detail::execution_policy_base &exec, - RandomAccessIterator first, - RandomAccessIterator last) -{ - using thrust::system::detail::generic::stable_sort; - return stable_sort(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); -} // end stable_sort() - - -template - void stable_sort(const thrust::detail::execution_policy_base &exec, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::stable_sort; - return stable_sort(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp); -} // end stable_sort() - - -template - void sort_by_key(const thrust::detail::execution_policy_base &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first) -{ - using thrust::system::detail::generic::sort_by_key; - return sort_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first); -} // end sort_by_key() - - -template - void sort_by_key(const thrust::detail::execution_policy_base &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - 
StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::sort_by_key; - return sort_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, comp); -} // end sort_by_key() - - -template - void stable_sort_by_key(const thrust::detail::execution_policy_base &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first) -{ - using thrust::system::detail::generic::stable_sort_by_key; - return stable_sort_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first); -} // end stable_sort_by_key() - - -template - void stable_sort_by_key(const thrust::detail::execution_policy_base &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::stable_sort_by_key; - return stable_sort_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, comp); -} // end stable_sort_by_key() - - -template - bool is_sorted(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last) -{ - using thrust::system::detail::generic::is_sorted; - return is_sorted(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); -} // end is_sorted() - - -template - bool is_sorted(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - Compare comp) -{ - using thrust::system::detail::generic::is_sorted; - return is_sorted(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp); -} // end is_sorted() - - -template - ForwardIterator is_sorted_until(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last) -{ - using thrust::system::detail::generic::is_sorted_until; - return 
is_sorted_until(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); -} // end is_sorted_until() - - -template - ForwardIterator is_sorted_until(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - Compare comp) -{ - using thrust::system::detail::generic::is_sorted_until; - return is_sorted_until(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, comp); -} // end is_sorted_until() - - -/////////////// -// Key Sorts // -/////////////// - -template - void sort(RandomAccessIterator first, - RandomAccessIterator last) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::sort(select_system(system), first, last); -} // end sort() - - -template - void sort(RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::sort(select_system(system), first, last, comp); -} // end sort() - - -template - void stable_sort(RandomAccessIterator first, - RandomAccessIterator last) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::stable_sort(select_system(system), first, last); -} // end stable_sort() - - -template - void stable_sort(RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::stable_sort(select_system(system), first, last, comp); -} // end stable_sort() - - - -///////////////////// -// Key-Value Sorts // -///////////////////// - -template - void sort_by_key(RandomAccessIterator1 keys_first, - 
RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::sort_by_key(select_system(system1,system2), keys_first, keys_last, values_first); -} // end sort_by_key() - - -template - void sort_by_key(RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::sort_by_key(select_system(system1,system2), keys_first, keys_last, values_first, comp); -} // end sort_by_key() - - -template - void stable_sort_by_key(RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::stable_sort_by_key(select_system(system1,system2), keys_first, keys_last, values_first); -} // end stable_sort_by_key() - - -template - void stable_sort_by_key(RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::stable_sort_by_key(select_system(system1,system2), keys_first, keys_last, values_first, comp); -} // end stable_sort_by_key() - - -template - bool 
is_sorted(ForwardIterator first, - ForwardIterator last) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::is_sorted(select_system(system), first, last); -} // end is_sorted() - - -template - bool is_sorted(ForwardIterator first, - ForwardIterator last, - Compare comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::is_sorted(select_system(system), first, last, comp); -} // end is_sorted() - - -template - ForwardIterator is_sorted_until(ForwardIterator first, - ForwardIterator last) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::is_sorted_until(select_system(system), first, last); -} // end is_sorted_until() - - -template - ForwardIterator is_sorted_until(ForwardIterator first, - ForwardIterator last, - Compare comp) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::is_sorted_until(select_system(system), first, last, comp); -} // end is_sorted_until() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/static_assert.h b/compat/thrust/detail/static_assert.h deleted file mode 100644 index ccc084286c..0000000000 --- a/compat/thrust/detail/static_assert.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -/* - * (C) Copyright John Maddock 2000. - * - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying NOTICE file for the complete license) - * - * For more information, see http://www.boost.org - */ - -// -// Helper macro THRUST_JOIN (based on BOOST_JOIN): -// The following piece of macro magic joins the two -// arguments together, even when one of the arguments is -// itself a macro (see 16.3.1 in C++ standard). The key -// is that macro expansion of macro arguments does not -// occur in THRUST_DO_JOIN2 but does in THRUST_DO_JOIN. 
-// -#define THRUST_JOIN( X, Y ) THRUST_DO_JOIN( X, Y ) -#define THRUST_DO_JOIN( X, Y ) THRUST_DO_JOIN2(X,Y) -#define THRUST_DO_JOIN2( X, Y ) X##Y - -namespace thrust -{ - -namespace detail -{ - -// HP aCC cannot deal with missing names for template value parameters -template struct STATIC_ASSERTION_FAILURE; - -template <> struct STATIC_ASSERTION_FAILURE { enum { value = 1 }; }; - -// HP aCC cannot deal with missing names for template value parameters -template struct static_assert_test{}; - -template - struct depend_on_instantiation -{ - static const bool value = x; -}; - -} // end detail - -} // end thrust - -#define THRUST_STATIC_ASSERT( B ) \ - typedef ::thrust::detail::static_assert_test<\ - sizeof(::thrust::detail::STATIC_ASSERTION_FAILURE< (bool)( B ) >)>\ - THRUST_JOIN(thrust_static_assert_typedef_, __LINE__) - diff --git a/compat/thrust/detail/swap.h b/compat/thrust/detail/swap.h deleted file mode 100644 index 9f82ac247a..0000000000 --- a/compat/thrust/detail/swap.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include - -namespace thrust -{ - -template -__host__ __device__ -inline void swap(Assignable1 &a, Assignable2 &b) -{ - Assignable1 temp = a; - a = b; - b = temp; -} // end swap() - -} // end namespace thrust - diff --git a/compat/thrust/detail/swap.inl b/compat/thrust/detail/swap.inl deleted file mode 100644 index eafd70ae6a..0000000000 --- a/compat/thrust/detail/swap.inl +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#include -#include -#include - diff --git a/compat/thrust/detail/swap_ranges.inl b/compat/thrust/detail/swap_ranges.inl deleted file mode 100644 index e3b06deb01..0000000000 --- a/compat/thrust/detail/swap_ranges.inl +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file swap_ranges.inl - * \brief Inline file for swap_ranges.h. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - ForwardIterator2 swap_ranges(const thrust::detail::execution_policy_base &exec, - ForwardIterator1 first1, - ForwardIterator1 last1, - ForwardIterator2 first2) -{ - using thrust::system::detail::generic::swap_ranges; - return swap_ranges(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2); -} // end swap_ranges() - - -template - ForwardIterator2 swap_ranges(ForwardIterator1 first1, - ForwardIterator1 last1, - ForwardIterator2 first2) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::swap_ranges(select_system(system1,system2), first1, last1, first2); -} // end swap_ranges() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/tabulate.inl b/compat/thrust/detail/tabulate.inl deleted file mode 100644 index 961c76e9f6..0000000000 --- a/compat/thrust/detail/tabulate.inl +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - void tabulate(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - UnaryOperation unary_op) -{ - using thrust::system::detail::generic::tabulate; - return tabulate(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, unary_op); -} // end tabulate() - - -template - void tabulate(ForwardIterator first, - ForwardIterator last, - UnaryOperation unary_op) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::tabulate(select_system(system), first, last, unary_op); -} // end tabulate() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/temporary_array.h b/compat/thrust/detail/temporary_array.h deleted file mode 100644 index 3a9e08481c..0000000000 --- a/compat/thrust/detail/temporary_array.h +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file temporary_array.h - * \brief Container-like class temporary storage inside algorithms. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - - -template - class temporary_array - : public contiguous_storage< - T, - no_throw_allocator< - temporary_allocator - > - > -{ - private: - typedef contiguous_storage< - T, - no_throw_allocator< - temporary_allocator - > - > super_t; - - // to help out the constructor - typedef no_throw_allocator > alloc_type; - - public: - typedef typename super_t::size_type size_type; - - temporary_array(thrust::execution_policy &system, size_type n); - - // provide a kill-switch to explicitly avoid initialization - temporary_array(int uninit, thrust::execution_policy &system, size_type n); - - template - temporary_array(thrust::execution_policy &system, - InputIterator first, - size_type n); - - template - temporary_array(thrust::execution_policy &system, - thrust::execution_policy &input_system, - InputIterator first, - size_type n); - - template - temporary_array(thrust::execution_policy &system, - InputIterator first, - InputIterator last); - - template - temporary_array(thrust::execution_policy &system, - thrust::execution_policy &input_system, - InputIterator first, - InputIterator last); - - ~temporary_array(); -}; // end temporary_array - - -// XXX eliminate this when we do ranges for real -template - class tagged_iterator_range -{ - public: - typedef thrust::detail::tagged_iterator iterator; - - template - tagged_iterator_range(const Ignored1 &, const Ignored2 &, Iterator first, Iterator last) - : m_begin(reinterpret_tag(first)), - m_end(reinterpret_tag(last)) - {} - - iterator begin(void) const { return m_begin; } - iterator end(void) const { return m_end; } - - private: - iterator m_begin, m_end; -}; - - -// if FromSystem is convertible to ToSystem, then just make a shallow -// copy of the range. 
else, use a temporary_array -// note that the resulting iterator is explicitly tagged with ToSystem either way -template - struct move_to_system_base - : public eval_if< - is_convertible< - FromSystem, - ToSystem - >::value, - identity_< - tagged_iterator_range - >, - identity_< - temporary_array< - typename thrust::iterator_value::type, - ToSystem - > - > - > -{}; - - -template - class move_to_system - : public move_to_system_base< - Iterator, - FromSystem, - ToSystem - >::type -{ - typedef typename move_to_system_base::type super_t; - - public: - move_to_system(thrust::execution_policy &from_system, - thrust::execution_policy &to_system, - Iterator first, - Iterator last) - : super_t(to_system, from_system, first, last) {} -}; - - -} // end detail -} // end thrust - -#include - diff --git a/compat/thrust/detail/temporary_array.inl b/compat/thrust/detail/temporary_array.inl deleted file mode 100644 index 36ed16736e..0000000000 --- a/compat/thrust/detail/temporary_array.inl +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include - - -namespace thrust -{ - -namespace detail -{ -namespace temporary_array_detail -{ - - -template struct avoid_initialization : thrust::detail::has_trivial_copy_constructor {}; - - -template -typename thrust::detail::enable_if< - avoid_initialization::value ->::type - construct_values(TemporaryArray &, - Size) -{ - // avoid the overhead of initialization -} // end construct_values() - - -template -typename thrust::detail::disable_if< - avoid_initialization::value ->::type - construct_values(TemporaryArray &a, - Size n) -{ - a.default_construct_n(a.begin(), n); -} // end construct_values() - - -} // end temporary_array_detail - - -template - temporary_array - ::temporary_array(thrust::execution_policy &system, size_type n) - :super_t(n, alloc_type(temporary_allocator(system))) -{ - temporary_array_detail::construct_values(*this, n); -} // end temporary_array::temporary_array() - - -template - temporary_array - ::temporary_array(int, thrust::execution_policy &system, size_type n) - :super_t(n, alloc_type(temporary_allocator(system))) -{ - // avoid initialization - ; -} // end temporary_array::temporary_array() - - -template - template - temporary_array - ::temporary_array(thrust::execution_policy &system, - InputIterator first, - size_type n) - : super_t(alloc_type(temporary_allocator(system))) -{ - super_t::allocate(n); - - super_t::uninitialized_copy_n(system, first, n, super_t::begin()); -} // end temporary_array::temporary_array() - - -template - template - temporary_array - ::temporary_array(thrust::execution_policy &system, - thrust::execution_policy &input_system, - InputIterator first, - size_type n) - : super_t(alloc_type(temporary_allocator(system))) -{ - super_t::allocate(n); - - super_t::uninitialized_copy_n(input_system, first, n, super_t::begin()); -} // end temporary_array::temporary_array() - - -template - template - temporary_array - ::temporary_array(thrust::execution_policy &system, - InputIterator 
first, - InputIterator last) - : super_t(alloc_type(temporary_allocator(system))) -{ - super_t::allocate(thrust::distance(first,last)); - - super_t::uninitialized_copy(system, first, last, super_t::begin()); -} // end temporary_array::temporary_array() - - -template - template - temporary_array - ::temporary_array(thrust::execution_policy &system, - thrust::execution_policy &input_system, - InputIterator first, - InputIterator last) - : super_t(alloc_type(temporary_allocator(system))) -{ - super_t::allocate(thrust::distance(first,last)); - - super_t::uninitialized_copy(input_system, first, last, super_t::begin()); -} // end temporary_array::temporary_array() - - -template - temporary_array - ::~temporary_array() -{ - // note that super_t::destroy will ignore trivial destructors automatically - super_t::destroy(super_t::begin(), super_t::end()); -} // end temporary_array::~temporary_array() - -} // end detail - -} // end thrust - diff --git a/compat/thrust/detail/temporary_buffer.h b/compat/thrust/detail/temporary_buffer.h deleted file mode 100644 index 046a3b363a..0000000000 --- a/compat/thrust/detail/temporary_buffer.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ -namespace get_temporary_buffer_detail -{ - - -template - thrust::pair, typename thrust::pointer::difference_type> - down_cast_pair(Pair p) -{ - // XXX should use a hypothetical thrust::static_pointer_cast here - thrust::pointer ptr = thrust::pointer(static_cast(thrust::raw_pointer_cast(p.first))); - - typedef thrust::pair, typename thrust::pointer::difference_type> result_type; - return result_type(ptr, p.second); -} // end down_cast_pair() - - -} // end get_temporary_buffer_detail -} // end detail - - -template - thrust::pair, typename thrust::pointer::difference_type> - get_temporary_buffer(const thrust::detail::execution_policy_base &exec, typename thrust::pointer::difference_type n) -{ - using thrust::system::detail::generic::get_temporary_buffer; - - return thrust::detail::get_temporary_buffer_detail::down_cast_pair(get_temporary_buffer(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), n)); -} // end get_temporary_buffer() - - -template - void return_temporary_buffer(const thrust::detail::execution_policy_base &exec, Pointer p) -{ - using thrust::system::detail::generic::return_temporary_buffer; - - return return_temporary_buffer(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), p); -} // end return_temporary_buffer() - - -} // end thrust - diff --git a/compat/thrust/detail/transform.inl b/compat/thrust/detail/transform.inl deleted file mode 100644 index ae303bcc0d..0000000000 --- a/compat/thrust/detail/transform.inl +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file transform.inl - * \brief Inline file for transform.h. - */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - OutputIterator transform(const thrust::detail::execution_policy_base &exec, - InputIterator first, InputIterator last, - OutputIterator result, - UnaryFunction op) -{ - using thrust::system::detail::generic::transform; - return transform(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, op); -} // end transform() - - -template - OutputIterator transform(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryFunction op) -{ - using thrust::system::detail::generic::transform; - return transform(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, result, op); -} // end transform() - - -template - ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, - InputIterator first, InputIterator last, - ForwardIterator result, - UnaryFunction op, - Predicate pred) -{ - using thrust::system::detail::generic::transform_if; - return transform_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, op, pred); -} // end transform_if() - - -template - ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, InputIterator1 last, - InputIterator2 stencil, - ForwardIterator result, - UnaryFunction op, - Predicate pred) -{ - using 
thrust::system::detail::generic::transform_if; - return transform_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, stencil, result, op, pred); -} // end transform_if() - - -template - ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, - InputIterator3 stencil, - ForwardIterator result, - BinaryFunction binary_op, - Predicate pred) -{ - using thrust::system::detail::generic::transform_if; - return transform_if(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first1, last1, first2, stencil, result, binary_op, pred); -} // end transform_if() - - -template - OutputIterator transform(InputIterator first, - InputIterator last, - OutputIterator result, - UnaryFunction op) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::transform(select_system(system1,system2), first, last, result, op); -} // end transform() - - -template - OutputIterator transform(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryFunction op) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::transform(select_system(system1,system2,system3), first1, last1, first2, result, op); -} // end transform() - - -template - ForwardIterator transform_if(InputIterator first, - InputIterator last, - ForwardIterator result, - UnaryFunction unary_op, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type 
System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::transform_if(select_system(system1,system2), first, last, result, unary_op, pred); -} // end transform_if() - - -template - ForwardIterator transform_if(InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - ForwardIterator result, - UnaryFunction unary_op, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - - System1 system1; - System2 system2; - System3 system3; - - return thrust::transform_if(select_system(system1,system2,system3), first, last, stencil, result, unary_op, pred); -} // end transform_if() - - -template - ForwardIterator transform_if(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator3 stencil, - ForwardIterator result, - BinaryFunction binary_op, - Predicate pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - - return thrust::transform_if(select_system(system1,system2,system3,system4), first1, last1, first2, stencil, result, binary_op, pred); -} // end transform_if() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/transform_reduce.inl b/compat/thrust/detail/transform_reduce.inl deleted file mode 100644 index ede65030b1..0000000000 --- a/compat/thrust/detail/transform_reduce.inl +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you 
may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file transform_reduce.inl - * \brief Inline file for transform_reduce.h. - */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - OutputType transform_reduce(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - UnaryFunction unary_op, - OutputType init, - BinaryFunction binary_op) -{ - using thrust::system::detail::generic::transform_reduce; - return transform_reduce(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, unary_op, init, binary_op); -} // end transform_reduce() - - -template - OutputType transform_reduce(InputIterator first, - InputIterator last, - UnaryFunction unary_op, - OutputType init, - BinaryFunction binary_op) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::transform_reduce(select_system(system), first, last, unary_op, init, binary_op); -} // end transform_reduce() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/transform_scan.inl b/compat/thrust/detail/transform_scan.inl deleted file mode 100644 index 0187c4b98d..0000000000 --- a/compat/thrust/detail/transform_scan.inl +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file transform_scan.inl - * \brief Inline file for transform_scan.h. - */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - OutputIterator transform_inclusive_scan(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - UnaryFunction unary_op, - AssociativeOperator binary_op) -{ - using thrust::system::detail::generic::transform_inclusive_scan; - return transform_inclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, unary_op, binary_op); -} // end transform_inclusive_scan() - - -template - OutputIterator transform_exclusive_scan(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - UnaryFunction unary_op, - T init, - AssociativeOperator binary_op) -{ - using thrust::system::detail::generic::transform_exclusive_scan; - return transform_exclusive_scan(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result, unary_op, init, binary_op); -} // end transform_exclusive_scan() - - -template - OutputIterator transform_inclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result, - UnaryFunction unary_op, - BinaryFunction binary_op) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return 
thrust::transform_inclusive_scan(select_system(system1,system2), first, last, result, unary_op, binary_op); -} // end transform_inclusive_scan() - - -template - OutputIterator transform_exclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result, - UnaryFunction unary_op, - T init, - AssociativeOperator binary_op) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::transform_exclusive_scan(select_system(system1,system2), first, last, result, unary_op, init, binary_op); -} // end transform_exclusive_scan() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/trivial_sequence.h b/compat/thrust/detail/trivial_sequence.h deleted file mode 100644 index cc7e32be27..0000000000 --- a/compat/thrust/detail/trivial_sequence.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file trivial_sequence.h - * \brief Container-like class for wrapping sequences. The wrapped - * sequence always has trivial iterators, even when the input - * sequence does not. 
- */ - - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -namespace detail -{ - -// never instantiated -template struct _trivial_sequence { }; - -// trivial case -template -struct _trivial_sequence -{ - typedef Iterator iterator_type; - Iterator first, last; - - _trivial_sequence(thrust::execution_policy &, Iterator _first, Iterator _last) : first(_first), last(_last) - { -// std::cout << "trivial case" << std::endl; - } - - iterator_type begin() { return first; } - iterator_type end() { return last; } -}; - -// non-trivial case -template -struct _trivial_sequence -{ - typedef typename thrust::iterator_value::type iterator_value; - typedef typename thrust::detail::temporary_array::iterator iterator_type; - - thrust::detail::temporary_array buffer; - - _trivial_sequence(thrust::execution_policy &exec, Iterator first, Iterator last) - : buffer(exec, first, last) - { -// std::cout << "non-trivial case" << std::endl; - } - - iterator_type begin() { return buffer.begin(); } - iterator_type end() { return buffer.end(); } -}; - -template -struct trivial_sequence - : detail::_trivial_sequence::type> -{ - typedef _trivial_sequence::type> super_t; - - trivial_sequence(thrust::execution_policy &exec, Iterator first, Iterator last) : super_t(exec, first, last) { } -}; - -} // end namespace detail - -} // end namespace thrust - diff --git a/compat/thrust/detail/tuple.inl b/compat/thrust/detail/tuple.inl deleted file mode 100644 index 067ad636c4..0000000000 --- a/compat/thrust/detail/tuple.inl +++ /dev/null @@ -1,948 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -namespace thrust -{ - -// define null_type -struct null_type {}; - -// null_type comparisons -__host__ __device__ inline -bool operator==(const null_type&, const null_type&) { return true; } - -__host__ __device__ inline -bool operator>=(const null_type&, const null_type&) { return true; } - -__host__ __device__ inline -bool operator<=(const null_type&, const null_type&) { return true; } - -__host__ __device__ inline -bool operator!=(const null_type&, const null_type&) { return false; } - -__host__ __device__ inline -bool operator<(const null_type&, const null_type&) { return false; } - -__host__ __device__ inline -bool operator>(const null_type&, const null_type&) { return false; } - -// forward declaration for tuple -template < - class T0 = null_type, class T1 = null_type, class T2 = null_type, - class T3 = null_type, class T4 = null_type, class T5 = null_type, - class T6 = null_type, class T7 = null_type, class T8 = null_type, - class T9 = null_type> -class tuple; - -// forward declaration of tuple_element -template struct tuple_element; - -// specializations for tuple_element -template - struct tuple_element<0,T> -{ - typedef typename T::head_type type; -}; // end tuple_element<0,T> - -template - struct tuple_element -{ - private: - typedef typename T::tail_type Next; - typedef typename tuple_element::type unqualified_type; - - public: - typedef typename thrust::detail::add_const::type type; -}; // end tuple_element - -template - struct tuple_element<0,const T> -{ - typedef typename thrust::detail::add_const::type 
type; -}; // end tuple_element<0,const T> - - - -// forward declaration of tuple_size -template struct tuple_size; - -// specializations for tuple_size -template<> - struct tuple_size< tuple<> > -{ - static const int value = 0; -}; // end tuple_size< tuple<> > - -template<> - struct tuple_size -{ - static const int value = 0; -}; // end tuple_size - - - -// forward declaration of detail::cons -namespace detail -{ - -template struct cons; - -} // end detail - - -// -- some traits classes for get functions -template struct access_traits -{ - typedef const T& const_type; - typedef T& non_const_type; - - typedef const typename thrust::detail::remove_cv::type& parameter_type; - -// used as the tuple constructors parameter types -// Rationale: non-reference tuple element types can be cv-qualified. -// It should be possible to initialize such types with temporaries, -// and when binding temporaries to references, the reference must -// be non-volatile and const. 8.5.3. (5) -}; // end access_traits - -template struct access_traits -{ - typedef T& const_type; - typedef T& non_const_type; - - typedef T& parameter_type; -}; // end access_traits - -// forward declarations of get() -template -__host__ __device__ -inline typename access_traits< - typename tuple_element >::type - >::non_const_type -// XXX we probably don't need to do this for any compiler we care about -jph -//get(cons& c BOOST_APPEND_EXPLICIT_TEMPLATE_NON_TYPE(int, N)); -get(detail::cons& c); - -template -__host__ __device__ -inline typename access_traits< - typename tuple_element >::type - >::const_type -// XXX we probably don't need to do this for any compiler we care about -jph -//get(const cons& c BOOST_APPEND_EXPLICIT_TEMPLATE_NON_TYPE(int, N)); -get(const detail::cons& c); - -namespace detail -{ - -// -- generate error template, referencing to non-existing members of this -// template is used to produce compilation errors intentionally -template -class generate_error; - -// - cons getters 
-------------------------------------------------------- -// called: get_class::get(aTuple) - -template< int N > -struct get_class -{ - template - __host__ __device__ - inline static RET get(const cons& t) - { - // XXX we may not need to deal with this for any compiler we care about -jph - //return get_class::BOOST_NESTED_TEMPLATE get(t.tail); - return get_class::template get(t.tail); - - // gcc 4.3 couldn't compile this: - //return get_class::get(t.tail); - } - - template - __host__ __device__ - inline static RET get(cons& t) - { - // XXX we may not need to deal with this for any compiler we care about -jph - //return get_class::BOOST_NESTED_TEMPLATE get(t.tail); - return get_class::template get(t.tail); - - // gcc 4.3 couldn't compile this: - //return get_class::get(t.tail); - } -}; // end get_class - -template<> -struct get_class<0> -{ - template - __host__ __device__ - inline static RET get(const cons& t) - { - return t.head; - } - - template - __host__ __device__ - inline static RET get(cons& t) - { - return t.head; - } -}; // get get_class<0> - - -template struct IF -{ - typedef Then RET; -}; - -template struct IF -{ - typedef Else RET; -}; - -// These helper templates wrap void types and plain function types. -// The rationale is to allow one to write tuple types with those types -// as elements, even though it is not possible to instantiate such object. 
-// E.g: typedef tuple some_type; // ok -// but: some_type x; // fails - -template class non_storeable_type -{ - __host__ __device__ - non_storeable_type(); -}; - -template struct wrap_non_storeable_type -{ - // XXX is_function looks complicated; punt for now -jph - //typedef typename IF< - // ::thrust::detail::is_function::value, non_storeable_type, T - //>::RET type; - - typedef T type; -}; - -template <> struct wrap_non_storeable_type -{ - typedef non_storeable_type type; -}; - - -template - struct cons -{ - typedef HT head_type; - typedef TT tail_type; - - typedef typename - wrap_non_storeable_type::type stored_head_type; - - stored_head_type head; - tail_type tail; - - inline __host__ __device__ - typename access_traits::non_const_type - get_head() { return head; } - - inline __host__ __device__ - typename access_traits::non_const_type - get_tail() { return tail; } - - inline __host__ __device__ - typename access_traits::const_type - get_head() const { return head; } - - inline __host__ __device__ - typename access_traits::const_type - get_tail() const { return tail; } - - inline __host__ __device__ - cons(void) : head(), tail() {} - // cons() : head(detail::default_arg::f()), tail() {} - - // the argument for head is not strictly needed, but it prevents - // array type elements. This is good, since array type elements - // cannot be supported properly in any case (no assignment, - // copy works only if the tails are exactly the same type, ...) 
- - inline __host__ __device__ - cons(typename access_traits::parameter_type h, - const tail_type& t) - : head (h), tail(t) {} - - template - inline __host__ __device__ - cons( T1& t1, T2& t2, T3& t3, T4& t4, T5& t5, - T6& t6, T7& t7, T8& t8, T9& t9, T10& t10 ) - : head (t1), - tail (t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast(null_type())) - {} - - template - inline __host__ __device__ - cons( const null_type& /*t1*/, T2& t2, T3& t3, T4& t4, T5& t5, - T6& t6, T7& t7, T8& t8, T9& t9, T10& t10 ) - : head (), - tail (t2, t3, t4, t5, t6, t7, t8, t9, t10, static_cast(null_type())) - {} - - - template - inline __host__ __device__ - cons( const cons& u ) : head(u.head), tail(u.tail) {} - - template - inline __host__ __device__ - cons& operator=( const cons& u ) { - head=u.head; tail=u.tail; return *this; - } - - // must define assignment operator explicitly, implicit version is - // illformed if HT is a reference (12.8. (12)) - inline __host__ __device__ - cons& operator=(const cons& u) { - head = u.head; tail = u.tail; return *this; - } - - // XXX enable when we support std::pair -jph - //template - //__host__ __device__ - //cons& operator=( const std::pair& u ) { - // //BOOST_STATIC_ASSERT(length::value == 2); // check length = 2 - // head = u.first; tail.head = u.second; return *this; - //} - - // get member functions (non-const and const) - template - __host__ __device__ - typename access_traits< - typename tuple_element >::type - >::non_const_type - get() { - return thrust::get(*this); // delegate to non-member get - } - - template - __host__ __device__ - typename access_traits< - typename tuple_element >::type - >::const_type - get() const { - return thrust::get(*this); // delegate to non-member get - } - - inline __host__ __device__ - void swap(cons &c) - { - using thrust::swap; - - swap(head, c.head); - tail.swap(c.tail); - } -}; - -template - struct cons -{ - typedef HT head_type; - typedef null_type tail_type; - typedef cons self_type; - - typedef typename 
- wrap_non_storeable_type::type stored_head_type; - stored_head_type head; - - typename access_traits::non_const_type - inline __host__ __device__ - get_head() { return head; } - - inline __host__ __device__ - null_type get_tail() { return null_type(); } - - inline __host__ __device__ - typename access_traits::const_type - get_head() const { return head; } - - inline __host__ __device__ - null_type get_tail() const { return null_type(); } - - inline __host__ __device__ - cons() : head() {} - - inline __host__ __device__ - cons(typename access_traits::parameter_type h, - const null_type& = null_type()) - : head (h) {} - - template - inline __host__ __device__ - cons(T1& t1, const null_type&, const null_type&, const null_type&, - const null_type&, const null_type&, const null_type&, - const null_type&, const null_type&, const null_type&) - : head (t1) {} - - inline __host__ __device__ - cons(const null_type&, - const null_type&, const null_type&, const null_type&, - const null_type&, const null_type&, const null_type&, - const null_type&, const null_type&, const null_type&) - : head () {} - - template - inline __host__ __device__ - cons( const cons& u ) : head(u.head) {} - - template - inline __host__ __device__ - cons& operator=(const cons& u ) - { - head = u.head; - return *this; - } - - // must define assignment operator explicitly, implicit version - // is illformed if HT is a reference - inline __host__ __device__ - cons& operator=(const cons& u) { head = u.head; return *this; } - - template - inline __host__ __device__ - typename access_traits< - typename tuple_element::type - >::non_const_type - // XXX we probably don't need this for the compilers we care about -jph - //get(BOOST_EXPLICIT_TEMPLATE_NON_TYPE(int, N)) - get(void) - { - return thrust::get(*this); - } - - template - inline __host__ __device__ - typename access_traits< - typename tuple_element::type - >::const_type - // XXX we probably don't need this for the compilers we care about -jph - 
//get(BOOST_EXPLICIT_TEMPLATE_NON_TYPE(int, N)) const - get(void) const - { - return thrust::get(*this); - } - - inline __host__ __device__ - void swap(cons &c) - { - using thrust::swap; - - swap(head, c.head); - } -}; // end cons - -template - struct map_tuple_to_cons -{ - typedef cons::type - > type; -}; // end map_tuple_to_cons - -// The empty tuple is a null_type -template <> - struct map_tuple_to_cons -{ - typedef null_type type; -}; // end map_tuple_to_cons<...> - - - -// --------------------------------------------------------------------------- -// The call_traits for make_tuple - -// Must be instantiated with plain or const plain types (not with references) - -// from template foo(const T& t) : make_tuple_traits::type -// from template foo(T& t) : make_tuple_traits::type - -// Conversions: -// T -> T, -// references -> compile_time_error -// array -> const ref array - - -template -struct make_tuple_traits { - typedef T type; - - // commented away, see below (JJ) - // typedef typename IF< - // boost::is_function::value, - // T&, - // T>::RET type; - -}; - -// The is_function test was there originally for plain function types, -// which can't be stored as such (we must either store them as references or -// pointers). Such a type could be formed if make_tuple was called with a -// reference to a function. -// But this would mean that a const qualified function type was formed in -// the make_tuple function and hence make_tuple can't take a function -// reference as a parameter, and thus T can't be a function type. -// So is_function test was removed. -// (14.8.3. says that type deduction fails if a cv-qualified function type -// is created. (It only applies for the case of explicitly specifying template -// args, though?)) (JJ) - -template -struct make_tuple_traits { - typedef typename - detail::generate_error:: - do_not_use_with_reference_type error; -}; - -// Arrays can't be stored as plain types; convert them to references. 
-// All arrays are converted to const. This is because make_tuple takes its -// parameters as const T& and thus the knowledge of the potential -// non-constness of actual argument is lost. -template struct make_tuple_traits { - typedef const T (&type)[n]; -}; - -template -struct make_tuple_traits { - typedef const T (&type)[n]; -}; - -template struct make_tuple_traits { - typedef const volatile T (&type)[n]; -}; - -template -struct make_tuple_traits { - typedef const volatile T (&type)[n]; -}; - -// XXX enable these if we ever care about reference_wrapper -jph -//template -//struct make_tuple_traits >{ -// typedef T& type; -//}; -// -//template -//struct make_tuple_traits >{ -// typedef T& type; -//}; - - -// a helper traits to make the make_tuple functions shorter (Vesa Karvonen's -// suggestion) -template < - class T0 = null_type, class T1 = null_type, class T2 = null_type, - class T3 = null_type, class T4 = null_type, class T5 = null_type, - class T6 = null_type, class T7 = null_type, class T8 = null_type, - class T9 = null_type -> -struct make_tuple_mapper { - typedef - tuple::type, - typename make_tuple_traits::type, - typename make_tuple_traits::type, - typename make_tuple_traits::type, - typename make_tuple_traits::type, - typename make_tuple_traits::type, - typename make_tuple_traits::type, - typename make_tuple_traits::type, - typename make_tuple_traits::type, - typename make_tuple_traits::type> type; -}; - -} // end detail - - -template -__host__ __device__ -inline typename access_traits< - typename tuple_element >::type - >::non_const_type -get(detail::cons& c) -{ - //return detail::get_class::BOOST_NESTED_TEMPLATE - - // gcc 4.3 couldn't compile this: - //return detail::get_class:: - - return detail::get_class::template - get< - typename access_traits< - typename tuple_element >::type - >::non_const_type, - HT,TT - >(c); -} - - -// get function for const cons-lists, returns a const reference to -// the element. 
If the element is a reference, returns the reference -// as such (that is, can return a non-const reference) -template -__host__ __device__ -inline typename access_traits< - typename tuple_element >::type - >::const_type -get(const detail::cons& c) -{ - //return detail::get_class::BOOST_NESTED_TEMPLATE - - // gcc 4.3 couldn't compile this: - //return detail::get_class:: - - return detail::get_class::template - get< - typename access_traits< - typename tuple_element >::type - >::const_type, - HT,TT - >(c); -} - - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0) -{ - typedef typename detail::make_tuple_mapper::type t; - return t(t0); -} // end make_tuple() - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0, const T1& t1) -{ - typedef typename detail::make_tuple_mapper::type t; - return t(t0,t1); -} // end make_tuple() - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0, const T1& t1, const T2& t2) -{ - typedef typename detail::make_tuple_mapper::type t; - return t(t0,t1,t2); -} // end make_tuple() - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3) -{ - typedef typename detail::make_tuple_mapper::type t; - return t(t0,t1,t2,t3); -} // end make_tuple() - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4) -{ - typedef typename detail::make_tuple_mapper::type t; - return t(t0,t1,t2,t3,t4); -} // end make_tuple() - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5) -{ - typedef typename detail::make_tuple_mapper::type t; - return t(t0,t1,t2,t3,t4,t5); -} // end 
make_tuple() - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6) -{ - typedef typename detail::make_tuple_mapper::type t; - return t(t0,t1,t2,t3,t4,t5,t6); -} // end make_tuple() - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7) -{ - typedef typename detail::make_tuple_mapper::type t; - return t(t0,t1,t2,t3,t4,t5,t6,t7); -} // end make_tuple() - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7, const T8& t8) -{ - typedef typename detail::make_tuple_mapper::type t; - return t(t0,t1,t2,t3,t4,t5,t6,t7,t8); -} // end make_tuple() - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7, const T8& t8, const T9& t9) -{ - typedef typename detail::make_tuple_mapper::type t; - return t(t0,t1,t2,t3,t4,t5,t6,t7,t8,t9); -} // end make_tuple() - - -template -__host__ __device__ inline -tuple tie(T0 &t0) -{ - return tuple(t0); -} - -template -__host__ __device__ inline -tuple tie(T0 &t0, T1 &t1) -{ - return tuple(t0,t1); -} - -template -__host__ __device__ inline -tuple tie(T0 &t0, T1 &t1, T2 &t2) -{ - return tuple(t0,t1,t2); -} - -template -__host__ __device__ inline -tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3) -{ - return tuple(t0,t1,t2,t3); -} - -template -__host__ __device__ inline -tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4) -{ - return tuple(t0,t1,t2,t3,t4); -} - -template -__host__ __device__ inline -tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5) -{ - return 
tuple(t0,t1,t2,t3,t4,t5); -} - -template -__host__ __device__ inline -tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6) -{ - return tuple(t0,t1,t2,t3,t4,t5,t6); -} - -template -__host__ __device__ inline -tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7) -{ - return tuple(t0,t1,t2,t3,t4,t5,t6,t7); -} - -template -__host__ __device__ inline -tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7, T8 &t8) -{ - return tuple(t0,t1,t2,t3,t4,t5,t6,t7,t8); -} - -template -__host__ __device__ inline -tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7, T8 &t8, T9 &t9) -{ - return tuple(t0,t1,t2,t3,t4,t5,t6,t7,t8,t9); -} - -template< - typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9, - typename U0, typename U1, typename U2, typename U3, typename U4, typename U5, typename U6, typename U7, typename U8, typename U9 -> -__host__ __device__ inline -void swap(thrust::tuple &x, - thrust::tuple &y) -{ - return x.swap(y); -} - - - -namespace detail -{ - -template -__host__ __device__ -inline bool eq(const T1& lhs, const T2& rhs) { - return lhs.get_head() == rhs.get_head() && - eq(lhs.get_tail(), rhs.get_tail()); -} -template<> -inline bool eq(const null_type&, const null_type&) { return true; } - -template -__host__ __device__ -inline bool neq(const T1& lhs, const T2& rhs) { - return lhs.get_head() != rhs.get_head() || - neq(lhs.get_tail(), rhs.get_tail()); -} -template<> -__host__ __device__ -inline bool neq(const null_type&, const null_type&) { return false; } - -template -__host__ __device__ -inline bool lt(const T1& lhs, const T2& rhs) { - return (lhs.get_head() < rhs.get_head()) || - (!(rhs.get_head() < lhs.get_head()) && - lt(lhs.get_tail(), rhs.get_tail())); -} -template<> -__host__ __device__ -inline bool lt(const null_type&, const null_type&) { return false; } - -template -__host__ __device__ -inline bool gt(const 
T1& lhs, const T2& rhs) { - return (lhs.get_head() > rhs.get_head()) || - (!(rhs.get_head() > lhs.get_head()) && - gt(lhs.get_tail(), rhs.get_tail())); -} -template<> -__host__ __device__ -inline bool gt(const null_type&, const null_type&) { return false; } - -template -__host__ __device__ -inline bool lte(const T1& lhs, const T2& rhs) { - return lhs.get_head() <= rhs.get_head() && - ( !(rhs.get_head() <= lhs.get_head()) || - lte(lhs.get_tail(), rhs.get_tail())); -} -template<> -__host__ __device__ -inline bool lte(const null_type&, const null_type&) { return true; } - -template -__host__ __device__ -inline bool gte(const T1& lhs, const T2& rhs) { - return lhs.get_head() >= rhs.get_head() && - ( !(rhs.get_head() >= lhs.get_head()) || - gte(lhs.get_tail(), rhs.get_tail())); -} -template<> -__host__ __device__ -inline bool gte(const null_type&, const null_type&) { return true; } - -} // end detail - - - -// equal ---- - -template -__host__ __device__ -inline bool operator==(const detail::cons& lhs, const detail::cons& rhs) -{ - // XXX support this eventually -jph - //// check that tuple lengths are equal - //BOOST_STATIC_ASSERT(tuple_size::value == tuple_size::value); - - return detail::eq(lhs, rhs); -} // end operator==() - -// not equal ----- - -template -__host__ __device__ -inline bool operator!=(const detail::cons& lhs, const detail::cons& rhs) -{ - // XXX support this eventually -jph - //// check that tuple lengths are equal - //BOOST_STATIC_ASSERT(tuple_size::value == tuple_size::value); - - return detail::neq(lhs, rhs); -} // end operator!=() - -// < -template -__host__ __device__ -inline bool operator<(const detail::cons& lhs, const detail::cons& rhs) -{ - // XXX support this eventually -jph - //// check that tuple lengths are equal - //BOOST_STATIC_ASSERT(tuple_size::value == tuple_size::value); - - return detail::lt(lhs, rhs); -} // end operator<() - -// > -template -__host__ __device__ -inline bool operator>(const detail::cons& lhs, const detail::cons& 
rhs) -{ - // XXX support this eventually -jph - //// check that tuple lengths are equal - //BOOST_STATIC_ASSERT(tuple_size::value == tuple_size::value); - - return detail::gt(lhs, rhs); -} // end operator>() - -// <= -template -__host__ __device__ -inline bool operator<=(const detail::cons& lhs, const detail::cons& rhs) -{ - // XXX support this eventually -jph - //// check that tuple lengths are equal - //BOOST_STATIC_ASSERT(tuple_size::value == tuple_size::value); - - return detail::lte(lhs, rhs); -} // end operator<=() - -// >= -template -__host__ __device__ -inline bool operator>=(const detail::cons& lhs, const detail::cons& rhs) -{ - // XXX support this eventually -jph - //// check that tuple lengths are equal - //BOOST_STATIC_ASSERT(tuple_size::value == tuple_size::value); - - return detail::gte(lhs, rhs); -} // end operator>=() - -} // end thrust - diff --git a/compat/thrust/detail/tuple_meta_transform.h b/compat/thrust/detail/tuple_meta_transform.h deleted file mode 100644 index ff99709b6d..0000000000 --- a/compat/thrust/detail/tuple_meta_transform.h +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -namespace thrust -{ - -namespace detail -{ - -template class UnaryMetaFunction, - unsigned int sz = thrust::tuple_size::value> - struct tuple_meta_transform; - -template class UnaryMetaFunction> - struct tuple_meta_transform -{ - typedef null_type type; -}; - -template class UnaryMetaFunction> - struct tuple_meta_transform -{ - typedef thrust::tuple< - typename UnaryMetaFunction::type>::type - > type; -}; - -template class UnaryMetaFunction> - struct tuple_meta_transform -{ - typedef thrust::tuple< - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type - > type; -}; - -template class UnaryMetaFunction> - struct tuple_meta_transform -{ - typedef thrust::tuple< - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type - > type; -}; - -template class UnaryMetaFunction> - struct tuple_meta_transform -{ - typedef thrust::tuple< - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type - > type; -}; - -template class UnaryMetaFunction> - struct tuple_meta_transform -{ - typedef thrust::tuple< - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type - > type; -}; - -template class UnaryMetaFunction> - struct tuple_meta_transform -{ - typedef thrust::tuple< - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type - > type; -}; - -template class UnaryMetaFunction> - struct tuple_meta_transform -{ - typedef thrust::tuple< - typename UnaryMetaFunction::type>::type, - typename 
UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type - > type; -}; - -template class UnaryMetaFunction> - struct tuple_meta_transform -{ - typedef thrust::tuple< - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type - > type; -}; - -template class UnaryMetaFunction> - struct tuple_meta_transform -{ - typedef thrust::tuple< - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type - > type; -}; - -template class UnaryMetaFunction> - struct tuple_meta_transform -{ - typedef thrust::tuple< - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type, - typename UnaryMetaFunction::type>::type - > type; -}; - -} // end detail - -} // end thrust - diff --git a/compat/thrust/detail/tuple_transform.h b/compat/thrust/detail/tuple_transform.h deleted file mode 100644 index f18b8727e1..0000000000 --- a/compat/thrust/detail/tuple_transform.h +++ /dev/null @@ 
-1,418 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ - -namespace detail -{ - -template class UnaryMetaFunction, - typename UnaryFunction, - unsigned int sz = thrust::tuple_size::value> - struct tuple_transform_functor; - - -template class UnaryMetaFunction, - typename UnaryFunction> - struct tuple_transform_functor -{ - static __host__ - typename tuple_meta_transform::type - do_it_on_the_host(const Tuple &t, UnaryFunction f) - { - return thrust::null_type(); - } - - static __host__ __device__ - typename tuple_meta_transform::type - do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) - { - return thrust::null_type(); - } -}; - - -template class UnaryMetaFunction, - typename UnaryFunction> - struct tuple_transform_functor -{ - static __host__ - typename tuple_meta_transform::type - do_it_on_the_host(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t))); - } - - static __host__ __device__ - typename tuple_meta_transform::type - do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t))); - } -}; - - -template class UnaryMetaFunction, - typename UnaryFunction> - struct tuple_transform_functor -{ - static __host__ - typename 
tuple_meta_transform::type - do_it_on_the_host(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t))); - } - - static __host__ __device__ - typename tuple_meta_transform::type - do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t))); - } -}; - - -template class UnaryMetaFunction, - typename UnaryFunction> - struct tuple_transform_functor -{ - static __host__ - typename tuple_meta_transform::type - do_it_on_the_host(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t)), - f(thrust::get<2>(t))); - } - - static __host__ __device__ - typename tuple_meta_transform::type - do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t)), - f(thrust::get<2>(t))); - } -}; - - -template class UnaryMetaFunction, - typename UnaryFunction> - struct tuple_transform_functor -{ - static __host__ - typename tuple_meta_transform::type - do_it_on_the_host(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t)), - f(thrust::get<2>(t)), - f(thrust::get<3>(t))); - } - - static __host__ __device__ - typename tuple_meta_transform::type - do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t)), - f(thrust::get<2>(t)), - f(thrust::get<3>(t))); - } -}; - - -template class UnaryMetaFunction, - typename UnaryFunction> - struct tuple_transform_functor -{ - static __host__ - typename 
tuple_meta_transform::type - do_it_on_the_host(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t)), - f(thrust::get<2>(t)), - f(thrust::get<3>(t)), - f(thrust::get<4>(t))); - } - - static __host__ __device__ - typename tuple_meta_transform::type - do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t)), - f(thrust::get<2>(t)), - f(thrust::get<3>(t)), - f(thrust::get<4>(t))); - } -}; - - -template class UnaryMetaFunction, - typename UnaryFunction> - struct tuple_transform_functor -{ - static __host__ - typename tuple_meta_transform::type - do_it_on_the_host(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t)), - f(thrust::get<2>(t)), - f(thrust::get<3>(t)), - f(thrust::get<4>(t)), - f(thrust::get<5>(t))); - } - - static __host__ __device__ - typename tuple_meta_transform::type - do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t)), - f(thrust::get<2>(t)), - f(thrust::get<3>(t)), - f(thrust::get<4>(t)), - f(thrust::get<5>(t))); - } -}; - - -template class UnaryMetaFunction, - typename UnaryFunction> - struct tuple_transform_functor -{ - static __host__ - typename tuple_meta_transform::type - do_it_on_the_host(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t)), - f(thrust::get<2>(t)), - f(thrust::get<3>(t)), - f(thrust::get<4>(t)), - f(thrust::get<5>(t)), - f(thrust::get<6>(t))); - } - - static __host__ __device__ - typename tuple_meta_transform::type - do_it_on_the_host_or_device(const 
Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t)), - f(thrust::get<2>(t)), - f(thrust::get<3>(t)), - f(thrust::get<4>(t)), - f(thrust::get<5>(t)), - f(thrust::get<6>(t))); - } -}; - - -template class UnaryMetaFunction, - typename UnaryFunction> - struct tuple_transform_functor -{ - static __host__ - typename tuple_meta_transform::type - do_it_on_the_host(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t)), - f(thrust::get<2>(t)), - f(thrust::get<3>(t)), - f(thrust::get<4>(t)), - f(thrust::get<5>(t)), - f(thrust::get<6>(t)), - f(thrust::get<7>(t))); - } - - static __host__ __device__ - typename tuple_meta_transform::type - do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t)), - f(thrust::get<2>(t)), - f(thrust::get<3>(t)), - f(thrust::get<4>(t)), - f(thrust::get<5>(t)), - f(thrust::get<6>(t)), - f(thrust::get<7>(t))); - } -}; - - -template class UnaryMetaFunction, - typename UnaryFunction> - struct tuple_transform_functor -{ - static __host__ - typename tuple_meta_transform::type - do_it_on_the_host(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t)), - f(thrust::get<2>(t)), - f(thrust::get<3>(t)), - f(thrust::get<4>(t)), - f(thrust::get<5>(t)), - f(thrust::get<6>(t)), - f(thrust::get<7>(t)), - f(thrust::get<8>(t))); - } - - static __host__ __device__ - typename tuple_meta_transform::type - do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t)), - f(thrust::get<2>(t)), - 
f(thrust::get<3>(t)), - f(thrust::get<4>(t)), - f(thrust::get<5>(t)), - f(thrust::get<6>(t)), - f(thrust::get<7>(t)), - f(thrust::get<8>(t))); - } -}; - - -template class UnaryMetaFunction, - typename UnaryFunction> - struct tuple_transform_functor -{ - static __host__ - typename tuple_meta_transform::type - do_it_on_the_host(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t)), - f(thrust::get<2>(t)), - f(thrust::get<3>(t)), - f(thrust::get<4>(t)), - f(thrust::get<5>(t)), - f(thrust::get<6>(t)), - f(thrust::get<7>(t)), - f(thrust::get<8>(t)), - f(thrust::get<9>(t))); - } - - static __host__ __device__ - typename tuple_meta_transform::type - do_it_on_the_host_or_device(const Tuple &t, UnaryFunction f) - { - typedef typename tuple_meta_transform::type XfrmTuple; - - return XfrmTuple(f(thrust::get<0>(t)), - f(thrust::get<1>(t)), - f(thrust::get<2>(t)), - f(thrust::get<3>(t)), - f(thrust::get<4>(t)), - f(thrust::get<5>(t)), - f(thrust::get<6>(t)), - f(thrust::get<7>(t)), - f(thrust::get<8>(t)), - f(thrust::get<9>(t))); - } -}; - - -template class UnaryMetaFunction, - typename Tuple, - typename UnaryFunction> -typename tuple_meta_transform::type -tuple_host_transform(const Tuple &t, UnaryFunction f) -{ - return tuple_transform_functor::do_it_on_the_host(t,f); -} - -template class UnaryMetaFunction, - typename Tuple, - typename UnaryFunction> -typename tuple_meta_transform::type -__host__ __device__ -tuple_host_device_transform(const Tuple &t, UnaryFunction f) -{ - return tuple_transform_functor::do_it_on_the_host_or_device(t,f); -} - -} // end detail - -} // end thrust - diff --git a/compat/thrust/detail/type_traits.h b/compat/thrust/detail/type_traits.h deleted file mode 100644 index 5dbeb906eb..0000000000 --- a/compat/thrust/detail/type_traits.h +++ /dev/null @@ -1,641 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache 
License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file type_traits.h - * \brief Temporarily define some type traits - * until nvcc can compile tr1::type_traits. - */ - -#pragma once - -#include - -// XXX nvcc 2.2 closed beta can't compile type_traits -//// find type_traits -// -//#ifdef __GNUC__ -// -//#if __GNUC__ == 4 && __GNUC_MINOR__ == 2 -//#include -//#elif __GNUC__ == 4 && __GNUC_MINOR__ > 2 -//#include -//#endif // GCC version -// -//#endif // GCC -// -//#ifdef _MSC_VER -//#include -//#endif // MSVC - - -namespace thrust -{ - -// forward declaration of device_reference -template class device_reference; - -namespace detail -{ - /// helper classes [4.3]. 
- template - struct integral_constant - { - static const _Tp value = __v; - typedef _Tp value_type; - typedef integral_constant<_Tp, __v> type; - }; - - /// typedef for true_type - typedef integral_constant true_type; - - /// typedef for true_type - typedef integral_constant false_type; - -//template struct is_integral : public std::tr1::is_integral {}; -template struct is_integral : public false_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; -template<> struct is_integral : public true_type {}; - -template struct is_floating_point : public false_type {}; -template<> struct is_floating_point : public true_type {}; -template<> struct is_floating_point : public true_type {}; -template<> struct is_floating_point : public true_type {}; - -template struct is_arithmetic : public is_integral {}; -template<> struct is_arithmetic : public 
true_type {}; -template<> struct is_arithmetic : public true_type {}; -template<> struct is_arithmetic : public true_type {}; -template<> struct is_arithmetic : public true_type {}; - -template struct is_pointer : public false_type {}; -template struct is_pointer : public true_type {}; - -template struct is_device_ptr : public false_type {}; - -template struct is_void : public false_type {}; -template<> struct is_void : public true_type {}; -template<> struct is_void : public true_type {}; - - -namespace tt_detail -{ - - -} // end tt_detail - -template struct is_pod - : public integral_constant< - bool, - is_void::value || is_pointer::value || is_arithmetic::value -#if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC -// use intrinsic type traits - || __is_pod(T) -#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC -// only use the intrinsic for >= 4.3 -#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 3) - || __is_pod(T) -#endif // GCC VERSION -#endif // THRUST_HOST_COMPILER - > - {}; - - -template struct has_trivial_constructor - : public integral_constant< - bool, - is_pod::value -#if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC - || __has_trivial_constructor(T) -#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC -// only use the intrinsic for >= 4.3 -#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 3) - || __has_trivial_constructor(T) -#endif // GCC VERSION -#endif // THRUST_HOST_COMPILER - > -{}; - -template struct has_trivial_copy_constructor - : public integral_constant< - bool, - is_pod::value -#if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC - || __has_trivial_copy(T) -#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC -// only use the intrinsic for >= 4.3 -#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 3) - || __has_trivial_copy(T) -#endif // GCC VERSION -#endif // THRUST_HOST_COMPILER - > -{}; - -template struct has_trivial_destructor : public is_pod {}; - -template struct is_const : public false_type {}; -template struct is_const : public true_type 
{}; - -template struct is_volatile : public false_type {}; -template struct is_volatile : public true_type {}; - -template - struct add_const -{ - typedef T const type; -}; // end add_const - -template - struct remove_const -{ - typedef T type; -}; // end remove_const - -template - struct remove_const -{ - typedef T type; -}; // end remove_const - -template - struct add_volatile -{ - typedef volatile T type; -}; // end add_volatile - -template - struct remove_volatile -{ - typedef T type; -}; // end remove_volatile - -template - struct remove_volatile -{ - typedef T type; -}; // end remove_volatile - -template - struct add_cv -{ - typedef const volatile T type; -}; // end add_cv - -template - struct remove_cv -{ - typedef typename remove_const::type>::type type; -}; // end remove_cv - - -template struct is_reference : public false_type {}; -template struct is_reference : public true_type {}; - -template struct is_device_reference : public false_type {}; -template struct is_device_reference< thrust::device_reference > : public true_type {}; - - -// NB: Careful with reference to void. 
-template::value || is_reference<_Tp>::value)> - struct __add_reference_helper - { typedef _Tp& type; }; - -template - struct __add_reference_helper<_Tp, true> - { typedef _Tp type; }; - -template - struct add_reference - : public __add_reference_helper<_Tp>{}; - -template - struct remove_reference -{ - typedef T type; -}; // end remove_reference - -template - struct remove_reference -{ - typedef T type; -}; // end remove_reference - -template - struct is_same - : public false_type -{ -}; // end is_same - -template - struct is_same - : public true_type -{ -}; // end is_same - -template - struct lazy_is_same - : is_same -{ -}; // end lazy_is_same - -template - struct is_different - : public true_type -{ -}; // end is_different - -template - struct is_different - : public false_type -{ -}; // end is_different - -template - struct lazy_is_different - : is_different -{ -}; // end lazy_is_different - -namespace tt_detail -{ - -template - struct is_int_or_cref -{ - typedef typename remove_reference::type type_sans_ref; - static const bool value = (is_integral::value - || (is_integral::value - && is_const::value - && !is_volatile::value)); -}; // end is_int_or_cref - - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN -__THRUST_DISABLE_MSVC_FORCING_VALUE_TO_BOOL_BEGIN - - -template - struct is_convertible_sfinae -{ - private: - typedef char one_byte; - typedef struct { char two_chars[2]; } two_bytes; - - static one_byte test(To); - static two_bytes test(...); - static From m_from; - - public: - static const bool value = sizeof(test(m_from)) == sizeof(one_byte); -}; // end is_convertible_sfinae - - -__THRUST_DISABLE_MSVC_FORCING_VALUE_TO_BOOL_END -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END - - -template - struct is_convertible_needs_simple_test -{ - static const bool from_is_void = is_void::value; - static const bool to_is_void = is_void::value; - static const bool from_is_float = is_floating_point::type>::value; - static const bool to_is_int_or_cref 
= is_int_or_cref::value; - - static const bool value = (from_is_void || to_is_void || (from_is_float && to_is_int_or_cref)); -}; // end is_convertible_needs_simple_test - - -template::value> - struct is_convertible -{ - static const bool value = (is_void::value - || (is_int_or_cref::value - && !is_void::value)); -}; // end is_convertible - - -template - struct is_convertible -{ - static const bool value = (is_convertible_sfinae::type, To>::value); -}; // end is_convertible - - -} // end tt_detail - -template - struct is_convertible - : public integral_constant::value> -{ -}; // end is_convertible - - -template - struct is_one_convertible_to_the_other - : public integral_constant< - bool, - is_convertible::value || is_convertible::value - > -{}; - - -// mpl stuff - -template - struct or_ - : public integral_constant< - bool, - Condition1::value || Condition2::value || Condition3::value || Condition4::value || Condition5::value || Condition6::value || Condition7::value || Condition8::value || Condition9::value || Condition10::value - > -{ -}; // end or_ - -template - struct and_ - : public integral_constant -{ -}; // end and_ - -template - struct not_ - : public integral_constant -{ -}; // end not_ - -template - struct eval_if -{ -}; // end eval_if - -template - struct eval_if -{ - typedef typename Then::type type; -}; // end eval_if - -template - struct eval_if -{ - typedef typename Else::type type; -}; // end eval_if - -template -// struct identity -// XXX WAR nvcc's confusion with thrust::identity - struct identity_ -{ - typedef T type; -}; // end identity - -template struct enable_if {}; -template struct enable_if {typedef T type;}; - -template struct lazy_enable_if {}; -template struct lazy_enable_if {typedef typename T::type type;}; - -template struct disable_if : enable_if {}; -template struct lazy_disable_if : lazy_enable_if {}; - - -template - struct enable_if_convertible - : enable_if< is_convertible::value, T > -{}; - - -template - struct 
disable_if_convertible - : disable_if< is_convertible::value, T > -{}; - - -template - struct enable_if_different - : enable_if::value, Result> -{}; - - -template - struct is_numeric - : and_< - is_convertible, - is_convertible - > -{ -}; // end is_numeric - - -template struct is_reference_to_const : false_type {}; -template struct is_reference_to_const : true_type {}; - - -// make_unsigned follows - -namespace tt_detail -{ - -template struct make_unsigned_simple; - -template<> struct make_unsigned_simple { typedef unsigned char type; }; -template<> struct make_unsigned_simple { typedef signed char type; }; -template<> struct make_unsigned_simple { typedef unsigned char type; }; -template<> struct make_unsigned_simple { typedef unsigned short type; }; -template<> struct make_unsigned_simple { typedef unsigned short type; }; -template<> struct make_unsigned_simple { typedef unsigned int type; }; -template<> struct make_unsigned_simple { typedef unsigned int type; }; -template<> struct make_unsigned_simple { typedef unsigned long int type; }; -template<> struct make_unsigned_simple { typedef unsigned long int type; }; -template<> struct make_unsigned_simple { typedef unsigned long long int type; }; -template<> struct make_unsigned_simple { typedef unsigned long long int type; }; - -template - struct make_unsigned_base -{ - // remove cv - typedef typename remove_cv::type remove_cv_t; - - // get the simple unsigned type - typedef typename make_unsigned_simple::type unsigned_remove_cv_t; - - // add back const, volatile, both, or neither to the simple result - typedef typename eval_if< - is_const::value && is_volatile::value, - // add cv back - add_cv, - // check const & volatile individually - eval_if< - is_const::value, - // add c back - add_const, - eval_if< - is_volatile::value, - // add v back - add_volatile, - // original type was neither cv, return the simple unsigned result - identity_ - > - > - >::type type; -}; - -} // end tt_detail - -template - struct 
make_unsigned - : tt_detail::make_unsigned_base -{}; - -struct largest_available_float -{ -#if defined(__CUDA_ARCH__) -# if (__CUDA_ARCH__ < 130) - typedef float type; -# else - typedef double type; -# endif -#else - typedef double type; -#endif -}; - -// T1 wins if they are both the same size -template - struct larger_type - : thrust::detail::eval_if< - (sizeof(T2) > sizeof(T1)), - thrust::detail::identity_, - thrust::detail::identity_ - > -{}; - - -namespace is_base_of_ns -{ - -typedef char yes; -typedef struct { char two_chars[2]; } no; - -template - struct host -{ - operator Base*() const; - operator Derived*(); -}; // end host - -template - struct impl -{ - template static yes check(Derived *, T); - static no check(Base*, int); - - static const bool value = sizeof(check(host(), int())) == sizeof(yes); -}; // end impl - -} // end is_base_of_ns - - -template - struct is_base_of - : integral_constant< - bool, - is_base_of_ns::impl::value - > -{}; - -template - struct enable_if_base_of - : enable_if< - is_base_of::value, - Result - > -{}; - - -namespace is_assignable_ns -{ - -template - class is_assignable -{ - typedef char yes_type; - typedef struct { char array[2]; } no_type; - - template static typename add_reference::type declval(); - - template struct helper { typedef void * type; }; - - template static yes_type test(typename helper() = declval())>::type); - - template static no_type test(...); - - public: - static const bool value = sizeof(test(0)) == 1; -}; // end is_assignable - -} // end is_assignable_ns - - -template - struct is_assignable - : integral_constant< - bool, - is_assignable_ns::is_assignable::value - > -{}; - - -template - struct is_copy_assignable - : is_assignable< - typename add_reference::type, - typename add_reference::type>::type - > -{}; - - -} // end detail - -} // end thrust - -#include - diff --git a/compat/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h 
b/compat/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h deleted file mode 100644 index 92767b5497..0000000000 --- a/compat/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -namespace detail -{ - -// this trait reports what type should be used as a temporary in certain algorithms -// which aggregate intermediate results from a function before writing to an output iterator - -// the pseudocode for deducing the type of the temporary used below: -// -// if Function is an AdaptableFunction -// result = Function::result_type -// else if OutputIterator2 is a "pure" output iterator -// result = InputIterator2::value_type -// else -// result = OutputIterator2::value_type -// -// XXX upon c++0x, TemporaryType needs to be: -// result_of::type -template - struct intermediate_type_from_function_and_iterators - : eval_if< - has_result_type::value, - result_type, - eval_if< - is_output_iterator::value, - thrust::iterator_value, - thrust::iterator_value - > - > -{ -}; // end intermediate_type_from_function_and_iterators - -} // end detail - -} // end thrust - diff --git a/compat/thrust/detail/type_traits/function_traits.h b/compat/thrust/detail/type_traits/function_traits.h deleted file mode 
100644 index 39015c608d..0000000000 --- a/compat/thrust/detail/type_traits/function_traits.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ - -// forward definitions for is_commutative -template struct plus; -template struct multiplies; -template struct minimum; -template struct maximum; -template struct logical_or; -template struct logical_and; -template struct bit_or; -template struct bit_and; -template struct bit_xor; - -namespace detail -{ - - -// some metafunctions which check for the nested types of the adaptable functions - -__THRUST_DEFINE_HAS_NESTED_TYPE(has_result_type, result_type) - -__THRUST_DEFINE_HAS_NESTED_TYPE(has_argument_type, argument_type) - -__THRUST_DEFINE_HAS_NESTED_TYPE(has_first_argument_type, first_argument_type) - -__THRUST_DEFINE_HAS_NESTED_TYPE(has_second_argument_type, second_argument_type) - - -template - struct result_type -{ - typedef typename AdaptableBinaryFunction::result_type type; -}; - - -template - struct is_adaptable_unary_function - : thrust::detail::and_< - has_result_type, - has_argument_type - > -{}; - - -template - struct is_adaptable_binary_function - : thrust::detail::and_< - has_result_type, - thrust::detail::and_< - has_first_argument_type, - has_second_argument_type - > - > -{}; - - -template - struct is_commutative - : public thrust::detail::false_type -{}; - 
-template struct is_commutative< typename thrust::plus > : public thrust::detail::is_arithmetic {}; -template struct is_commutative< typename thrust::multiplies > : public thrust::detail::is_arithmetic {}; -template struct is_commutative< typename thrust::minimum > : public thrust::detail::is_arithmetic {}; -template struct is_commutative< typename thrust::maximum > : public thrust::detail::is_arithmetic {}; -template struct is_commutative< typename thrust::logical_or > : public thrust::detail::is_arithmetic {}; -template struct is_commutative< typename thrust::logical_and > : public thrust::detail::is_arithmetic {}; -template struct is_commutative< typename thrust::bit_or > : public thrust::detail::is_arithmetic {}; -template struct is_commutative< typename thrust::bit_and > : public thrust::detail::is_arithmetic {}; -template struct is_commutative< typename thrust::bit_xor > : public thrust::detail::is_arithmetic {}; - -} // end namespace detail -} // end namespace thrust - diff --git a/compat/thrust/detail/type_traits/has_member_function.h b/compat/thrust/detail/type_traits/has_member_function.h deleted file mode 100644 index 117f4cb9bf..0000000000 --- a/compat/thrust/detail/type_traits/has_member_function.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -#define __THRUST_DEFINE_HAS_MEMBER_FUNCTION(trait_name, member_function_name) \ -template class trait_name; \ - \ -template \ -class trait_name \ -{ \ - class yes { char m; }; \ - class no { yes m[2]; }; \ - struct base_mixin \ - { \ - Result member_function_name(); \ - }; \ - struct base : public T, public base_mixin {}; \ - template class helper{}; \ - template \ - static no deduce(U*, helper* = 0); \ - static yes deduce(...); \ -public: \ - static const bool value = sizeof(yes) == sizeof(deduce(static_cast(0))); \ - typedef thrust::detail::integral_constant type; \ -}; \ - \ -template \ -class trait_name \ -{ \ - class yes { char m; }; \ - class no { yes m[2]; }; \ - struct base_mixin \ - { \ - Result member_function_name(Arg); \ - }; \ - struct base : public T, public base_mixin {}; \ - template class helper{}; \ - template \ - static no deduce(U*, helper* = 0); \ - static yes deduce(...); \ -public: \ - static const bool value = sizeof(yes) == sizeof(deduce(static_cast(0))); \ - typedef thrust::detail::integral_constant type; \ -}; \ - \ -template \ -class trait_name \ -{ \ - class yes { char m; }; \ - class no { yes m[2]; }; \ - struct base_mixin \ - { \ - Result member_function_name(Arg1,Arg2); \ - }; \ - struct base : public T, public base_mixin {}; \ - template class helper{}; \ - template \ - static no deduce(U*, helper* = 0); \ - static yes deduce(...); \ -public: \ - static const bool value = sizeof(yes) == sizeof(deduce(static_cast(0))); \ - typedef thrust::detail::integral_constant type; \ -}; \ - \ -template \ -class trait_name \ -{ \ - class yes { char m; }; \ - class no { yes m[2]; }; \ - struct base_mixin \ - { \ - Result member_function_name(Arg1,Arg2,Arg3); \ - }; \ - struct base : public T, public base_mixin {}; \ - template class helper{}; \ - template \ - static no deduce(U*, helper* = 0); \ - static yes deduce(...); \ -public: \ - static const bool value = sizeof(yes) == sizeof(deduce(static_cast(0))); \ - 
typedef thrust::detail::integral_constant type; \ -}; \ - \ -template \ -class trait_name \ -{ \ - class yes { char m; }; \ - class no { yes m[2]; }; \ - struct base_mixin \ - { \ - Result member_function_name(Arg1,Arg2,Arg3,Arg4); \ - }; \ - struct base : public T, public base_mixin {}; \ - template class helper{}; \ - template \ - static no deduce(U*, helper* = 0); \ - static yes deduce(...); \ -public: \ - static const bool value = sizeof(yes) == sizeof(deduce(static_cast(0))); \ - typedef thrust::detail::integral_constant type; \ -}; - diff --git a/compat/thrust/detail/type_traits/has_nested_type.h b/compat/thrust/detail/type_traits/has_nested_type.h deleted file mode 100644 index 98c9460500..0000000000 --- a/compat/thrust/detail/type_traits/has_nested_type.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -#define __THRUST_DEFINE_HAS_NESTED_TYPE(trait_name, nested_type_name) \ -template \ - struct trait_name \ -{ \ - typedef char yes_type; \ - typedef int no_type; \ - template static yes_type test(typename S::nested_type_name *); \ - template static no_type test(...); \ - static bool const value = sizeof(test(0)) == sizeof(yes_type);\ - typedef thrust::detail::integral_constant type;\ -}; - diff --git a/compat/thrust/detail/type_traits/has_trivial_assign.h b/compat/thrust/detail/type_traits/has_trivial_assign.h deleted file mode 100644 index d248245e84..0000000000 --- a/compat/thrust/detail/type_traits/has_trivial_assign.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file type_traits.h - * \brief Temporarily define some type traits - * until nvcc can compile tr1::type_traits. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ - -namespace detail -{ - -template struct has_trivial_assign - : public integral_constant< - bool, - (is_pod::value && !is_const::value) -#if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC - || __has_trivial_assign(T) -#elif THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC -// only use the intrinsic for >= 4.3 -#if (__GNUC__ >= 4) && (__GNUC_MINOR__ >= 3) - || __has_trivial_assign(T) -#endif // GCC VERSION -#endif // THRUST_HOST_COMPILER - > -{}; - -} // end detail - -} // end thrust - diff --git a/compat/thrust/detail/type_traits/is_call_possible.h b/compat/thrust/detail/type_traits/is_call_possible.h deleted file mode 100644 index 41b9539e19..0000000000 --- a/compat/thrust/detail/type_traits/is_call_possible.h +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -// inspired by Roman Perepelitsa's presentation from comp.lang.c++.moderated -// based on the implementation here: http://www.rsdn.ru/forum/cpp/2759773.1.aspx - -namespace thrust -{ -namespace detail -{ -namespace is_call_possible_detail -{ - -template class void_exp_result {}; - -template -U const& operator,(U const&, void_exp_result); - -template -U& operator,(U&, void_exp_result); - -template -struct clone_constness -{ - typedef dest_type type; -}; - -template -struct clone_constness -{ - typedef const dest_type type; -}; - -} // end is_call_possible_detail -} // end detail -} // end thrust - -#define __THRUST_DEFINE_IS_CALL_POSSIBLE(trait_name, member_function_name) \ -__THRUST_DEFINE_HAS_MEMBER_FUNCTION(trait_name##_has_member, member_function_name) \ - \ -template \ -struct trait_name \ -{ \ - private: \ - struct yes {}; \ - struct no { yes m[2]; }; \ - struct derived : public T \ - { \ - using T::member_function_name; \ - no member_function_name(...) 
const; \ - }; \ - \ - typedef typename thrust::detail::is_call_possible_detail::clone_constness::type derived_type; \ - \ - template \ - struct return_value_check \ - { \ - static yes deduce(Result); \ - static no deduce(...); \ - static no deduce(no); \ - static no deduce(thrust::detail::is_call_possible_detail::void_exp_result); \ - }; \ - \ - template \ - struct return_value_check \ - { \ - static yes deduce(...); \ - static no deduce(no); \ - }; \ - \ - template \ - struct impl \ - { \ - static const bool value = false; \ - }; \ - \ - template \ - struct impl \ - { \ - static typename add_reference::type test_me; \ - static typename add_reference::type arg; \ - \ - static const bool value = \ - sizeof( \ - return_value_check::deduce( \ - (test_me.member_function_name(arg), thrust::detail::is_call_possible_detail::void_exp_result()) \ - ) \ - ) == sizeof(yes); \ - }; \ - \ - template \ - struct impl \ - { \ - static typename add_reference::type test_me; \ - static typename add_reference::type arg1; \ - static typename add_reference::type arg2; \ - \ - static const bool value = \ - sizeof( \ - return_value_check::deduce( \ - (test_me.member_function_name(arg1,arg2), thrust::detail::is_call_possible_detail::void_exp_result()) \ - ) \ - ) == sizeof(yes); \ - }; \ - \ - template \ - struct impl \ - { \ - static typename add_reference::type test_me; \ - static typename add_reference::type arg1; \ - static typename add_reference::type arg2; \ - static typename add_reference::type arg3; \ - \ - static const bool value = \ - sizeof( \ - return_value_check::deduce( \ - (test_me.member_function_name(arg1,arg2,arg3), thrust::detail::is_call_possible_detail::void_exp_result()) \ - ) \ - ) == sizeof(yes); \ - }; \ - \ - template \ - struct impl \ - { \ - static typename add_reference::type test_me; \ - static typename add_reference::type arg1; \ - static typename add_reference::type arg2; \ - static typename add_reference::type arg3; \ - static typename add_reference::type 
arg4; \ - \ - static const bool value = \ - sizeof( \ - return_value_check::deduce( \ - (test_me.member_function_name(arg1,arg2,arg3,arg4), thrust::detail::is_call_possible_detail::void_exp_result()) \ - ) \ - ) == sizeof(yes); \ - }; \ - \ - public: \ - static const bool value = impl::value, Signature>::value; \ - typedef thrust::detail::integral_constant type; \ -}; - diff --git a/compat/thrust/detail/type_traits/is_metafunction_defined.h b/compat/thrust/detail/type_traits/is_metafunction_defined.h deleted file mode 100644 index fba0811fb3..0000000000 --- a/compat/thrust/detail/type_traits/is_metafunction_defined.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ - -namespace detail -{ - -__THRUST_DEFINE_HAS_NESTED_TYPE(is_metafunction_defined, type) - -template - struct enable_if_defined - : thrust::detail::lazy_enable_if< - is_metafunction_defined::value, - Metafunction - > -{}; - -} // end detail - -} // end thrust - diff --git a/compat/thrust/detail/type_traits/iterator/is_discard_iterator.h b/compat/thrust/detail/type_traits/iterator/is_discard_iterator.h deleted file mode 100644 index cca59da045..0000000000 --- a/compat/thrust/detail/type_traits/iterator/is_discard_iterator.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - -template -struct is_discard_iterator - : public thrust::detail::false_type -{}; - -template -struct is_discard_iterator< thrust::discard_iterator > - : public thrust::detail::true_type -{}; - -} // end namespace detail -} // end namespace thrust - diff --git a/compat/thrust/detail/type_traits/iterator/is_output_iterator.h b/compat/thrust/detail/type_traits/iterator/is_output_iterator.h deleted file mode 100644 index 4cefe6353a..0000000000 --- a/compat/thrust/detail/type_traits/iterator/is_output_iterator.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ - -namespace detail -{ - - -template - struct is_void_like - : thrust::detail::or_< - thrust::detail::is_void, - thrust::detail::is_same - > -{}; // end is_void_like - - -template - struct lazy_is_void_like - : is_void_like -{}; // end lazy_is_void_like - - -// XXX this meta function should first check that T is actually an iterator -// -// if thrust::iterator_value is defined and thrust::iterator_value::type == void -// return false -// else -// return true -template - struct is_output_iterator - : eval_if< - is_metafunction_defined >::value, - lazy_is_void_like >, - thrust::detail::true_type - >::type -{ -}; // end is_output_iterator - -} // end detail - -} // end thrust - diff --git a/compat/thrust/detail/type_traits/minimum_type.h b/compat/thrust/detail/type_traits/minimum_type.h deleted file mode 100644 index aaa011ec8d..0000000000 --- a/compat/thrust/detail/type_traits/minimum_type.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -namespace thrust -{ - -namespace detail -{ - -namespace minimum_type_detail -{ - -// -// Returns the minimum type or is empty -// if T1 and T2 are unrelated. 
-// -template struct minimum_type_impl {}; - -template -struct minimum_type_impl -{ - typedef T2 type; -}; // end minimum_type_impl - -template -struct minimum_type_impl -{ - typedef T1 type; -}; // end minimum_type_impl - -template -struct minimum_type_impl -{ - typedef T1 type; -}; // end minimum_type_impl - -template -struct primitive_minimum_type - : minimum_type_detail::minimum_type_impl< - T1, - T2, - ::thrust::detail::is_convertible::value, - ::thrust::detail::is_convertible::value - > -{ -}; // end primitive_minimum_type - -// because some types are not convertible (even to themselves) -// specialize primitive_minimum_type for when both types are identical -template -struct primitive_minimum_type -{ - typedef T type; -}; // end primitive_minimum_type - -// XXX this belongs somewhere more general -struct any_conversion -{ - template operator T (void); -}; - -} // end minimum_type_detail - -template - struct minimum_type; - -// base case -template - struct minimum_type - : minimum_type_detail::primitive_minimum_type -{}; - -template - struct lazy_minimum_type - : minimum_type< - typename T1::type, - typename T2::type - > -{}; - -// carefully avoid referring to a nested ::type which may not exist -template - struct minimum_type - : lazy_minimum_type< - lazy_minimum_type< - lazy_minimum_type< - minimum_type< - T1,T2 - >, - minimum_type< - T3,T4 - > - >, - lazy_minimum_type< - minimum_type< - T5,T6 - >, - minimum_type< - T7,T8 - > - > - >, - lazy_minimum_type< - lazy_minimum_type< - minimum_type< - T9,T10 - >, - minimum_type< - T11,T12 - > - >, - lazy_minimum_type< - minimum_type< - T13,T14 - >, - minimum_type< - T15,T16 - > - > - > - > -{}; - -} // end detail - -} // end thrust - diff --git a/compat/thrust/detail/type_traits/pointer_traits.h b/compat/thrust/detail/type_traits/pointer_traits.h deleted file mode 100644 index a0b5dc625c..0000000000 --- a/compat/thrust/detail/type_traits/pointer_traits.h +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright 2008-2012 
NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - -template struct pointer_element; - -template class Ptr, typename Arg> - struct pointer_element > -{ - typedef Arg type; -}; - -template class Ptr, typename Arg1, typename Arg2> - struct pointer_element > -{ - typedef Arg1 type; -}; - -template class Ptr, typename Arg1, typename Arg2, typename Arg3> - struct pointer_element > -{ - typedef Arg1 type; -}; - -template class Ptr, typename Arg1, typename Arg2, typename Arg3, typename Arg4> - struct pointer_element > -{ - typedef Arg1 type; -}; - -template class Ptr, typename Arg1, typename Arg2, typename Arg3, typename Arg4, typename Arg5> - struct pointer_element > -{ - typedef Arg1 type; -}; - -template - struct pointer_element -{ - typedef T type; -}; - -template - struct pointer_difference -{ - typedef typename Ptr::difference_type type; -}; - -template - struct pointer_difference -{ - typedef std::ptrdiff_t type; -}; - -template struct rebind_pointer; - -template - struct rebind_pointer -{ - typedef U* type; -}; - -template class Ptr, typename Arg, typename T> - struct rebind_pointer,T> -{ - typedef Ptr type; -}; - -template class Ptr, typename Arg1, typename Arg2, typename T> - struct rebind_pointer,T> -{ - typedef Ptr type; -}; - -template class Ptr, typename Arg1, typename Arg2, typename Arg3, typename T> - 
struct rebind_pointer,T> -{ - typedef Ptr type; -}; - -template class Ptr, typename Arg1, typename Arg2, typename Arg3, typename Arg4, typename T> - struct rebind_pointer,T> -{ - typedef Ptr type; -}; - -// XXX this should probably be renamed native_type or similar -__THRUST_DEFINE_HAS_NESTED_TYPE(has_raw_pointer, raw_pointer) - -namespace pointer_traits_detail -{ - -template struct pointer_raw_pointer_impl {}; - -template - struct pointer_raw_pointer_impl -{ - typedef T* type; -}; - -template - struct pointer_raw_pointer_impl::value>::type> -{ - typedef typename Ptr::raw_pointer type; -}; - -} // end pointer_traits_detail - -template - struct pointer_raw_pointer - : pointer_traits_detail::pointer_raw_pointer_impl -{}; - -namespace pointer_traits_detail -{ - -template - struct capture_address -{ - template - __host__ __device__ - capture_address(T &r) - : m_addr(&r) - {} - - inline __host__ __device__ - Void *operator&() const - { - return m_addr; - } - - Void *m_addr; -}; - -// metafunction to compute the type of pointer_to's parameter below -template - struct pointer_to_param - : thrust::detail::eval_if< - thrust::detail::is_void::value, - thrust::detail::identity_ >, - thrust::detail::add_reference - > -{}; - -} - -template - struct pointer_traits -{ - typedef Ptr pointer; - typedef typename pointer_element::type element_type; - typedef typename pointer_difference::type difference_type; - - template - struct rebind - { - typedef typename rebind_pointer::type other; - }; - - __host__ __device__ - inline static pointer pointer_to(typename pointer_traits_detail::pointer_to_param::type r) - { - // XXX this is supposed to be pointer::pointer_to(&r); (i.e., call a static member function of pointer called pointer_to) - // assume that pointer has a constructor from raw pointer instead - - return pointer(&r); - } - - // thrust additions follow - typedef typename pointer_raw_pointer::type raw_pointer; - - __host__ __device__ - inline static raw_pointer get(pointer ptr) - 
{ - return ptr.get(); - } -}; - -template - struct pointer_traits -{ - typedef T* pointer; - typedef T element_type; - typedef typename pointer_difference::type difference_type; - - template - struct rebind - { - typedef U* other; - }; - - __host__ __device__ - inline static pointer pointer_to(typename pointer_traits_detail::pointer_to_param::type r) - { - return &r; - } - - // thrust additions follow - typedef typename pointer_raw_pointer::type raw_pointer; - - __host__ __device__ - inline static raw_pointer get(pointer ptr) - { - return ptr; - } -}; - -template - struct is_pointer_convertible - : thrust::detail::and_< - thrust::detail::is_convertible< - typename pointer_element::type *, - typename pointer_element::type * - >, - thrust::detail::is_convertible< - typename iterator_system::type, - typename iterator_system::type - > - > -{}; - -// this could be a lot better, but for our purposes, it's probably -// sufficient just to check if pointer_raw_pointer has meaning -template - struct is_thrust_pointer - : is_metafunction_defined > -{}; - -// avoid inspecting traits of the arguments if they aren't known to be pointers -template - struct lazy_is_pointer_convertible - : thrust::detail::eval_if< - is_thrust_pointer::value && is_thrust_pointer::value, - is_pointer_convertible, - thrust::detail::identity_ - > -{}; - -template - struct enable_if_pointer_is_convertible - : thrust::detail::enable_if< - lazy_is_pointer_convertible::type::value, - T - > -{}; - - -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/type_traits/result_of.h b/compat/thrust/detail/type_traits/result_of.h deleted file mode 100644 index e30b4fda3b..0000000000 --- a/compat/thrust/detail/type_traits/result_of.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - -template struct result_of; - -// specialization for unary invocations of things which have result_type -template - struct result_of< - Functor(Arg1), - typename thrust::detail::enable_if::value>::type - > -{ - typedef typename Functor::result_type type; -}; // end result_of - -// specialization for binary invocations of things which have result_type -template - struct result_of< - Functor(Arg1,Arg2), - typename thrust::detail::enable_if::value>::type - > -{ - typedef typename Functor::result_type type; -}; - -} // end detail -} // end thrust - diff --git a/compat/thrust/detail/uninitialized_copy.inl b/compat/thrust/detail/uninitialized_copy.inl deleted file mode 100644 index a01dca53d6..0000000000 --- a/compat/thrust/detail/uninitialized_copy.inl +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file uninitialized_copy.inl - * \brief Inline file for uninitialized_copy.h. - */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - ForwardIterator uninitialized_copy(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - ForwardIterator result) -{ - using thrust::system::detail::generic::uninitialized_copy; - return uninitialized_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result); -} // end uninitialized_copy() - - -template - ForwardIterator uninitialized_copy_n(const thrust::detail::execution_policy_base &exec, - InputIterator first, - Size n, - ForwardIterator result) -{ - using thrust::system::detail::generic::uninitialized_copy_n; - return uninitialized_copy_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, result); -} // end uninitialized_copy_n() - - -template - ForwardIterator uninitialized_copy(InputIterator first, - InputIterator last, - ForwardIterator result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::uninitialized_copy(select_system(system1,system2), first, last, result); -} // end uninitialized_copy() - - -template - ForwardIterator uninitialized_copy_n(InputIterator first, - Size n, - ForwardIterator result) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::uninitialized_copy_n(select_system(system1,system2), first, n, result); -} // end uninitialized_copy_n() - - -} // end thrust - - diff --git a/compat/thrust/detail/uninitialized_fill.inl b/compat/thrust/detail/uninitialized_fill.inl deleted file mode 100644 index 
3545de56ee..0000000000 --- a/compat/thrust/detail/uninitialized_fill.inl +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file uninitialized_fill.inl - * \brief Inline file for uninitialized_fill.h. - */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template - void uninitialized_fill(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const T &x) -{ - using thrust::system::detail::generic::uninitialized_fill; - return uninitialized_fill(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, x); -} // end uninitialized_fill() - - -template - ForwardIterator uninitialized_fill_n(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - Size n, - const T &x) -{ - using thrust::system::detail::generic::uninitialized_fill_n; - return uninitialized_fill_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, x); -} // end uninitialized_fill_n() - - -template - void uninitialized_fill(ForwardIterator first, - ForwardIterator last, - const T &x) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - thrust::uninitialized_fill(select_system(system), first, last, x); -} // end uninitialized_fill() - - -template - ForwardIterator 
uninitialized_fill_n(ForwardIterator first, - Size n, - const T &x) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::uninitialized_fill_n(select_system(system), first, n, x); -} // end uninitialized_fill_n() - - -} // end thrust - diff --git a/compat/thrust/detail/unique.inl b/compat/thrust/detail/unique.inl deleted file mode 100644 index e90187d919..0000000000 --- a/compat/thrust/detail/unique.inl +++ /dev/null @@ -1,320 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file unique.inl - * \brief Inline file for unique.h. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -template -ForwardIterator unique(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last) -{ - using thrust::system::detail::generic::unique; - return unique(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last); -} // end unique() - - -template -ForwardIterator unique(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate binary_pred) -{ - using thrust::system::detail::generic::unique; - return unique(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, binary_pred); -} // end unique() - - -template -OutputIterator unique_copy(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator output) -{ - using thrust::system::detail::generic::unique_copy; - return unique_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, output); -} // end unique_copy() - - -template -OutputIterator unique_copy(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator output, - BinaryPredicate binary_pred) -{ - using thrust::system::detail::generic::unique_copy; - return unique_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, output, binary_pred); -} // end unique_copy() - - -template - thrust::pair - unique_by_key(const thrust::detail::execution_policy_base &exec, - ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first) -{ - using thrust::system::detail::generic::unique_by_key; - return unique_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first); -} // end unique_by_key() - - -template - thrust::pair - unique_by_key(const thrust::detail::execution_policy_base 
&exec, - ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first, - BinaryPredicate binary_pred) -{ - using thrust::system::detail::generic::unique_by_key; - return unique_by_key(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, binary_pred); -} // end unique_by_key() - - -template - thrust::pair - unique_by_key_copy(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output) -{ - using thrust::system::detail::generic::unique_by_key_copy; - return unique_by_key_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, keys_output, values_output); -} // end unique_by_key_copy() - - -template - thrust::pair - unique_by_key_copy(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred) -{ - using thrust::system::detail::generic::unique_by_key_copy; - return unique_by_key_copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), keys_first, keys_last, values_first, keys_output, values_output, binary_pred); -} // end unique_by_key_copy() - - -template - ForwardIterator unique(ForwardIterator first, - ForwardIterator last) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return thrust::unique(select_system(system), first, last); -} // end unique() - - -template - ForwardIterator unique(ForwardIterator first, - ForwardIterator last, - BinaryPredicate binary_pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System; - - System system; - - return 
thrust::unique(select_system(system), first, last, binary_pred); -} // end unique() - - -template - OutputIterator unique_copy(InputIterator first, - InputIterator last, - OutputIterator output) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::unique_copy(select_system(system1,system2), first, last, output); -} // end unique_copy() - - -template - OutputIterator unique_copy(InputIterator first, - InputIterator last, - OutputIterator output, - BinaryPredicate binary_pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::unique_copy(select_system(system1,system2), first, last, output, binary_pred); -} // end unique_copy() - - -template - thrust::pair - unique_by_key(ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::unique_by_key(select_system(system1,system2), keys_first, keys_last, values_first); -} // end unique_by_key() - - -template - thrust::pair - unique_by_key(ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first, - BinaryPredicate binary_pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - System1 system1; - System2 system2; - - return thrust::unique_by_key(select_system(system1,system2), keys_first, keys_last, values_first, binary_pred); -} // end 
unique_by_key() - - -template - thrust::pair - unique_by_key_copy(InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - - return thrust::unique_by_key_copy(select_system(system1,system2,system3,system4), keys_first, keys_last, values_first, keys_output, values_output); -} // end unique_by_key_copy() - - -template - thrust::pair - unique_by_key_copy(InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred) -{ - using thrust::system::detail::generic::select_system; - - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - typedef typename thrust::iterator_system::type System3; - typedef typename thrust::iterator_system::type System4; - - System1 system1; - System2 system2; - System3 system3; - System4 system4; - - return thrust::unique_by_key_copy(select_system(system1,system2,system3,system4), keys_first, keys_last, values_first, keys_output, values_output, binary_pred); -} // end unique_by_key_copy() - - -} // end namespace thrust - diff --git a/compat/thrust/detail/use_default.h b/compat/thrust/detail/use_default.h deleted file mode 100644 index c6eb66ef05..0000000000 --- a/compat/thrust/detail/use_default.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -namespace thrust -{ - -struct use_default {}; - -} // end thrust - diff --git a/compat/thrust/detail/util/align.h b/compat/thrust/detail/util/align.h deleted file mode 100644 index 10f107a95b..0000000000 --- a/compat/thrust/detail/util/align.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include - -// functions to handle memory alignment - -namespace thrust -{ -namespace detail -{ -namespace util -{ - -template -T * align_up(T * ptr, detail::uintptr_t bytes) -{ - return (T *) ( bytes * (((detail::uintptr_t) ptr + (bytes - 1)) / bytes) ); -} - -template -T * align_down(T * ptr, detail::uintptr_t bytes) -{ - return (T *) ( bytes * (detail::uintptr_t(ptr) / bytes) ); -} - -template -bool is_aligned(T * ptr, detail::uintptr_t bytes = sizeof(T)) -{ - return detail::uintptr_t(ptr) % bytes == 0; -} - -} // end namespace util -} // end namespace detail -} // end namespace thrust - diff --git a/compat/thrust/detail/util/blocking.h b/compat/thrust/detail/util/blocking.h deleted file mode 100644 index 3bb78a637b..0000000000 --- a/compat/thrust/detail/util/blocking.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -//functions to support blocking - -namespace thrust -{ - -namespace detail -{ - -namespace util -{ - -// x/y rounding towards +infinity for integers, used to determine # of blocks/warps etc. -template - inline __host__ __device__ L divide_ri(const L x, const R y) -{ - return (x + (y - 1)) / y; -} - -// x/y rounding towards zero for integers, used to determine # of blocks/warps etc. 
-template - inline __host__ __device__ L divide_rz(const L x, const R y) -{ - return x / y; -} - -// round x towards infinity to the next multiple of y -template - inline __host__ __device__ L round_i(const L x, const R y){ return y * divide_ri(x, y); } - -// round x towards zero to the next multiple of y -template - inline __host__ __device__ L round_z(const L x, const R y){ return y * divide_rz(x, y); } - -} // end namespace util - -} // end namespace detail - -} // end namespace thrust - diff --git a/compat/thrust/detail/vector_base.h b/compat/thrust/detail/vector_base.h deleted file mode 100644 index 6974eab554..0000000000 --- a/compat/thrust/detail/vector_base.h +++ /dev/null @@ -1,534 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file vector_base.h - * \brief Defines the interface to a base class for - * host_vector & device_vector. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - -namespace detail -{ - -template - class vector_base -{ - private: - typedef thrust::detail::contiguous_storage storage_type; - - public: - // typedefs - typedef typename storage_type::value_type value_type; - typedef typename storage_type::pointer pointer; - typedef typename storage_type::const_pointer const_pointer; - typedef typename storage_type::reference reference; - typedef typename storage_type::const_reference const_reference; - typedef typename storage_type::size_type size_type; - typedef typename storage_type::difference_type difference_type; - typedef typename storage_type::allocator_type allocator_type; - - typedef typename storage_type::iterator iterator; - typedef typename storage_type::const_iterator const_iterator; - - typedef thrust::reverse_iterator reverse_iterator; - typedef thrust::reverse_iterator const_reverse_iterator; - - /*! This constructor creates an empty vector_base. - */ - vector_base(void); - - /*! This constructor creates a vector_base with default-constructed - * elements. - * \param n The number of elements to create. - */ - explicit vector_base(size_type n); - - /*! This constructor creates a vector_base with copies - * of an exemplar element. - * \param n The number of elements to initially create. - * \param value An element to copy. - */ - explicit vector_base(size_type n, const value_type &value); - - /*! Copy constructor copies from an exemplar vector_base. - * \param v The vector_base to copy. - */ - vector_base(const vector_base &v); - - /*! assign operator makes a copy of an exemplar vector_base. - * \param v The vector_base to copy. - */ - vector_base &operator=(const vector_base &v); - - /*! Copy constructor copies from an exemplar vector_base with different - * type. - * \param v The vector_base to copy. - */ - template - vector_base(const vector_base &v); - - /*! 
assign operator makes a copy of an exemplar vector_base with different - * type. - * \param v The vector_base to copy. - */ - template - vector_base &operator=(const vector_base &v); - - /*! Copy constructor copies from an exemplar std::vector. - * \param v The std::vector to copy. - * XXX TODO: Make this method redundant with a properly templatized constructor. - * We would like to copy from a vector whose element type is anything - * assignable to value_type. - */ - template - vector_base(const std::vector &v); - - /*! assign operator makes a copy of an exemplar std::vector. - * \param v The vector to copy. - * XXX TODO: Templatize this assign on the type of the vector to copy from. - * We would like to copy from a vector whose element type is anything - * assignable to value_type. - */ - template - vector_base &operator=(const std::vector &v); - - /*! This constructor builds a vector_base from a range. - * \param first The beginning of the range. - * \param last The end of the range. - */ - template - vector_base(InputIterator first, InputIterator last); - - /*! The destructor erases the elements. - */ - ~vector_base(void); - - /*! \brief Resizes this vector_base to the specified number of elements. - * \param new_size Number of elements this vector_base should contain. - * \throw std::length_error If n exceeds max_size9). - * - * This method will resize this vector_base to the specified number of - * elements. If the number is smaller than this vector_base's current - * size this vector_base is truncated, otherwise this vector_base is - * extended and new elements are default constructed. - */ - void resize(size_type new_size); - - /*! \brief Resizes this vector_base to the specified number of elements. - * \param new_size Number of elements this vector_base should contain. - * \param x Data with which new elements should be populated. - * \throw std::length_error If n exceeds max_size(). 
- * - * This method will resize this vector_base to the specified number of - * elements. If the number is smaller than this vector_base's current - * size this vector_base is truncated, otherwise this vector_base is - * extended and new elements are populated with given data. - */ - void resize(size_type new_size, const value_type &x); - - /*! Returns the number of elements in this vector_base. - */ - size_type size(void) const; - - /*! Returns the size() of the largest possible vector_base. - * \return The largest possible return value of size(). - */ - size_type max_size(void) const; - - /*! \brief If n is less than or equal to capacity(), this call has no effect. - * Otherwise, this method is a request for allocation of additional memory. If - * the request is successful, then capacity() is greater than or equal to - * n; otherwise, capacity() is unchanged. In either case, size() is unchanged. - * \throw std::length_error If n exceeds max_size(). - */ - void reserve(size_type n); - - /*! Returns the number of elements which have been reserved in this - * vector_base. - */ - size_type capacity(void) const; - - /*! This method shrinks the capacity of this vector_base to exactly - * fit its elements. - */ - void shrink_to_fit(void); - - /*! \brief Subscript access to the data contained in this vector_dev. - * \param n The index of the element for which data should be accessed. - * \return Read/write reference to data. - * - * This operator allows for easy, array-style, data access. - * Note that data access with this operator is unchecked and - * out_of_range lookups are not defined. - */ - reference operator[](size_type n); - - /*! \brief Subscript read access to the data contained in this vector_dev. - * \param n The index of the element for which data should be accessed. - * \return Read reference to data. - * - * This operator allows for easy, array-style, data access. 
- * Note that data access with this operator is unchecked and - * out_of_range lookups are not defined. - */ - const_reference operator[](size_type n) const; - - /*! This method returns an iterator pointing to the beginning of - * this vector_base. - * \return mStart - */ - iterator begin(void); - - /*! This method returns a const_iterator pointing to the beginning - * of this vector_base. - * \return mStart - */ - const_iterator begin(void) const; - - /*! This method returns a const_iterator pointing to the beginning - * of this vector_base. - * \return mStart - */ - const_iterator cbegin(void) const; - - /*! This method returns a reverse_iterator pointing to the beginning of - * this vector_base's reversed sequence. - * \return A reverse_iterator pointing to the beginning of this - * vector_base's reversed sequence. - */ - reverse_iterator rbegin(void); - - /*! This method returns a const_reverse_iterator pointing to the beginning of - * this vector_base's reversed sequence. - * \return A const_reverse_iterator pointing to the beginning of this - * vector_base's reversed sequence. - */ - const_reverse_iterator rbegin(void) const; - - /*! This method returns a const_reverse_iterator pointing to the beginning of - * this vector_base's reversed sequence. - * \return A const_reverse_iterator pointing to the beginning of this - * vector_base's reversed sequence. - */ - const_reverse_iterator crbegin(void) const; - - /*! This method returns an iterator pointing to one element past the - * last of this vector_base. - * \return begin() + size(). - */ - iterator end(void); - - /*! This method returns a const_iterator pointing to one element past the - * last of this vector_base. - * \return begin() + size(). - */ - const_iterator end(void) const; - - /*! This method returns a const_iterator pointing to one element past the - * last of this vector_base. - * \return begin() + size(). - */ - const_iterator cend(void) const; - - /*! 
This method returns a reverse_iterator pointing to one element past the - * last of this vector_base's reversed sequence. - * \return rbegin() + size(). - */ - reverse_iterator rend(void); - - /*! This method returns a const_reverse_iterator pointing to one element past the - * last of this vector_base's reversed sequence. - * \return rbegin() + size(). - */ - const_reverse_iterator rend(void) const; - - /*! This method returns a const_reverse_iterator pointing to one element past the - * last of this vector_base's reversed sequence. - * \return rbegin() + size(). - */ - const_reverse_iterator crend(void) const; - - /*! This method returns a const_reference referring to the first element of this - * vector_base. - * \return The first element of this vector_base. - */ - const_reference front(void) const; - - /*! This method returns a reference pointing to the first element of this - * vector_base. - * \return The first element of this vector_base. - */ - reference front(void); - - /*! This method returns a const reference pointing to the last element of - * this vector_base. - * \return The last element of this vector_base. - */ - const_reference back(void) const; - - /*! This method returns a reference referring to the last element of - * this vector_dev. - * \return The last element of this vector_base. - */ - reference back(void); - - /*! This method returns a pointer to this vector_base's first element. - * \return A pointer to the first element of this vector_base. - */ - pointer data(void); - - /*! This method returns a const_pointer to this vector_base's first element. - * \return a const_pointer to the first element of this vector_base. - */ - const_pointer data(void) const; - - /*! This method resizes this vector_base to 0. - */ - void clear(void); - - /*! This method returns true iff size() == 0. - * \return true if size() == 0; false, otherwise. - */ - bool empty(void) const; - - /*! This method appends the given element to the end of this vector_base. 
- * \param x The element to append. - */ - void push_back(const value_type &x); - - /*! This method erases the last element of this vector_base, invalidating - * all iterators and references to it. - */ - void pop_back(void); - - /*! This method swaps the contents of this vector_base with another vector_base. - * \param v The vector_base with which to swap. - */ - void swap(vector_base &v); - - /*! This method removes the element at position pos. - * \param pos The position of the element of interest. - * \return An iterator pointing to the new location of the element that followed the element - * at position pos. - */ - iterator erase(iterator pos); - - /*! This method removes the range of elements [first,last) from this vector_base. - * \param first The beginning of the range of elements to remove. - * \param last The end of the range of elements to remove. - * \return An iterator pointing to the new location of the element that followed the last - * element in the sequence [first,last). - */ - iterator erase(iterator first, iterator last); - - /*! This method inserts a single copy of a given exemplar value at the - * specified position in this vector_base. - * \param position The insertion position. - * \param x The exemplar element to copy & insert. - * \return An iterator pointing to the newly inserted element. - */ - iterator insert(iterator position, const T &x); - - /*! This method inserts a copy of an exemplar value to a range at the - * specified position in this vector_base. - * \param position The insertion position - * \param n The number of insertions to perform. - * \param x The value to replicate and insert. - */ - void insert(iterator position, size_type n, const T &x); - - /*! This method inserts a copy of an input range at the specified position - * in this vector_base. - * \param position The insertion position. - * \param first The beginning of the range to copy. - * \param last The end of the range to copy. 
- * - * \tparam InputIterator is a model of Assignable. - */ - template - void insert(iterator position, InputIterator first, InputIterator last); - - /*! This version of \p assign replicates a given exemplar - * \p n times into this vector_base. - * \param n The number of times to copy \p x. - * \param x The exemplar element to replicate. - */ - void assign(size_type n, const T &x); - - /*! This version of \p assign makes this vector_base a copy of a given input range. - * \param first The beginning of the range to copy. - * \param last The end of the range to copy. - * - * \tparam InputIterator is a model of Input Iterator. - */ - template - void assign(InputIterator first, InputIterator last); - - /*! This method returns a copy of this vector's allocator. - * \return A copy of the alloctor used by this vector. - */ - allocator_type get_allocator(void) const; - - protected: - // Our storage - storage_type m_storage; - - // The size of this vector_base, in number of elements. - size_type m_size; - - private: - // these methods resolve the ambiguity of the constructor template of form (Iterator, Iterator) - template - void init_dispatch(IteratorOrIntegralType begin, IteratorOrIntegralType end, false_type); - - template - void init_dispatch(IteratorOrIntegralType n, IteratorOrIntegralType value, true_type); - - template - void range_init(InputIterator first, InputIterator last); - - template - void range_init(InputIterator first, InputIterator last, thrust::incrementable_traversal_tag); - - template - void range_init(ForwardIterator first, ForwardIterator last, thrust::random_access_traversal_tag); - - void default_init(size_type n); - - void fill_init(size_type n, const T &x); - - // these methods resolve the ambiguity of the insert() template of form (iterator, InputIterator, InputIterator) - template - void insert_dispatch(iterator position, InputIteratorOrIntegralType first, InputIteratorOrIntegralType last, false_type); - - // these methods resolve the 
ambiguity of the insert() template of form (iterator, InputIterator, InputIterator) - template - void insert_dispatch(iterator position, InputIteratorOrIntegralType n, InputIteratorOrIntegralType x, true_type); - - // this method appends n default-constructed elements at the end - void append(size_type n); - - // this method performs insertion from a fill value - void fill_insert(iterator position, size_type n, const T &x); - - // this method performs insertion from a range - template - void copy_insert(iterator position, InputIterator first, InputIterator last); - - // these methods resolve the ambiguity of the assign() template of form (InputIterator, InputIterator) - template - void assign_dispatch(InputIterator first, InputIterator last, false_type); - - // these methods resolve the ambiguity of the assign() template of form (InputIterator, InputIterator) - template - void assign_dispatch(Integral n, Integral x, true_type); - - // this method performs assignment from a range - template - void range_assign(InputIterator first, InputIterator last); - - // this method performs assignment from a range of RandomAccessIterators - template - void range_assign(RandomAccessIterator first, RandomAccessIterator last, thrust::random_access_traversal_tag); - - // this method performs assignment from a range of InputIterators - template - void range_assign(InputIterator first, InputIterator last, thrust::incrementable_traversal_tag); - - // this method performs assignment from a fill value - void fill_assign(size_type n, const T &x); - - // this method allocates new storage and construct copies the given range - template - void allocate_and_copy(size_type requested_size, - ForwardIterator first, ForwardIterator last, - storage_type &new_storage); -}; // end vector_base - -} // end detail - -/*! This function assigns the contents of vector a to vector b and the - * contents of vector b to vector a. - * - * \param a The first vector of interest. 
After completion, the contents - * of b will be returned here. - * \param b The second vector of interest. After completion, the contents - * of a will be returned here. - */ -template - void swap(detail::vector_base &a, - detail::vector_base &b); - - -/*! This operator allows comparison between two vectors. - * \param lhs The first \p vector to compare. - * \param rhs The second \p vector to compare. - * \return \c true if and only if each corresponding element in either - * \p vector equals the other; \c false, otherwise. - */ -template -bool operator==(const detail::vector_base& lhs, - const detail::vector_base& rhs); - -template -bool operator==(const detail::vector_base& lhs, - const std::vector& rhs); - -template -bool operator==(const std::vector& lhs, - const detail::vector_base& rhs); - -/*! This operator allows comparison between two vectors. - * \param lhs The first \p vector to compare. - * \param rhs The second \p vector to compare. - * \return \c false if and only if each corresponding element in either - * \p vector equals the other; \c true, otherwise. - */ -template -bool operator!=(const detail::vector_base& lhs, - const detail::vector_base& rhs); - -template -bool operator!=(const detail::vector_base& lhs, - const std::vector& rhs); - -template -bool operator!=(const std::vector& lhs, - const detail::vector_base& rhs); - -} // end thrust - -#include - diff --git a/compat/thrust/detail/vector_base.inl b/compat/thrust/detail/vector_base.inl deleted file mode 100644 index 24e6466c18..0000000000 --- a/compat/thrust/detail/vector_base.inl +++ /dev/null @@ -1,1203 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file vector_base.inl - * \brief Inline file for vector_base.h. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace thrust -{ - -namespace detail -{ - -template - vector_base - ::vector_base(void) - :m_storage(), - m_size(0) -{ - ; -} // end vector_base::vector_base() - -template - vector_base - ::vector_base(size_type n) - :m_storage(), - m_size(0) -{ - default_init(n); -} // end vector_base::vector_base() - -template - vector_base - ::vector_base(size_type n, const value_type &value) - :m_storage(), - m_size(0) -{ - fill_init(n,value); -} // end vector_base::vector_base() - -template - vector_base - ::vector_base(const vector_base &v) - :m_storage(), - m_size(0) -{ - range_init(v.begin(), v.end()); -} // end vector_base::vector_base() - -template - vector_base & - vector_base - ::operator=(const vector_base &v) -{ - if(this != &v) - { - assign(v.begin(), v.end()); - } // end if - - return *this; -} // end vector_base::operator=() - -template - template - vector_base - ::vector_base(const vector_base &v) - :m_storage(), - m_size(0) -{ - range_init(v.begin(), v.end()); -} // end vector_base::vector_base() - -template - template - vector_base & - vector_base - ::operator=(const vector_base &v) -{ - assign(v.begin(), v.end()); - - return *this; -} // end vector_base::operator=() - -template - template - vector_base - ::vector_base(const std::vector &v) - :m_storage(), - m_size(0) -{ - range_init(v.begin(), v.end()); -} // end vector_base::vector_base() - 
-template - template - vector_base & - vector_base - ::operator=(const std::vector &v) -{ - assign(v.begin(), v.end()); - - return *this; -} // end vector_base::operator=() - -template - template - void vector_base - ::init_dispatch(IteratorOrIntegralType n, - IteratorOrIntegralType value, - true_type) -{ - fill_init(n,value); -} // end vector_base::init_dispatch() - -template - void vector_base - ::default_init(size_type n) -{ - if(n > 0) - { - m_storage.allocate(n); - m_size = n; - - m_storage.default_construct_n(begin(), size()); - } // end if -} // end vector_base::default_init() - -template - void vector_base - ::fill_init(size_type n, const T &x) -{ - if(n > 0) - { - m_storage.allocate(n); - m_size = n; - - m_storage.uninitialized_fill_n(begin(), size(), x); - } // end if -} // end vector_base::fill_init() - -template - template - void vector_base - ::init_dispatch(InputIterator first, - InputIterator last, - false_type) -{ - range_init(first, last); -} // end vector_base::init_dispatch() - -template - template - void vector_base - ::range_init(InputIterator first, - InputIterator last) -{ - range_init(first, last, - typename thrust::iterator_traversal::type()); -} // end vector_base::range_init() - -template - template - void vector_base - ::range_init(InputIterator first, - InputIterator last, - thrust::incrementable_traversal_tag) -{ - for(; first != last; ++first) - push_back(*first); -} // end vector_base::range_init() - -template - template - void vector_base - ::range_init(ForwardIterator first, - ForwardIterator last, - thrust::random_access_traversal_tag) -{ - size_type new_size = thrust::distance(first, last); - - allocate_and_copy(new_size, first, last, m_storage); - m_size = new_size; -} // end vector_base::range_init() - -template - template - vector_base - ::vector_base(InputIterator first, - InputIterator last) - :m_storage(), - m_size(0) -{ - // check the type of InputIterator: if it's an integral type, - // we need to interpret this call as 
(size_type, value_type) - typedef thrust::detail::is_integral Integer; - - init_dispatch(first, last, Integer()); -} // end vector_basee::vector_base() - -template - void vector_base - ::resize(size_type new_size) -{ - if(new_size < size()) - { - iterator new_end = begin(); - thrust::advance(new_end, new_size); - erase(new_end, end()); - } // end if - else - { - append(new_size - size()); - } // end else -} // end vector_base::resize() - -template - void vector_base - ::resize(size_type new_size, const value_type &x) -{ - if(new_size < size()) - { - iterator new_end = begin(); - thrust::advance(new_end, new_size); - erase(new_end, end()); - } // end if - else - { - insert(end(), new_size - size(), x); - } // end else -} // end vector_base::resize() - -template - typename vector_base::size_type - vector_base - ::size(void) const -{ - return m_size; -} // end vector_base::size() - -template - typename vector_base::size_type - vector_base - ::max_size(void) const -{ - return m_storage.max_size(); -} // end vector_base::max_size() - -template - void vector_base - ::reserve(size_type n) -{ - if(n > capacity()) - { - allocate_and_copy(n, begin(), end(), m_storage); - } // end if -} // end vector_base::reserve() - -template - typename vector_base::size_type - vector_base - ::capacity(void) const -{ - return m_storage.size(); -} // end vector_base::capacity() - -template - void vector_base - ::shrink_to_fit(void) -{ - // use the swap trick - vector_base(*this).swap(*this); -} // end vector_base::shrink_to_fit() - -template - typename vector_base::reference - vector_base - ::operator[](const size_type n) -{ - return m_storage[n]; -} // end vector_base::operator[] - -template - typename vector_base::const_reference - vector_base - ::operator[](const size_type n) const -{ - return m_storage[n]; -} // end vector_base::operator[] - -template - typename vector_base::iterator - vector_base - ::begin(void) -{ - return m_storage.begin(); -} // end vector_base::begin() - -template - 
typename vector_base::const_iterator - vector_base - ::begin(void) const -{ - return m_storage.begin(); -} // end vector_base::begin() - -template - typename vector_base::const_iterator - vector_base - ::cbegin(void) const -{ - return begin(); -} // end vector_base::cbegin() - -template - typename vector_base::reverse_iterator - vector_base - ::rbegin(void) -{ - return reverse_iterator(end()); -} // end vector_base::rbegin() - -template - typename vector_base::const_reverse_iterator - vector_base - ::rbegin(void) const -{ - return const_reverse_iterator(end()); -} // end vector_base::rbegin() - -template - typename vector_base::const_reverse_iterator - vector_base - ::crbegin(void) const -{ - return rbegin(); -} // end vector_base::crbegin() - -template - typename vector_base::iterator - vector_base - ::end(void) -{ - iterator result = begin(); - thrust::advance(result, size()); - return result; -} // end vector_base::end() - -template - typename vector_base::const_iterator - vector_base - ::end(void) const -{ - const_iterator result = begin(); - thrust::advance(result, size()); - return result; -} // end vector_base::end() - -template - typename vector_base::const_iterator - vector_base - ::cend(void) const -{ - return end(); -} // end vector_base::cend() - -template - typename vector_base::reverse_iterator - vector_base - ::rend(void) -{ - return reverse_iterator(begin()); -} // end vector_base::rend() - -template - typename vector_base::const_reverse_iterator - vector_base - ::rend(void) const -{ - return const_reverse_iterator(begin()); -} // end vector_base::rend() - -template - typename vector_base::const_reverse_iterator - vector_base - ::crend(void) const -{ - return rend(); -} // end vector_base::crend() - -template - typename vector_base::const_reference - vector_base - ::front(void) const -{ - return *begin(); -} // end vector_base::front() - -template - typename vector_base::reference - vector_base - ::front(void) -{ - return *begin(); -} // end 
vector_base::front() - -template - typename vector_base::const_reference - vector_base - ::back(void) const -{ - const_iterator ptr_to_back = end(); - --ptr_to_back; - return *ptr_to_back; -} // end vector_base::vector_base - -template - typename vector_base::reference - vector_base - ::back(void) -{ - iterator ptr_to_back = end(); - --ptr_to_back; - return *ptr_to_back; -} // end vector_base::vector_base - -template - typename vector_base::pointer - vector_base - ::data(void) -{ - return &front(); -} // end vector_base::data() - -template - typename vector_base::const_pointer - vector_base - ::data(void) const -{ - return &front(); -} // end vector_base::data() - -template - vector_base - ::~vector_base(void) -{ - // destroy every living thing - m_storage.destroy(begin(),end()); -} // end vector_base::~vector_base() - -template - void vector_base - ::clear(void) -{ - resize(0); -} // end vector_base::~vector_dev() - -template - bool vector_base - ::empty(void) const -{ - return size() == 0; -} // end vector_base::empty(); - -template - void vector_base - ::push_back(const value_type &x) -{ - insert(end(), x); -} // end vector_base::push_back() - -template - void vector_base - ::pop_back(void) -{ - iterator e = end(); - iterator ptr_to_back = e; - --ptr_to_back; - m_storage.destroy(ptr_to_back, e); - --m_size; -} // end vector_base::pop_back() - -template - typename vector_base::iterator vector_base - ::erase(iterator pos) -{ - iterator end = pos; - ++end; - return erase(pos,end); -} // end vector_base::erase() - -template - typename vector_base::iterator vector_base - ::erase(iterator first, iterator last) -{ - // overlap copy the range [last,end()) to first - // XXX this copy only potentially overlaps - iterator i = thrust::detail::overlapped_copy(last, end(), first); - - // destroy everything after i - m_storage.destroy(i, end()); - - // modify our size - m_size -= (last - first); - - // return an iterator pointing to the position of the first element - // 
following the erased range - return first; -} // end vector_base::erase() - -template - void vector_base - ::swap(vector_base &v) -{ - thrust::swap(m_storage, v.m_storage); - thrust::swap(m_size, v.m_size); -} // end vector_base::swap() - -template - void vector_base - ::assign(size_type n, const T &x) -{ - fill_assign(n, x); -} // end vector_base::assign() - -template - template - void vector_base - ::assign(InputIterator first, InputIterator last) -{ - // we could have received assign(n, x), so disambiguate on the - // type of InputIterator - typedef typename thrust::detail::is_integral integral; - - assign_dispatch(first, last, integral()); -} // end vector_base::assign() - -template - typename vector_base::allocator_type - vector_base - ::get_allocator(void) const -{ - return m_storage.get_allocator(); -} // end vector_base::get_allocator() - -template - typename vector_base::iterator - vector_base - ::insert(iterator position, const T &x) -{ - // find the index of the insertion - size_type index = thrust::distance(begin(), position); - - // make the insertion - insert(position, 1, x); - - // return an iterator pointing back to position - iterator result = begin(); - thrust::advance(result, index); - return result; -} // end vector_base::insert() - -template - void vector_base - ::insert(iterator position, size_type n, const T &x) -{ - fill_insert(position, n, x); -} // end vector_base::insert() - -template - template - void vector_base - ::insert(iterator position, InputIterator first, InputIterator last) -{ - // we could have received insert(position, n, x), so disambiguate on the - // type of InputIterator - typedef typename thrust::detail::is_integral integral; - - insert_dispatch(position, first, last, integral()); -} // end vector_base::insert() - -template - template - void vector_base - ::assign_dispatch(InputIterator first, InputIterator last, false_type) -{ - range_assign(first, last); -} // end vector_base::assign_dispatch() - -template - template - 
void vector_base - ::assign_dispatch(Integral n, Integral x, true_type) -{ - fill_assign(n, x); -} // end vector_base::assign_dispatch() - -template - template - void vector_base - ::insert_dispatch(iterator position, InputIterator first, InputIterator last, false_type) -{ - copy_insert(position, first, last); -} // end vector_base::insert_dispatch() - -template - template - void vector_base - ::insert_dispatch(iterator position, Integral n, Integral x, true_type) -{ - fill_insert(position, n, x); -} // end vector_base::insert_dispatch() - -template - template - void vector_base - ::copy_insert(iterator position, - ForwardIterator first, - ForwardIterator last) -{ - if(first != last) - { - // how many new elements will we create? - const size_type num_new_elements = thrust::distance(first, last); - if(capacity() - size() >= num_new_elements) - { - // we've got room for all of them - // how many existing elements will we displace? - const size_type num_displaced_elements = end() - position; - iterator old_end = end(); - - if(num_displaced_elements > num_new_elements) - { - // construct copy n displaced elements to new elements - // following the insertion - m_storage.uninitialized_copy(end() - num_new_elements, end(), end()); - - // extend the size - m_size += num_new_elements; - - // copy num_displaced_elements - num_new_elements elements to existing elements - // this copy overlaps - const size_type copy_length = (old_end - num_new_elements) - position; - thrust::detail::overlapped_copy(position, old_end - num_new_elements, old_end - copy_length); - - // finally, copy the range to the insertion point - thrust::copy(first, last, position); - } // end if - else - { - ForwardIterator mid = first; - thrust::advance(mid, num_displaced_elements); - - // construct copy new elements at the end of the vector - m_storage.uninitialized_copy(mid, last, end()); - - // extend the size - m_size += num_new_elements - num_displaced_elements; - - // construct copy the displaced 
elements - m_storage.uninitialized_copy(position, old_end, end()); - - // extend the size - m_size += num_displaced_elements; - - // copy to elements which already existed - thrust::copy(first, mid, position); - } // end else - } // end if - else - { - const size_type old_size = size(); - - // compute the new capacity after the allocation - size_type new_capacity = old_size + thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION (old_size, num_new_elements); - - // allocate exponentially larger new storage - new_capacity = thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION (new_capacity, 2 * capacity()); - - // do not exceed maximum storage - new_capacity = thrust::min THRUST_PREVENT_MACRO_SUBSTITUTION (new_capacity, max_size()); - - if(new_capacity > max_size()) - { - throw std::length_error("insert(): insertion exceeds max_size()."); - } // end if - - storage_type new_storage(new_capacity); - - // record how many constructors we invoke in the try block below - iterator new_end = new_storage.begin(); - - try - { - // construct copy elements before the insertion to the beginning of the newly - // allocated storage - new_end = m_storage.uninitialized_copy(begin(), position, new_storage.begin()); - - // construct copy elements to insert - new_end = m_storage.uninitialized_copy(first, last, new_end); - - // construct copy displaced elements from the old storage to the new storage - // remember [position, end()) refers to the old storage - new_end = m_storage.uninitialized_copy(position, end(), new_end); - } // end try - catch(...) 
- { - // something went wrong, so destroy & deallocate the new storage - m_storage.destroy(new_storage.begin(), new_end); - new_storage.deallocate(); - - // rethrow - throw; - } // end catch - - // call destructors on the elements in the old storage - m_storage.destroy(begin(), end()); - - // record the vector's new state - m_storage.swap(new_storage); - m_size = old_size + num_new_elements; - } // end else - } // end if -} // end vector_base::copy_insert() - -template - void vector_base - ::append(size_type n) -{ - if(n != 0) - { - if(capacity() - size() >= n) - { - // we've got room for all of them - - // default construct new elements at the end of the vector - m_storage.default_construct_n(end(), n); - - // extend the size - m_size += n; - } // end if - else - { - const size_type old_size = size(); - - // compute the new capacity after the allocation - size_type new_capacity = old_size + thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION (old_size, n); - - // allocate exponentially larger new storage - new_capacity = thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION (new_capacity, 2 * capacity()); - - // do not exceed maximum storage - new_capacity = thrust::min THRUST_PREVENT_MACRO_SUBSTITUTION (new_capacity, max_size()); - - // create new storage - storage_type new_storage(new_capacity); - - // record how many constructors we invoke in the try block below - iterator new_end = new_storage.begin(); - - try - { - // construct copy all elements into the newly allocated storage - new_end = m_storage.uninitialized_copy(begin(), end(), new_storage.begin()); - - // construct new elements to insert - m_storage.default_construct_n(new_end, n); - new_end += n; - } // end try - catch(...) 
- { - // something went wrong, so destroy & deallocate the new storage - m_storage.destroy(new_storage.begin(), new_end); - new_storage.deallocate(); - - // rethrow - throw; - } // end catch - - // call destructors on the elements in the old storage - m_storage.destroy(begin(), end()); - - // record the vector's new state - m_storage.swap(new_storage); - m_size = old_size + n; - } // end else - } // end if -} // end vector_base::append() - -template - void vector_base - ::fill_insert(iterator position, size_type n, const T &x) -{ - if(n != 0) - { - if(capacity() - size() >= n) - { - // we've got room for all of them - // how many existing elements will we displace? - const size_type num_displaced_elements = end() - position; - iterator old_end = end(); - - if(num_displaced_elements > n) - { - // construct copy n displaced elements to new elements - // following the insertion - m_storage.uninitialized_copy(end() - n, end(), end()); - - // extend the size - m_size += n; - - // copy num_displaced_elements - n elements to existing elements - // this copy overlaps - const size_type copy_length = (old_end - n) - position; - thrust::detail::overlapped_copy(position, old_end - n, old_end - copy_length); - - // finally, fill the range to the insertion point - thrust::fill_n(position, n, x); - } // end if - else - { - // construct new elements at the end of the vector - m_storage.uninitialized_fill_n(end(), n - num_displaced_elements, x); - - // extend the size - m_size += n - num_displaced_elements; - - // construct copy the displaced elements - m_storage.uninitialized_copy(position, old_end, end()); - - // extend the size - m_size += num_displaced_elements; - - // fill to elements which already existed - thrust::fill(position, old_end, x); - } // end else - } // end if - else - { - const size_type old_size = size(); - - // compute the new capacity after the allocation - size_type new_capacity = old_size + thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION (old_size, n); - - // 
allocate exponentially larger new storage - new_capacity = thrust::max THRUST_PREVENT_MACRO_SUBSTITUTION (new_capacity, 2 * capacity()); - - // do not exceed maximum storage - new_capacity = thrust::min THRUST_PREVENT_MACRO_SUBSTITUTION (new_capacity, max_size()); - - if(new_capacity > max_size()) - { - throw std::length_error("insert(): insertion exceeds max_size()."); - } // end if - - storage_type new_storage(new_capacity); - - // record how many constructors we invoke in the try block below - iterator new_end = new_storage.begin(); - - try - { - // construct copy elements before the insertion to the beginning of the newly - // allocated storage - new_end = m_storage.uninitialized_copy(begin(), position, new_storage.begin()); - - // construct new elements to insert - m_storage.uninitialized_fill_n(new_end, n, x); - new_end += n; - - // construct copy displaced elements from the old storage to the new storage - // remember [position, end()) refers to the old storage - new_end = m_storage.uninitialized_copy(position, end(), new_end); - } // end try - catch(...) 
- { - // something went wrong, so destroy & deallocate the new storage - m_storage.destroy(new_storage.begin(), new_end); - new_storage.deallocate(); - - // rethrow - throw; - } // end catch - - // call destructors on the elements in the old storage - m_storage.destroy(begin(), end()); - - // record the vector's new state - m_storage.swap(new_storage); - m_size = old_size + n; - } // end else - } // end if -} // end vector_base::fill_insert() - -template - template - void vector_base - ::range_assign(InputIterator first, - InputIterator last) -{ - // dispatch on traversal - range_assign(first, last, - typename thrust::iterator_traversal::type()); -} // end range_assign() - -template - template - void vector_base - ::range_assign(InputIterator first, - InputIterator last, - thrust::incrementable_traversal_tag) -{ - iterator current(begin()); - - // assign to elements which already exist - for(; first != last && current != end(); ++current, ++first) - { - *current = *first; - } // end for - - // either just the input was exhausted or both - // the input and vector elements were exhausted - if(first == last) - { - // if we exhausted the input, erase leftover elements - erase(current, end()); - } // end if - else - { - // insert the rest of the input at the end of the vector - insert(end(), first, last); - } // end else -} // end vector_base::range_assign() - -template - template - void vector_base - ::range_assign(RandomAccessIterator first, - RandomAccessIterator last, - thrust::random_access_traversal_tag) -{ - const size_type n = thrust::distance(first, last); - - if(n > capacity()) - { - storage_type new_storage; - allocate_and_copy(n, first, last, new_storage); - - // call destructors on the elements in the old storage - m_storage.destroy(begin(), end()); - - // record the vector's new state - m_storage.swap(new_storage); - m_size = n; - } // end if - else if(size() >= n) - { - // we can already accomodate the new range - iterator new_end = thrust::copy(first, 
last, begin()); - - // destroy the elements we don't need - m_storage.destroy(new_end, end()); - - // update size - m_size = n; - } // end else if - else - { - // range fits inside allocated storage, but some elements - // have not been constructed yet - - // XXX TODO we could possibly implement this with one call - // to transform rather than copy + uninitialized_copy - - // copy to elements which already exist - RandomAccessIterator mid = first; - thrust::advance(mid, size()); - thrust::copy(first, mid, begin()); - - // uninitialize_copy to elements which must be constructed - m_storage.uninitialized_copy(mid, last, end()); - - // update size - m_size = n; - } // end else -} // end vector_base::assign() - -template - void vector_base - ::fill_assign(size_type n, const T &x) -{ - if(n > capacity()) - { - // XXX we should also include a copy of the allocator: - // vector_base temp(n, x, get_allocator()); - vector_base temp(n, x); - temp.swap(*this); - } // end if - else if(n > size()) - { - // fill to existing elements - thrust::fill(begin(), end(), x); - - // construct uninitialized elements - m_storage.uninitialized_fill_n(end(), n - size(), x); - - // adjust size - m_size += (n - size()); - } // end else if - else - { - // fill to existing elements - iterator new_end = thrust::fill_n(begin(), n, x); - - // erase the elements after the fill - erase(new_end, end()); - } // end else -} // end vector_base::fill_assign() - -template - template - void vector_base - ::allocate_and_copy(size_type requested_size, - ForwardIterator first, ForwardIterator last, - storage_type &new_storage) -{ - if(requested_size == 0) - { - new_storage.deallocate(); - return; - } // end if - - // allocate exponentially larger new storage - size_type allocated_size = thrust::max(requested_size, 2 * capacity()); - - // do not exceed maximum storage - allocated_size = thrust::min(allocated_size, max_size()); - - if(requested_size > allocated_size) - { - throw std::length_error("assignment 
exceeds max_size()."); - } // end if - - new_storage.allocate(allocated_size); - - try - { - // construct the range to the newly allocated storage - m_storage.uninitialized_copy(first, last, new_storage.begin()); - } // end try - catch(...) - { - // something went wrong, so destroy & deallocate the new storage - // XXX seems like this destroys too many elements -- should just be last - first instead of requested_size - iterator new_storage_end = new_storage.begin(); - thrust::advance(new_storage_end, requested_size); - m_storage.destroy(new_storage.begin(), new_storage_end); - new_storage.deallocate(); - - // rethrow - throw; - } // end catch -} // end vector_base::allocate_and_copy() - - -} // end detail - -template - void swap(detail::vector_base &a, - detail::vector_base &b) -{ - a.swap(b); -} // end swap() - - - -namespace detail -{ - -// iterator tags match -template -bool vector_equal(InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, - thrust::detail::true_type) -{ - return thrust::equal(first1, last1, first2); -} - -// iterator tags differ -template -bool vector_equal(InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, - thrust::detail::false_type) -{ - typename thrust::iterator_difference::type n = thrust::distance(first1,last1); - - typedef typename thrust::iterator_system::type FromSystem1; - typedef typename thrust::iterator_system::type FromSystem2; - - // bring both ranges to the host system - // note that these copies are no-ops if the range is already convertible to the host system - FromSystem1 from_system1; - FromSystem2 from_system2; - thrust::host_system_tag to_system; - thrust::detail::move_to_system rng1(from_system1, to_system, first1, last1); - thrust::detail::move_to_system rng2(from_system2, to_system, first2, first2 + n); - - return thrust::equal(rng1.begin(), rng1.end(), rng2.begin()); -} - -template -bool vector_equal(InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2) -{ - typedef 
typename thrust::iterator_system::type system1; - typedef typename thrust::iterator_system::type system2; - - // dispatch on the sameness of the two systems - return vector_equal(first1, last1, first2, - thrust::detail::is_same()); -} - -} // end namespace detail - - - - -template -bool operator==(const detail::vector_base& lhs, - const detail::vector_base& rhs) -{ - return lhs.size() == rhs.size() && detail::vector_equal(lhs.begin(), lhs.end(), rhs.begin()); -} - -template -bool operator==(const detail::vector_base& lhs, - const std::vector& rhs) -{ - return lhs.size() == rhs.size() && detail::vector_equal(lhs.begin(), lhs.end(), rhs.begin()); -} - -template -bool operator==(const std::vector& lhs, - const detail::vector_base& rhs) -{ - return lhs.size() == rhs.size() && detail::vector_equal(lhs.begin(), lhs.end(), rhs.begin()); -} - -template -bool operator!=(const detail::vector_base& lhs, - const detail::vector_base& rhs) -{ - return !(lhs == rhs); -} - -template -bool operator!=(const detail::vector_base& lhs, - const std::vector& rhs) -{ - return !(lhs == rhs); -} - -template -bool operator!=(const std::vector& lhs, - const detail::vector_base& rhs) -{ - return !(lhs == rhs); -} - -} // end thrust - diff --git a/compat/thrust/device_allocator.h b/compat/thrust/device_allocator.h deleted file mode 100644 index a5462d1a28..0000000000 --- a/compat/thrust/device_allocator.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file device_allocator.h - * \brief An allocator which creates new elements in device memory - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -/*! \addtogroup memory_management_classes Memory Management Classes - * \{ - */ - -template class device_allocator; - -/*! \p device_allocator is a device memory allocator. - * This class is a specialization for \c void. - * - * \see device_ptr - * \see http://www.sgi.com/tech/stl/Allocators.html - */ -template<> - class device_allocator -{ - public: - /*! Type of element allocated, \c void. */ - typedef void value_type; - - /*! Pointer to allocation, \c device_ptr. */ - typedef device_ptr pointer; - - /*! \c const pointer to allocation, \c device_ptr. */ - typedef device_ptr const_pointer; - - /*! Type of allocation size, \c std::size_t. */ - typedef std::size_t size_type; - - /*! Type of allocation difference, \c pointer::difference_type. */ - typedef pointer::difference_type difference_type; - - /*! The \p rebind metafunction provides the type of a \p device_allocator - * instantiated with another type. - * - * \tparam U The other type to use for instantiation. - */ - template - struct rebind - { - /*! The typedef \p other gives the type of the rebound \p device_allocator. - */ - typedef device_allocator other; - }; // end rebind -}; // end device_allocator - -/*! \p device_allocator is a device memory allocator. - * This implementation inherits from \p device_new_allocator. - * - * \see device_ptr - * \see device_new_allocator - * \see http://www.sgi.com/tech/stl/Allocators.html - */ -template - class device_allocator - : public device_new_allocator -{ - public: - /*! The \p rebind metafunction provides the type of a \p device_allocator - * instantiated with another type. - * - * \tparam U The other type to use for instantiation. 
- */ - template - struct rebind - { - /*! The typedef \p other gives the type of the rebound \p device_allocator. - */ - typedef device_allocator other; - }; // end rebind - - /*! No-argument constructor has no effect. - */ - __host__ __device__ - inline device_allocator() {} - - /*! Copy constructor has no effect. - */ - __host__ __device__ - inline device_allocator(device_allocator const&) {} - - /*! Constructor from other \p allocator has no effect. - */ - template - __host__ __device__ - inline device_allocator(device_allocator const&) {} -}; // end device_allocator - -/*! \} - */ - -} // end thrust - diff --git a/compat/thrust/device_delete.h b/compat/thrust/device_delete.h deleted file mode 100644 index 1df3bb6f46..0000000000 --- a/compat/thrust/device_delete.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file device_delete.h - * \brief Deletes variables in device memory - */ - -#pragma once - -#include -#include - -namespace thrust -{ - -/*! \addtogroup deallocation_functions Deallocation Functions - * \ingroup memory_management_functions - * \{ - */ - -/*! \p device_delete deletes a \p device_ptr allocated with - * \p device_new. - * - * \param ptr The \p device_ptr to delete, assumed to have - * been allocated with \p device_new. - * \param n The number of objects to destroy at \p ptr. Defaults to \c 1 - * similar to \p device_new. 
- * - * \see device_ptr - * \see device_new - */ -template - inline void device_delete(thrust::device_ptr ptr, - const size_t n = 1); - -/*! \} - */ - -} // end thrust - -#include - diff --git a/compat/thrust/device_free.h b/compat/thrust/device_free.h deleted file mode 100644 index a734418e58..0000000000 --- a/compat/thrust/device_free.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file device_free.h - * \brief Deallocates storage allocated by \p device_malloc - */ - -#pragma once - -#include -#include - -namespace thrust -{ - -/*! \addtogroup deallocation_functions Deallocation Functions - * \ingroup memory_management_functions - * \{ - */ - -/*! \p device_free deallocates memory allocated by the function \p device_malloc. - * - * \param ptr A \p device_ptr pointing to memory to be deallocated. - * - * The following code snippet demonstrates how to use \p device_free to - * deallocate memory allocated by \p device_malloc. - * - * \code - * #include - * #include - * ... - * // allocate some integers with device_malloc - * const int N = 100; - * thrust::device_ptr int_array = thrust::device_malloc(N); - * - * // manipulate integers - * ... - * - * // deallocate with device_free - * thrust::device_free(int_array); - * \endcode - * - * \see device_ptr - * \see device_malloc - */ -inline void device_free(thrust::device_ptr ptr); - -/*! 
\} - */ - -} // end thrust - -#include - diff --git a/compat/thrust/device_malloc.h b/compat/thrust/device_malloc.h deleted file mode 100644 index a3b07234f9..0000000000 --- a/compat/thrust/device_malloc.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file device_malloc.h - * \brief Allocates storage in device memory - */ - -#pragma once - -#include -#include -#include // for std::size_t - -namespace thrust -{ - -/*! \addtogroup allocation_functions Allocation Functions - * \ingroup memory_management_functions - * \{ - */ - -/*! This version of \p device_malloc allocates sequential device storage - * for bytes. - * - * \param n The number of bytes to allocate sequentially - * in device memory. - * \return A \p device_ptr to the newly allocated memory. - * - * The following code snippet demonstrates how to use \p device_malloc to - * allocate a range of device memory. - * - * \code - * #include - * #include - * ... - * // allocate some memory with device_malloc - * const int N = 100; - * thrust::device_ptr void_ptr = thrust::device_malloc(N); - * - * // manipulate memory - * ... - * - * // deallocate with device_free - * thrust::device_free(void_ptr); - * \endcode - * - * \see device_ptr - * \see device_free - */ -inline thrust::device_ptr device_malloc(const std::size_t n); - -/*! 
This version of \p device_malloc allocates sequential device storage for - * new objects of the given type. - * - * \param n The number of objects of type T to allocate - * sequentially in device memory. - * \return A \p device_ptr to the newly allocated memory. - * - * The following code snippet demonstrates how to use \p device_malloc to - * allocate a range of device memory. - * - * \code - * #include - * #include - * ... - * // allocate some integers with device_malloc - * const int N = 100; - * thrust::device_ptr int_array = thrust::device_malloc(N); - * - * // manipulate integers - * ... - * - * // deallocate with device_free - * thrust::device_free(int_array); - * \endcode - * - * \see device_ptr - * \see device_free - */ -template - inline thrust::device_ptr device_malloc(const std::size_t n); - -/*! \} - */ - -} // end thrust - -#include - diff --git a/compat/thrust/device_malloc_allocator.h b/compat/thrust/device_malloc_allocator.h deleted file mode 100644 index 404a6d297a..0000000000 --- a/compat/thrust/device_malloc_allocator.h +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file device_malloc_allocator.h - * \brief An allocator which allocates storage with \p device_malloc - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - -// forward declarations to WAR circular #includes -template class device_ptr; -template device_ptr device_malloc(const std::size_t n); - -/*! \addtogroup memory_management Memory Management - * \addtogroup memory_management_classes Memory Management Classes - * \ingroup memory_management - * \{ - */ - -/*! \p device_malloc_allocator is a device memory allocator that employs the - * \p device_malloc function for allocation. - * - * \see device_malloc - * \see device_ptr - * \see http://www.sgi.com/tech/stl/Allocators.html - */ -template - class device_malloc_allocator -{ - public: - /*! Type of element allocated, \c T. */ - typedef T value_type; - - /*! Pointer to allocation, \c device_ptr. */ - typedef device_ptr pointer; - - /*! \c const pointer to allocation, \c device_ptr. */ - typedef device_ptr const_pointer; - - /*! Reference to allocated element, \c device_reference. */ - typedef device_reference reference; - - /*! \c const reference to allocated element, \c device_reference. */ - typedef device_reference const_reference; - - /*! Type of allocation size, \c std::size_t. */ - typedef std::size_t size_type; - - /*! Type of allocation difference, \c pointer::difference_type. */ - typedef typename pointer::difference_type difference_type; - - /*! The \p rebind metafunction provides the type of a \p device_malloc_allocator - * instantiated with another type. - * - * \tparam U The other type to use for instantiation. - */ - template - struct rebind - { - /*! The typedef \p other gives the type of the rebound \p device_malloc_allocator. - */ - typedef device_malloc_allocator other; - }; // end rebind - - /*! No-argument constructor has no effect. */ - __host__ __device__ - inline device_malloc_allocator() {} - - /*! 
No-argument destructor has no effect. */ - __host__ __device__ - inline ~device_malloc_allocator() {} - - /*! Copy constructor has no effect. */ - __host__ __device__ - inline device_malloc_allocator(device_malloc_allocator const&) {} - - /*! Constructor from other \p device_malloc_allocator has no effect. */ - template - __host__ __device__ - inline device_malloc_allocator(device_malloc_allocator const&) {} - - /*! Returns the address of an allocated object. - * \return &r. - */ - __host__ __device__ - inline pointer address(reference r) { return &r; } - - /*! Returns the address an allocated object. - * \return &r. - */ - __host__ __device__ - inline const_pointer address(const_reference r) { return &r; } - - /*! Allocates storage for \p cnt objects. - * \param cnt The number of objects to allocate. - * \return A \p pointer to uninitialized storage for \p cnt objects. - * \note Memory allocated by this function must be deallocated with \p deallocate. - */ - __host__ - inline pointer allocate(size_type cnt, - const_pointer = const_pointer(static_cast(0))) - { - if(cnt > this->max_size()) - { - throw std::bad_alloc(); - } // end if - - return pointer(device_malloc(cnt)); - } // end allocate() - - /*! Deallocates storage for objects allocated with \p allocate. - * \param p A \p pointer to the storage to deallocate. - * \param cnt The size of the previous allocation. - * \note Memory deallocated by this function must previously have been - * allocated with \p allocate. - */ - __host__ - inline void deallocate(pointer p, size_type cnt) - { - device_free(p); - } // end deallocate() - - /*! Returns the largest value \c n for which allocate(n) might succeed. - * \return The largest value \c n for which allocate(n) might succeed. - */ - inline size_type max_size() const - { - return (std::numeric_limits::max)() / sizeof(T); - } // end max_size() - - /*! Compares against another \p device_malloc_allocator for equality. 
- * \return \c true - */ - __host__ __device__ - inline bool operator==(device_malloc_allocator const&) { return true; } - - /*! Compares against another \p device_malloc_allocator for inequality. - * \return \c false - */ - __host__ __device__ - inline bool operator!=(device_malloc_allocator const &a) {return !operator==(a); } -}; // end device_malloc_allocator - -/*! \} - */ - -} // end thrust - - diff --git a/compat/thrust/device_new.h b/compat/thrust/device_new.h deleted file mode 100644 index 001d476896..0000000000 --- a/compat/thrust/device_new.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file device_new.h - * \brief Constructs new elements in device memory - */ - -#pragma once - -#include - -// #include this for size_t -#include -#include - -namespace thrust -{ - -/*! - * \addtogroup allocation_functions Allocation Functions - * \{ - */ - -/*! \p device_new implements the placement \c new operator for types - * resident in device memory. \p device_new calls T's null - * constructor on a array of objects in device memory. - * No memory is allocated by this function. - * - * \param p A \p device_ptr to a region of device memory into which - * to construct one or many Ts. - * \param n The number of objects to construct at \p p. - * \return p, casted to T's type. 
- * - * \see device_ptr - */ -template - device_ptr device_new(device_ptr p, - const size_t n = 1); - -/*! \p device_new implements the placement new operator for types - * resident in device memory. \p device_new calls T's copy - * constructor on a array of objects in device memory. No memory is - * allocated by this function. - * - * \param p A \p device_ptr to a region of device memory into which to - * construct one or many Ts. - * \param exemplar The value from which to copy. - * \param n The number of objects to construct at \p p. - * \return p, casted to T's type. - * - * \see device_ptr - * \see fill - */ -template - device_ptr device_new(device_ptr p, - const T &exemplar, - const size_t n = 1); - -/*! \p device_new implements the new operator for types resident in device memory. - * It allocates device memory large enough to hold \p n new objects of type \c T. - * - * \param n The number of objects to allocate. Defaults to \c 1. - * \return A \p device_ptr to the newly allocated region of device memory. - */ -template - device_ptr device_new(const size_t n = 1); - -/*! \} - */ - -} // end thrust - -#include - diff --git a/compat/thrust/device_new_allocator.h b/compat/thrust/device_new_allocator.h deleted file mode 100644 index 527d1fd7b5..0000000000 --- a/compat/thrust/device_new_allocator.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file device_new_allocator.h - * \brief An allocator which allocates storage with \p device_new - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - -/*! \addtogroup memory_management Memory Management - * \addtogroup memory_management_classes Memory Management Classes - * \ingroup memory_management - * \{ - */ - -/*! \p device_new_allocator is a device memory allocator that employs the - * \p device_new function for allocation. - * - * \see device_new - * \see device_ptr - * \see http://www.sgi.com/tech/stl/Allocators.html - */ -template - class device_new_allocator -{ - public: - /*! Type of element allocated, \c T. */ - typedef T value_type; - - /*! Pointer to allocation, \c device_ptr. */ - typedef device_ptr pointer; - - /*! \c const pointer to allocation, \c device_ptr. */ - typedef device_ptr const_pointer; - - /*! Reference to allocated element, \c device_reference. */ - typedef device_reference reference; - - /*! \c const reference to allocated element, \c device_reference. */ - typedef device_reference const_reference; - - /*! Type of allocation size, \c std::size_t. */ - typedef std::size_t size_type; - - /*! Type of allocation difference, \c pointer::difference_type. */ - typedef typename pointer::difference_type difference_type; - - /*! The \p rebind metafunction provides the type of a \p device_new_allocator - * instantiated with another type. - * - * \tparam U The other type to use for instantiation. - */ - template - struct rebind - { - /*! The typedef \p other gives the type of the rebound \p device_new_allocator. - */ - typedef device_new_allocator other; - }; // end rebind - - /*! No-argument constructor has no effect. */ - __host__ __device__ - inline device_new_allocator() {} - - /*! No-argument destructor has no effect. */ - __host__ __device__ - inline ~device_new_allocator() {} - - /*! Copy constructor has no effect. 
*/ - __host__ __device__ - inline device_new_allocator(device_new_allocator const&) {} - - /*! Constructor from other \p device_malloc_allocator has no effect. */ - template - __host__ __device__ - inline device_new_allocator(device_new_allocator const&) {} - - /*! Returns the address of an allocated object. - * \return &r. - */ - __host__ __device__ - inline pointer address(reference r) { return &r; } - - /*! Returns the address an allocated object. - * \return &r. - */ - __host__ __device__ - inline const_pointer address(const_reference r) { return &r; } - - /*! Allocates storage for \p cnt objects. - * \param cnt The number of objects to allocate. - * \return A \p pointer to uninitialized storage for \p cnt objects. - * \note Memory allocated by this function must be deallocated with \p deallocate. - */ - __host__ - inline pointer allocate(size_type cnt, - const_pointer = const_pointer(static_cast(0))) - { - if(cnt > this->max_size()) - { - throw std::bad_alloc(); - } // end if - - // use "::operator new" rather than keyword new - return pointer(device_new(cnt)); - } // end allocate() - - /*! Deallocates storage for objects allocated with \p allocate. - * \param p A \p pointer to the storage to deallocate. - * \param cnt The size of the previous allocation. - * \note Memory deallocated by this function must previously have been - * allocated with \p allocate. - */ - __host__ - inline void deallocate(pointer p, size_type cnt) - { - // use "::operator delete" rather than keyword delete - device_delete(p); - } // end deallocate() - - /*! Returns the largest value \c n for which allocate(n) might succeed. - * \return The largest value \c n for which allocate(n) might succeed. - */ - __host__ __device__ - inline size_type max_size() const - { - return std::numeric_limits::max THRUST_PREVENT_MACRO_SUBSTITUTION () / sizeof(T); - } // end max_size() - - /*! Compares against another \p device_malloc_allocator for equality. 
- * \return \c true - */ - __host__ __device__ - inline bool operator==(device_new_allocator const&) { return true; } - - /*! Compares against another \p device_malloc_allocator for inequality. - * \return \c false - */ - __host__ __device__ - inline bool operator!=(device_new_allocator const &a) {return !operator==(a); } -}; // end device_new_allocator - -/*! \} - */ - -} // end thrust - diff --git a/compat/thrust/device_ptr.h b/compat/thrust/device_ptr.h deleted file mode 100644 index dfc7e90dc3..0000000000 --- a/compat/thrust/device_ptr.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file device_ptr.h - * \brief A pointer to a variable which resides in the "device" system's memory space - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ - -/*! \addtogroup memory_management Memory Management - * \addtogroup memory_management_classes Memory Management Classes - * \ingroup memory_management - * \{ - */ - -// forward declarations -template class device_reference; - -/*! \p device_ptr stores a pointer to an object allocated in device memory. This type - * provides type safety when dispatching standard algorithms on ranges resident in - * device memory. - * - * \p device_ptr has pointer semantics: it may be dereferenced safely from the host and - * may be manipulated with pointer arithmetic. 
- * - * \p device_ptr can be created with the functions device_malloc, device_new, or - * device_pointer_cast, or by explicitly calling its constructor with a raw pointer. - * - * The raw pointer encapsulated by a \p device_ptr may be obtained by either its get - * method or the \p raw_pointer_cast free function. - * - * \note \p device_ptr is not a smart pointer; it is the programmer's responsibility to - * deallocate memory pointed to by \p device_ptr. - * - * \see device_malloc - * \see device_new - * \see device_pointer_cast - * \see raw_pointer_cast - */ -template - class device_ptr - : public thrust::pointer< - T, - thrust::device_system_tag, - thrust::device_reference, - thrust::device_ptr - > -{ - private: - typedef thrust::pointer< - T, - thrust::device_system_tag, - thrust::device_reference, - thrust::device_ptr - > super_t; - - public: - /*! \p device_ptr's null constructor initializes its raw pointer to \c 0. - */ - __host__ __device__ - device_ptr() : super_t() {} - - /*! \p device_ptr's copy constructor is templated to allow copying to a - * device_ptr from a T *. - * - * \param ptr A raw pointer to copy from, presumed to point to a location in - * device memory. - */ - template - __host__ __device__ - explicit device_ptr(OtherT *ptr) : super_t(ptr) {} - - /*! \p device_ptr's copy constructor allows copying from another device_ptr with related type. - * \param other The \p device_ptr to copy from. - */ - template - __host__ __device__ - device_ptr(const device_ptr &other) : super_t(other) {} - - /*! \p device_ptr's assignment operator allows assigning from another \p device_ptr with related type. - * \param other The other \p device_ptr to copy from. - * \return *this - */ - template - __host__ __device__ - device_ptr &operator=(const device_ptr &other) - { - super_t::operator=(other); - return *this; - } - -// declare these members for the purpose of Doxygenating them -// they actually exist in a derived-from class -#if 0 - /*! 
This method returns this \p device_ptr's raw pointer. - * \return This \p device_ptr's raw pointer. - */ - __host__ __device__ - T *get(void) const; -#endif // end doxygen-only members -}; // end device_ptr - -/*! This operator outputs the value of a \p device_ptr's raw pointer to a \p std::basic_ostream. - * - * \param os The std::basic_ostream of interest. - * \param p The device_ptr of interest. - * \return os. - */ -template -inline std::basic_ostream &operator<<(std::basic_ostream &os, const device_ptr &p); - -/*! \} - */ - - -/*! - * \addtogroup memory_management_functions Memory Management Functions - * \ingroup memory_management - * \{ - */ - -/*! \p device_pointer_cast creates a device_ptr from a raw pointer which is presumed to point - * to a location in device memory. - * - * \param ptr A raw pointer, presumed to point to a location in device memory. - * \return A device_ptr wrapping ptr. - */ -template -__host__ __device__ -inline device_ptr device_pointer_cast(T *ptr); - -/*! This version of \p device_pointer_cast creates a copy of a device_ptr from another device_ptr. - * This version is included for symmetry with \p raw_pointer_cast. - * - * \param ptr A device_ptr. - * \return A copy of \p ptr. - */ -template -__host__ __device__ -inline device_ptr device_pointer_cast(const device_ptr &ptr); - -/*! \} - */ - -} // end thrust - -#include -#include - diff --git a/compat/thrust/device_reference.h b/compat/thrust/device_reference.h deleted file mode 100644 index edae2b59af..0000000000 --- a/compat/thrust/device_reference.h +++ /dev/null @@ -1,969 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file device_reference.h - * \brief A reference to a variable which resides in the "device" system's memory space - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -/*! \addtogroup memory_management_classes Memory Management Classes - * \ingroup memory_management - * \{ - */ - -/*! \p device_reference acts as a reference-like object to an object stored in device memory. - * \p device_reference is not intended to be used directly; rather, this type - * is the result of deferencing a \p device_ptr. Similarly, taking the address of - * a \p device_reference yields a \p device_ptr. - * - * \p device_reference may often be used from host code in place of operations defined on - * its associated \c value_type. 
For example, when \p device_reference refers to an - * arithmetic type, arithmetic operations on it are legal: - * - * \code - * #include - * - * int main(void) - * { - * thrust::device_vector vec(1, 13); - * - * thrust::device_reference ref_to_thirteen = vec[0]; - * - * int x = ref_to_thirteen + 1; - * - * // x is 14 - * - * return 0; - * } - * \endcode - * - * Similarly, we can print the value of \c ref_to_thirteen in the above code by using an - * \c iostream: - * - * \code - * #include - * #include - * - * int main(void) - * { - * thrust::device_vector vec(1, 13); - * - * thrust::device_reference ref_to_thirteen = vec[0]; - * - * std::cout << ref_to_thirteen << std::endl; - * - * // 13 is printed - * - * return 0; - * } - * \endcode - * - * Of course, we needn't explicitly create a \p device_reference in the previous - * example, because one is returned by \p device_vector's bracket operator. A more natural - * way to print the value of a \p device_vector element might be: - * - * \code - * #include - * #include - * - * int main(void) - * { - * thrust::device_vector vec(1, 13); - * - * std::cout << vec[0] << std::endl; - * - * // 13 is printed - * - * return 0; - * } - * \endcode - * - * These kinds of operations should be used sparingly in performance-critical code, because - * they imply a potentially expensive copy between host and device space. - * - * Some operations which are possible with regular objects are impossible with their - * corresponding \p device_reference objects due to the requirements of the C++ language. For - * example, because the member access operator cannot be overloaded, member variables and functions - * of a referent object cannot be directly accessed through its \p device_reference. 
- * - * The following code, which generates a compiler error, illustrates: - * - * \code - * #include - * - * struct foo - * { - * int x; - * }; - * - * int main(void) - * { - * thrust::device_vector foo_vec(1); - * - * thrust::device_reference foo_ref = foo_vec[0]; - * - * foo_ref.x = 13; // ERROR: x cannot be accessed through foo_ref - * - * return 0; - * } - * \endcode - * - * Instead, a host space copy must be created to access \c foo's \c x member: - * - * \code - * #include - * - * struct foo - * { - * int x; - * }; - * - * int main(void) - * { - * thrust::device_vector foo_vec(1); - * - * // create a local host-side foo object - * foo host_foo; - * host_foo.x = 13; - * - * thrust::device_reference foo_ref = foo_vec[0]; - * - * foo_ref = host_foo; - * - * // foo_ref's x member is 13 - * - * return 0; - * } - * \endcode - * - * Another common case where a \p device_reference cannot directly be used in place of - * its referent object occurs when passing them as parameters to functions like \c printf - * which have varargs parameters. Because varargs parameters must be Plain Old Data, a - * \p device_reference to a POD type requires a cast when passed to \c printf: - * - * \code - * #include - * #include - * - * int main(void) - * { - * thrust::device_vector vec(1,13); - * - * // vec[0] must be cast to int when passing to printf - * printf("%d\n", (int) vec[0]); - * - * return 0; - * } - * \endcode - * - * \see device_ptr - * \see device_vector - */ -template - class device_reference - : public thrust::reference< - T, - thrust::device_ptr, - thrust::device_reference - > -{ - private: - typedef thrust::reference< - T, - thrust::device_ptr, - thrust::device_reference - > super_t; - - public: - /*! The type of the value referenced by this type of \p device_reference. - */ - typedef typename super_t::value_type value_type; - - /*! The type of the expression &ref, where ref is a \p device_reference. - */ - typedef typename super_t::pointer pointer; - - /*! 
This copy constructor accepts a const reference to another - * \p device_reference. After this \p device_reference is constructed, - * it shall refer to the same object as \p other. - * - * \param other A \p device_reference to copy from. - * - * The following code snippet demonstrates the semantics of this - * copy constructor. - * - * \code - * #include - * #include - * ... - * thrust::device_vector v(1,0); - * thrust::device_reference ref = v[0]; - * - * // ref equals the object at v[0] - * assert(ref == v[0]); - * - * // the address of ref equals the address of v[0] - * assert(&ref == &v[0]); - * - * // modifying v[0] modifies ref - * v[0] = 13; - * assert(ref == 13); - * \endcode - * - * \note This constructor is templated primarily to allow initialization of - * device_reference from device_reference. - */ - template - __host__ __device__ - device_reference(const device_reference &other, - typename thrust::detail::enable_if_convertible< - typename device_reference::pointer, - pointer - >::type * = 0) - : super_t(other) - {} - - /*! This copy constructor initializes this \p device_reference - * to refer to an object pointed to by the given \p device_ptr. After - * this \p device_reference is constructed, it shall refer to the - * object pointed to by \p ptr. - * - * \param ptr A \p device_ptr to copy from. - * - * The following code snippet demonstrates the semantic of this - * copy constructor. - * - * \code - * #include - * #include - * ... - * thrust::device_vector v(1,0); - * thrust::device_ptr ptr = &v[0]; - * thrust::device_reference ref(ptr); - * - * // ref equals the object pointed to by ptr - * assert(ref == *ptr); - * - * // the address of ref equals ptr - * assert(&ref == ptr); - * - * // modifying *ptr modifies ref - * *ptr = 13; - * assert(ref == 13); - * \endcode - */ - __host__ __device__ - explicit device_reference(const pointer &ptr) - : super_t(ptr) - {} - - /*! 
This assignment operator assigns the value of the object referenced by - * the given \p device_reference to the object referenced by this - * \p device_reference. - * - * \param other The \p device_reference to assign from. - * \return *this - */ - template - __host__ __device__ - device_reference &operator=(const device_reference &other); - - /*! Assignment operator assigns the value of the given value to the - * value referenced by this \p device_reference. - * - * \param x The value to assign from. - * \return *this - */ - __host__ __device__ - device_reference &operator=(const value_type &x); - -// declare these members for the purpose of Doxygenating them -// they actually exist in a derived-from class -#if 0 - /*! Address-of operator returns a \p device_ptr pointing to the object - * referenced by this \p device_reference. It does not return the - * address of this \p device_reference. - * - * \return A \p device_ptr pointing to the object this - * \p device_reference references. - */ - __host__ __device__ - pointer operator&(void) const; - - /*! Conversion operator converts this \p device_reference to T - * by returning a copy of the object referenced by this - * \p device_reference. - * - * \return A copy of the object referenced by this \p device_reference. - */ - __host__ __device__ - operator value_type (void) const; - - /*! swaps the value this \p device_reference references with another. - * \p other The other \p device_reference with which to swap. - */ - __host__ __device__ - void swap(device_reference &other); - - /*! Prefix increment operator increments the object referenced by this - * \p device_reference. - * - * \return *this - * - * The following code snippet demonstrates the semantics of - * \p device_reference's prefix increment operator. - * - * \code - * #include - * #include - * ... 
- * thrust::device_vector v(1,0); - * thrust::device_ptr ptr = &v[0]; - * thrust::device_reference ref(ptr); - * - * // ref equals 0 - * assert(ref == 0); - * - * // the object pointed to by ptr equals 1 - * assert(*ptr == 1); - * - * // v[0] equals 1 - * assert(v[0] == 1); - * - * // increment ref - * ++ref; - * - * // ref equals 1 - * assert(ref == 1); - * - * // the object pointed to by ptr equals 1 - * assert(*ptr == 1); - * - * // v[0] equals 1 - * assert(v[0] == 1); - * \endcode - * - * \note The increment executes as if it were executed on the host. - * This may change in a later version. - */ - device_reference &operator++(void); - - /*! Postfix increment operator copies the object referenced by this - * \p device_reference, increments the object referenced by this - * \p device_reference, and returns the copy. - * - * \return A copy of the object referenced by this \p device_reference - * before being incremented. - * - * The following code snippet demonstrates the semantics of - * \p device_reference's postfix increment operator. - * - * \code - * #include - * #include - * ... - * thrust::device_vector v(1,0); - * thrust::device_ptr ptr = &v[0]; - * thrust::device_reference ref(ptr); - * - * // ref equals 0 - * assert(ref == 0); - * - * // the object pointed to by ptr equals 0 - * assert(*ptr == 0); - * - * // v[0] equals 0 - * assert(v[0] == 0); - * - * // increment ref - * int x = ref++; - * - * // x equals 0 - * assert(x == 0) - * - * // ref equals 1 - * assert(ref == 1); - * - * // the object pointed to by ptr equals 1 - * assert(*ptr == 1); - * - * // v[0] equals 1 - * assert(v[0] == 1); - * \endcode - * - * \note The increment executes as if it were executed on the host. - * This may change in a later version. - */ - value_type operator++(int); - - /*! Addition assignment operator add-assigns the object referenced by this - * \p device_reference and returns this \p device_reference. - * - * \param rhs The right hand side of the add-assignment. 
- * \return *this. - * - * The following code snippet demonstrates the semantics of - * \p device_reference's addition assignment operator. - * - * \code - * #include - * #include - * ... - * thrust::device_vector v(1,0); - * thrust::device_ptr ptr = &v[0]; - * thrust::device_reference ref(ptr); - * - * // ref equals 0 - * assert(ref == 0); - * - * // the object pointed to by ptr equals 0 - * assert(*ptr == 0); - * - * // v[0] equals 0 - * assert(v[0] == 0); - * - * // add-assign ref - * ref += 5; - * - * // ref equals 5 - * assert(ref == 5); - * - * // the object pointed to by ptr equals 5 - * assert(*ptr == 5); - * - * // v[0] equals 5 - * assert(v[0] == 5); - * \endcode - * - * \note The add-assignment executes as as if it were executed on the host. - * This may change in a later version. - */ - device_reference &operator+=(const T &rhs); - - /*! Prefix decrement operator decrements the object referenced by this - * \p device_reference. - * - * \return *this - * - * The following code snippet demonstrates the semantics of - * \p device_reference's prefix decrement operator. - * - * \code - * #include - * #include - * ... - * thrust::device_vector v(1,0); - * thrust::device_ptr ptr = &v[0]; - * thrust::device_reference ref(ptr); - * - * // ref equals 0 - * assert(ref == 0); - * - * // the object pointed to by ptr equals 0 - * assert(*ptr == 0); - * - * // v[0] equals 0 - * assert(v[0] == 0); - * - * // decrement ref - * --ref; - * - * // ref equals -1 - * assert(ref == -1); - * - * // the object pointed to by ptr equals -1 - * assert(*ptr == -1); - * - * // v[0] equals -1 - * assert(v[0] == -1); - * \endcode - * - * \note The decrement executes as if it were executed on the host. - * This may change in a later version. - */ - device_reference &operator--(void); - - /*! Postfix decrement operator copies the object referenced by this - * \p device_reference, decrements the object referenced by this - * \p device_reference, and returns the copy. 
- * - * \return A copy of the object referenced by this \p device_reference - * before being decremented. - * - * The following code snippet demonstrates the semantics of - * \p device_reference's postfix decrement operator. - * - * \code - * #include - * #include - * ... - * thrust::device_vector v(1,0); - * thrust::device_ptr ptr = &v[0]; - * thrust::device_reference ref(ptr); - * - * // ref equals 0 - * assert(ref == 0); - * - * // the object pointed to by ptr equals 0 - * assert(*ptr == 0); - * - * // v[0] equals 0 - * assert(v[0] == 0); - * - * // decrement ref - * int x = ref--; - * - * // x equals 0 - * assert(x == 0) - * - * // ref equals -1 - * assert(ref == -1); - * - * // the object pointed to by ptr equals -1 - * assert(*ptr == -1); - * - * // v[0] equals -1 - * assert(v[0] == -1); - * \endcode - * - * \note The decrement executes as if it were executed on the host. - * This may change in a later version. - */ - value_type operator--(int); - - /*! Subtraction assignment operator subtract-assigns the object referenced by this - * \p device_reference and returns this \p device_reference. - * - * \param rhs The right hand side of the subtraction-assignment. - * \return *this. - * - * The following code snippet demonstrates the semantics of - * \p device_reference's addition assignment operator. - * - * \code - * #include - * #include - * ... - * thrust::device_vector v(1,0); - * thrust::device_ptr ptr = &v[0]; - * thrust::device_reference ref(ptr); - * - * // ref equals 0 - * assert(ref == 0); - * - * // the object pointed to by ptr equals 0 - * assert(*ptr == 0); - * - * // v[0] equals 0 - * assert(v[0] == 0); - * - * // subtract-assign ref - * ref -= 5; - * - * // ref equals -5 - * assert(ref == -5); - * - * // the object pointed to by ptr equals -5 - * assert(*ptr == -5); - * - * // v[0] equals -5 - * assert(v[0] == -5); - * \endcode - * - * \note The subtract-assignment executes as as if it were executed on the host. 
- * This may change in a later version. - */ - device_reference &operator-=(const T &rhs); - - /*! Multiplication assignment operator multiply-assigns the object referenced by this - * \p device_reference and returns this \p device_reference. - * - * \param rhs The right hand side of the multiply-assignment. - * \return *this. - * - * The following code snippet demonstrates the semantics of - * \p device_reference's multiply assignment operator. - * - * \code - * #include - * #include - * ... - * thrust::device_vector v(1,1); - * thrust::device_ptr ptr = &v[0]; - * thrust::device_reference ref(ptr); - * - * // ref equals 1 - * assert(ref == 1); - * - * // the object pointed to by ptr equals 1 - * assert(*ptr == 1); - * - * // v[0] equals 1 - * assert(v[0] == 1); - * - * // multiply-assign ref - * ref *= 5; - * - * // ref equals 5 - * assert(ref == 5); - * - * // the object pointed to by ptr equals 5 - * assert(*ptr == 5); - * - * // v[0] equals 5 - * assert(v[0] == 5); - * \endcode - * - * \note The multiply-assignment executes as as if it were executed on the host. - * This may change in a later version. - */ - device_reference &operator*=(const T &rhs); - - /*! Division assignment operator divide-assigns the object referenced by this - * \p device_reference and returns this \p device_reference. - * - * \param rhs The right hand side of the divide-assignment. - * \return *this. - * - * The following code snippet demonstrates the semantics of - * \p device_reference's divide assignment operator. - * - * \code - * #include - * #include - * ... 
- * thrust::device_vector v(1,5); - * thrust::device_ptr ptr = &v[0]; - * thrust::device_reference ref(ptr); - * - * // ref equals 5 - * assert(ref == 5); - * - * // the object pointed to by ptr equals 5 - * assert(*ptr == 5); - * - * // v[0] equals 5 - * assert(v[0] == 5); - * - * // divide-assign ref - * ref /= 5; - * - * // ref equals 1 - * assert(ref == 1); - * - * // the object pointed to by ptr equals 1 - * assert(*ptr == 1); - * - * // v[0] equals 1 - * assert(v[0] == 1); - * \endcode - * - * \note The divide-assignment executes as as if it were executed on the host. - * This may change in a later version. - */ - device_reference &operator/=(const T &rhs); - - /*! Modulation assignment operator modulus-assigns the object referenced by this - * \p device_reference and returns this \p device_reference. - * - * \param rhs The right hand side of the divide-assignment. - * \return *this. - * - * The following code snippet demonstrates the semantics of - * \p device_reference's divide assignment operator. - * - * \code - * #include - * #include - * ... - * thrust::device_vector v(1,5); - * thrust::device_ptr ptr = &v[0]; - * thrust::device_reference ref(ptr); - * - * // ref equals 5 - * assert(ref == 5); - * - * // the object pointed to by ptr equals 5 - * assert(*ptr == 5); - * - * // v[0] equals 5 - * assert(v[0] == 5); - * - * // modulus-assign ref - * ref %= 5; - * - * // ref equals 0 - * assert(ref == 0); - * - * // the object pointed to by ptr equals 0 - * assert(*ptr == 0); - * - * // v[0] equals 0 - * assert(v[0] == 0); - * \endcode - * - * \note The modulus-assignment executes as as if it were executed on the host. - * This may change in a later version. - */ - device_reference &operator%=(const T &rhs); - - /*! Bitwise left shift assignment operator left shift-assigns the object referenced by this - * \p device_reference and returns this \p device_reference. - * - * \param rhs The right hand side of the left shift-assignment. - * \return *this. 
- * - * The following code snippet demonstrates the semantics of - * \p device_reference's left shift assignment operator. - * - * \code - * #include - * #include - * ... - * thrust::device_vector v(1,1); - * thrust::device_ptr ptr = &v[0]; - * thrust::device_reference ref(ptr); - * - * // ref equals 1 - * assert(ref == 1); - * - * // the object pointed to by ptr equals 1 - * assert(*ptr == 1); - * - * // v[0] equals 1 - * assert(v[0] == 1); - * - * // left shift-assign ref - * ref <<= 1; - * - * // ref equals 2 - * assert(ref == 2); - * - * // the object pointed to by ptr equals 2 - * assert(*ptr == 2); - * - * // v[0] equals 2 - * assert(v[0] == 2); - * \endcode - * - * \note The left shift-assignment executes as as if it were executed on the host. - * This may change in a later version. - */ - device_reference &operator<<=(const T &rhs); - - /*! Bitwise right shift assignment operator right shift-assigns the object referenced by this - * \p device_reference and returns this \p device_reference. - * - * \param rhs The right hand side of the right shift-assignment. - * \return *this. - * - * The following code snippet demonstrates the semantics of - * \p device_reference's right shift assignment operator. - * - * \code - * #include - * #include - * ... - * thrust::device_vector v(1,2); - * thrust::device_ptr ptr = &v[0]; - * thrust::device_reference ref(ptr); - * - * // ref equals 2 - * assert(ref == 2); - * - * // the object pointed to by ptr equals 2 - * assert(*ptr == 2); - * - * // v[0] equals 2 - * assert(v[0] == 2); - * - * // right shift-assign ref - * ref >>= 1; - * - * // ref equals 1 - * assert(ref == 1); - * - * // the object pointed to by ptr equals 1 - * assert(*ptr == 1); - * - * // v[0] equals 1 - * assert(v[0] == 1); - * \endcode - * - * \note The right shift-assignment executes as as if it were executed on the host. - * This may change in a later version. - */ - device_reference &operator>>=(const T &rhs); - - /*! 
Bitwise AND assignment operator AND-assigns the object referenced by this - * \p device_reference and returns this \p device_reference. - * - * \param rhs The right hand side of the AND-assignment. - * \return *this. - * - * The following code snippet demonstrates the semantics of - * \p device_reference's AND assignment operator. - * - * \code - * #include - * #include - * ... - * thrust::device_vector v(1,1); - * thrust::device_ptr ptr = &v[0]; - * thrust::device_reference ref(ptr); - * - * // ref equals 1 - * assert(ref == 1); - * - * // the object pointed to by ptr equals 1 - * assert(*ptr == 1); - * - * // v[0] equals 1 - * assert(v[0] == 1); - * - * // right AND-assign ref - * ref &= 0; - * - * // ref equals 0 - * assert(ref == 0); - * - * // the object pointed to by ptr equals 0 - * assert(*ptr == 0); - * - * // v[0] equals 0 - * assert(v[0] == 0); - * \endcode - * - * \note The AND-assignment executes as as if it were executed on the host. - * This may change in a later version. - */ - device_reference &operator&=(const T &rhs); - - /*! Bitwise OR assignment operator OR-assigns the object referenced by this - * \p device_reference and returns this \p device_reference. - * - * \param rhs The right hand side of the OR-assignment. - * \return *this. - * - * The following code snippet demonstrates the semantics of - * \p device_reference's OR assignment operator. - * - * \code - * #include - * #include - * ... 
- * thrust::device_vector v(1,0); - * thrust::device_ptr ptr = &v[0]; - * thrust::device_reference ref(ptr); - * - * // ref equals 0 - * assert(ref == 0); - * - * // the object pointed to by ptr equals 0 - * assert(*ptr == 0); - * - * // v[0] equals 0 - * assert(v[0] == 0); - * - * // right OR-assign ref - * ref |= 1; - * - * // ref equals 1 - * assert(ref == 1); - * - * // the object pointed to by ptr equals 1 - * assert(*ptr == 1); - * - * // v[0] equals 1 - * assert(v[0] == 1); - * \endcode - * - * \note The OR-assignment executes as as if it were executed on the host. - * This may change in a later version. - */ - device_reference &operator|=(const T &rhs); - - /*! Bitwise XOR assignment operator XOR-assigns the object referenced by this - * \p device_reference and returns this \p device_reference. - * - * \param rhs The right hand side of the XOR-assignment. - * \return *this. - * - * The following code snippet demonstrates the semantics of - * \p device_reference's XOR assignment operator. - * - * \code - * #include - * #include - * ... - * thrust::device_vector v(1,1); - * thrust::device_ptr ptr = &v[0]; - * thrust::device_reference ref(ptr); - * - * // ref equals 1 - * assert(ref == 1); - * - * // the object pointed to by ptr equals 1 - * assert(*ptr == 1); - * - * // v[0] equals 1 - * assert(v[0] == 1); - * - * // right XOR-assign ref - * ref ^= 1; - * - * // ref equals 0 - * assert(ref == 0); - * - * // the object pointed to by ptr equals 0 - * assert(*ptr == 0); - * - * // v[0] equals 0 - * assert(v[0] == 0); - * \endcode - * - * \note The XOR-assignment executes as as if it were executed on the host. - * This may change in a later version. - */ - device_reference &operator^=(const T &rhs); -#endif // end doxygen-only members -}; // end device_reference - -/*! swaps the value of one \p device_reference with another. - * \p x The first \p device_reference of interest. - * \p y The second \p device_reference of interest. 
- */ -template -__host__ __device__ -void swap(device_reference &x, device_reference &y); - -/*! \} - */ - -} // end thrust - -#include - diff --git a/compat/thrust/device_vector.h b/compat/thrust/device_vector.h deleted file mode 100644 index 8c9d0051a2..0000000000 --- a/compat/thrust/device_vector.h +++ /dev/null @@ -1,418 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file device_vector.h - * \brief A dynamically-sizable array of elements which reside in the "device" memory space - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -// forward declaration of host_vector -template class host_vector; - -/*! \addtogroup container_classes Container Classes - * \addtogroup device_containers Device Containers - * \ingroup container_classes - * \{ - */ - -/*! A \p device_vector is a container that supports random access to elements, - * constant time removal of elements at the end, and linear time insertion - * and removal of elements at the beginning or in the middle. The number of - * elements in a \p device_vector may vary dynamically; memory management is - * automatic. The memory associated with a \p device_vector resides in the memory - * space of a parallel device. 
- * - * \see http://www.sgi.com/tech/stl/Vector.html - * \see host_vector - */ -template > - class device_vector - : public detail::vector_base -{ - private: - typedef detail::vector_base Parent; - - public: - /*! \cond */ - typedef typename Parent::size_type size_type; - typedef typename Parent::value_type value_type; - /*! \endcond */ - - /*! This constructor creates an empty \p device_vector. - */ - __host__ - device_vector(void) - :Parent() {} - - /*! This constructor creates a \p device_vector with the given - * size. - * \param n The number of elements to initially craete. - */ - __host__ - explicit device_vector(size_type n) - :Parent(n) {} - - /*! This constructor creates a \p device_vector with copies - * of an exemplar element. - * \param n The number of elements to initially create. - * \param value An element to copy. - */ - __host__ - explicit device_vector(size_type n, const value_type &value) - :Parent(n,value) {} - - /*! Copy constructor copies from an exemplar \p device_vector. - * \param v The \p device_vector to copy. - */ - __host__ - device_vector(const device_vector &v) - :Parent(v) {} - - /*! Copy constructor copies from an exemplar \p device_vector with different type. - * \param v The \p device_vector to copy. - */ - template - __device__ - device_vector(const device_vector &v) - :Parent(v) {} - - /*! Assign operator copies from an exemplar \p device_vector with different type. - * \param v The \p device_vector to copy. - */ - template - __device__ - device_vector &operator=(const device_vector &v) - { Parent::operator=(v); return *this; } - - /*! Copy constructor copies from an exemplar \c std::vector. - * \param v The std::vector to copy. - */ - template - __host__ - device_vector(const std::vector &v) - :Parent(v) {} - - /*! Assign operator copies from an exemplar std::vector. - * \param v The std::vector to copy. - */ - template - __host__ - device_vector &operator=(const std::vector &v) - { Parent::operator=(v); return *this;} - - /*! 
Copy constructor copies from an exemplar \p host_vector with possibly different type. - * \param v The \p host_vector to copy. - */ - template - __host__ - device_vector(const host_vector &v); - - /*! Assign operator copies from an examplar \p host_vector. - * \param v The \p host_vector to copy. - */ - template - __host__ - device_vector &operator=(const host_vector &v) - { Parent::operator=(v); return *this; } - - /*! This constructor builds a \p device_vector from a range. - * \param first The beginning of the range. - * \param last The end of the range. - */ - template - __host__ - device_vector(InputIterator first, InputIterator last) - :Parent(first,last) {} - -// declare these members for the purpose of Doxygenating them -// they actually exist in a derived-from class -#if 0 - /*! \brief Resizes this vector to the specified number of elements. - * \param new_size Number of elements this vector should contain. - * \param x Data with which new elements should be populated. - * \throw std::length_error If n exceeds max_size(). - * - * This method will resize this vector to the specified number of - * elements. If the number is smaller than this vector's current - * size this vector is truncated, otherwise this vector is - * extended and new elements are populated with given data. - */ - void resize(size_type new_size, const value_type &x = value_type()); - - /*! Returns the number of elements in this vector. - */ - size_type size(void) const; - - /*! Returns the size() of the largest possible vector. - * \return The largest possible return value of size(). - */ - size_type max_size(void) const; - - /*! \brief If n is less than or equal to capacity(), this call has no effect. - * Otherwise, this method is a request for allocation of additional memory. If - * the request is successful, then capacity() is greater than or equal to - * n; otherwise, capacity() is unchanged. In either case, size() is unchanged. - * \throw std::length_error If n exceeds max_size(). 
- */ - void reserve(size_type n); - - /*! Returns the number of elements which have been reserved in this - * vector. - */ - size_type capacity(void) const; - - /*! This method shrinks the capacity of this vector to exactly - * fit its elements. - */ - void shrink_to_fit(void); - - /*! \brief Subscript access to the data contained in this vector_dev. - * \param n The index of the element for which data should be accessed. - * \return Read/write reference to data. - * - * This operator allows for easy, array-style, data access. - * Note that data access with this operator is unchecked and - * out_of_range lookups are not defined. - */ - reference operator[](size_type n); - - /*! \brief Subscript read access to the data contained in this vector_dev. - * \param n The index of the element for which data should be accessed. - * \return Read reference to data. - * - * This operator allows for easy, array-style, data access. - * Note that data access with this operator is unchecked and - * out_of_range lookups are not defined. - */ - const_reference operator[](size_type n) const; - - /*! This method returns an iterator pointing to the beginning of - * this vector. - * \return mStart - */ - iterator begin(void); - - /*! This method returns a const_iterator pointing to the beginning - * of this vector. - * \return mStart - */ - const_iterator begin(void) const; - - /*! This method returns a const_iterator pointing to the beginning - * of this vector. - * \return mStart - */ - const_iterator cbegin(void) const; - - /*! This method returns a reverse_iterator pointing to the beginning of - * this vector's reversed sequence. - * \return A reverse_iterator pointing to the beginning of this - * vector's reversed sequence. - */ - reverse_iterator rbegin(void); - - /*! This method returns a const_reverse_iterator pointing to the beginning of - * this vector's reversed sequence. - * \return A const_reverse_iterator pointing to the beginning of this - * vector's reversed sequence. 
- */ - const_reverse_iterator rbegin(void) const; - - /*! This method returns a const_reverse_iterator pointing to the beginning of - * this vector's reversed sequence. - * \return A const_reverse_iterator pointing to the beginning of this - * vector's reversed sequence. - */ - const_reverse_iterator crbegin(void) const; - - /*! This method returns an iterator pointing to one element past the - * last of this vector. - * \return begin() + size(). - */ - iterator end(void); - - /*! This method returns a const_iterator pointing to one element past the - * last of this vector. - * \return begin() + size(). - */ - const_iterator end(void) const; - - /*! This method returns a const_iterator pointing to one element past the - * last of this vector. - * \return begin() + size(). - */ - const_iterator cend(void) const; - - /*! This method returns a reverse_iterator pointing to one element past the - * last of this vector's reversed sequence. - * \return rbegin() + size(). - */ - reverse_iterator rend(void); - - /*! This method returns a const_reverse_iterator pointing to one element past the - * last of this vector's reversed sequence. - * \return rbegin() + size(). - */ - const_reverse_iterator rend(void) const; - - /*! This method returns a const_reverse_iterator pointing to one element past the - * last of this vector's reversed sequence. - * \return rbegin() + size(). - */ - const_reverse_iterator crend(void) const; - - /*! This method returns a const_reference referring to the first element of this - * vector. - * \return The first element of this vector. - */ - const_reference front(void) const; - - /*! This method returns a reference pointing to the first element of this - * vector. - * \return The first element of this vector. - */ - reference front(void); - - /*! This method returns a const reference pointing to the last element of - * this vector. - * \return The last element of this vector. - */ - const_reference back(void) const; - - /*! 
This method returns a reference referring to the last element of - * this vector_dev. - * \return The last element of this vector. - */ - reference back(void); - - /*! This method returns a pointer to this vector's first element. - * \return A pointer to the first element of this vector. - */ - pointer data(void); - - /*! This method returns a const_pointer to this vector's first element. - * \return a const_pointer to the first element of this vector. - */ - const_pointer data(void) const; - - /*! This method resizes this vector to 0. - */ - void clear(void); - - /*! This method returns true iff size() == 0. - * \return true if size() == 0; false, otherwise. - */ - bool empty(void) const; - - /*! This method appends the given element to the end of this vector. - * \param x The element to append. - */ - void push_back(const value_type &x); - - /*! This method erases the last element of this vector, invalidating - * all iterators and references to it. - */ - void pop_back(void); - - /*! This method swaps the contents of this vector_base with another vector. - * \param v The vector with which to swap. - */ - void swap(device_vector &v); - - /*! This method removes the element at position pos. - * \param pos The position of the element of interest. - * \return An iterator pointing to the new location of the element that followed the element - * at position pos. - */ - iterator erase(iterator pos); - - /*! This method removes the range of elements [first,last) from this vector. - * \param first The beginning of the range of elements to remove. - * \param last The end of the range of elements to remove. - * \return An iterator pointing to the new location of the element that followed the last - * element in the sequence [first,last). - */ - iterator erase(iterator first, iterator last); - - /*! This method inserts a single copy of a given exemplar value at the - * specified position in this vector. - * \param position The insertion position. 
- * \param x The exemplar element to copy & insert. - * \return An iterator pointing to the newly inserted element. - */ - iterator insert(iterator position, const T &x); - - /*! This method inserts a copy of an exemplar value to a range at the - * specified position in this vector. - * \param position The insertion position - * \param n The number of insertions to perform. - * \param x The value to replicate and insert. - */ - void insert(iterator position, size_type n, const T &x); - - /*! This method inserts a copy of an input range at the specified position - * in this vector. - * \param position The insertion position. - * \param first The beginning of the range to copy. - * \param last The end of the range to copy. - * - * \tparam InputIterator is a model of Assignable. - */ - template - void insert(iterator position, InputIterator first, InputIterator last); - - /*! This version of \p assign replicates a given exemplar - * \p n times into this vector. - * \param n The number of times to copy \p x. - * \param x The exemplar element to replicate. - */ - void assign(size_type n, const T &x); - - /*! This version of \p assign makes this vector a copy of a given input range. - * \param first The beginning of the range to copy. - * \param last The end of the range to copy. - * - * \tparam InputIterator is a model of Input Iterator. - */ - template - void assign(InputIterator first, InputIterator last); - - /*! This method returns a copy of this vector's allocator. - * \return A copy of the alloctor used by this vector. - */ - allocator_type get_allocator(void) const; -#endif // end doxygen-only members -}; // end device_vector - -/*! 
\} - */ - -} // end thrust - -#include - - diff --git a/compat/thrust/distance.h b/compat/thrust/distance.h deleted file mode 100644 index 67b41946bf..0000000000 --- a/compat/thrust/distance.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file distance.h - * \brief Computes the size of a range - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup iterators - * \{ - */ - -/*! \p distance finds the distance between \p first and \p last, i.e. the - * number of times that \p first must be incremented until it is equal to - * \p last. - * - * \param first The beginning of an input range of interest. - * \param last The end of an input range of interest. - * \return The distance between the beginning and end of the input range. - * - * \tparam InputIterator is a model of Input Iterator. - * - * \pre If \c InputIterator meets the requirements of random access iterator, \p last shall be reachable from \p first or - * \p first shall be reachable from \p last; otherwise, \p last shall be reachable from \p first. - * - * The following code snippet demonstrates how to use \p distance to compute - * the distance to one iterator from another. - * - * \code - * #include - * #include - * ... 
- * thrust::device_vector vec(13); - * thrust::device_vector::iterator iter1 = vec.begin(); - * thrust::device_vector::iterator iter2 = iter1 + 7; - * - * int d = thrust::distance(iter1, iter2); - * - * // d is 7 - * \endcode - * - * \see http://www.sgi.com/tech/stl/distance.html - */ -template - inline typename thrust::iterator_traits::difference_type - distance(InputIterator first, InputIterator last); - -/*! \} // end iterators - */ - -} // end thrust - -#include - diff --git a/compat/thrust/equal.h b/compat/thrust/equal.h deleted file mode 100644 index e96946fcf7..0000000000 --- a/compat/thrust/equal.h +++ /dev/null @@ -1,236 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file equal.h - * \brief Equality between ranges - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup reductions - * \{ - * \addtogroup comparisons - * \ingroup reductions - * \{ - */ - - -/*! \p equal returns \c true if the two ranges [first1, last1) - * and [first2, first2 + (last1 - first1)) are identical when - * compared element-by-element, and otherwise returns \c false. - * - * This version of \p equal returns \c true if and only if for every - * iterator \c i in [first1, last1), *i == *(first2 + (i - first1)). - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. 
- * \param first1 The beginning of the first sequence. - * \param last1 The end of the first sequence. - * \param first2 The beginning of the second sequence. - * \return \c true, if the sequences are equal; \c false, otherwise. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * and \p InputIterator1's \c value_type is a model of Equality Comparable, - * and \p InputIterator1's \c value_type can be compared for equality with \c InputIterator2's \c value_type. - * \tparam InputIterator2 is a model of Input Iterator, - * and \p InputIterator2's \c value_type is a model of Equality Comparable, - * and \p InputIterator2's \c value_type can be compared for equality with \c InputIterator1's \c value_type. - * - * The following code snippet demonstrates how to use \p equal to test - * two ranges for equality using the \p thrust::host execution policy: - * - * \code - * #include - * #include - * ... - * int A1[7] = {3, 1, 4, 1, 5, 9, 3}; - * int A2[7] = {3, 1, 4, 2, 8, 5, 7}; - * ... - * bool result = thrust::equal(thrust::host, A1, A1 + 7, A2); - * - * // result == false - * \endcode - * - * \see http://www.sgi.com/tech/stl/equal.html - */ -template -bool equal(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2); - - -/*! \p equal returns \c true if the two ranges [first1, last1) - * and [first2, first2 + (last1 - first1)) are identical when - * compared element-by-element, and otherwise returns \c false. - * - * This version of \p equal returns \c true if and only if for every - * iterator \c i in [first1, last1), *i == *(first2 + (i - first1)). - * - * \param first1 The beginning of the first sequence. - * \param last1 The end of the first sequence. - * \param first2 The beginning of the second sequence. - * \return \c true, if the sequences are equal; \c false, otherwise. 
- * - * \tparam InputIterator1 is a model of Input Iterator, - * and \p InputIterator1's \c value_type is a model of Equality Comparable, - * and \p InputIterator1's \c value_type can be compared for equality with \c InputIterator2's \c value_type. - * \tparam InputIterator2 is a model of Input Iterator, - * and \p InputIterator2's \c value_type is a model of Equality Comparable, - * and \p InputIterator2's \c value_type can be compared for equality with \c InputIterator1's \c value_type. - * - * The following code snippet demonstrates how to use \p equal to test - * two ranges for equality. - * - * \code - * #include - * ... - * int A1[7] = {3, 1, 4, 1, 5, 9, 3}; - * int A2[7] = {3, 1, 4, 2, 8, 5, 7}; - * ... - * bool result = thrust::equal(A1, A1 + 7, A2); - * - * // result == false - * \endcode - * - * \see http://www.sgi.com/tech/stl/equal.html - */ -template -bool equal(InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2); - - -/*! \p equal returns \c true if the two ranges [first1, last1) - * and [first2, first2 + (last1 - first1)) are identical when - * compared element-by-element, and otherwise returns \c false. - * - * This version of \p equal returns \c true if and only if for every - * iterator \c i in [first1, last1), - * binary_pred(*i, *(first2 + (i - first1))) is \c true. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the first sequence. - * \param last1 The end of the first sequence. - * \param first2 The beginning of the second sequence. - * \param binary_pred Binary predicate used to test element equality. - * \return \c true, if the sequences are equal; \c false, otherwise. - * - * \tparam DerivedPolicy The name of the derived execution policy. 
- * \tparam InputIterator1 is a model of Input Iterator, - * and \p InputIterator1's \c value_type is convertible to \p BinaryPredicate's \c first_argument_type. - * \tparam InputIterator2 is a model of Input Iterator, - * and \p InputIterator2's \c value_type is convertible to \p BinaryPredicate's \c second_argument_type. - * \tparam BinaryPredicate is a model of Binary Predicate. - * - * The following code snippet demonstrates how to use \p equal to compare the - * elements in two ranges modulo 2 using the \p thrust::host execution policy. - * - * \code - * #include - * #include - * ... - * - * __host__ __device__ - * struct compare_modulo_two - * { - * bool operator()(int x, int y) - * { - * return (x % 2) == (y % 2); - * } - * }; - * ... - * int x[5] = {0, 2, 4, 6, 8, 10}; - * int y[5] = {1, 3, 5, 7, 9, 11}; - * - * bool result = thrust::equal(x, x + 5, y, compare_modulo_two()); - * - * // result is true - * \endcode - * - * \see http://www.sgi.com/tech/stl/equal.html - */ -template -bool equal(const thrust::detail::execution_policy_base &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate binary_pred); - - -/*! \p equal returns \c true if the two ranges [first1, last1) - * and [first2, first2 + (last1 - first1)) are identical when - * compared element-by-element, and otherwise returns \c false. - * - * This version of \p equal returns \c true if and only if for every - * iterator \c i in [first1, last1), - * binary_pred(*i, *(first2 + (i - first1))) is \c true. - * - * \param first1 The beginning of the first sequence. - * \param last1 The end of the first sequence. - * \param first2 The beginning of the second sequence. - * \param binary_pred Binary predicate used to test element equality. - * \return \c true, if the sequences are equal; \c false, otherwise. - * - * \tparam InputIterator1 is a model of Input Iterator, - * and \p InputIterator1's \c value_type is convertible to \p BinaryPredicate's \c first_argument_type. 
- * \tparam InputIterator2 is a model of Input Iterator, - * and \p InputIterator2's \c value_type is convertible to \p BinaryPredicate's \c second_argument_type. - * \tparam BinaryPredicate is a model of Binary Predicate. - * - * The following code snippet demonstrates how to use \p equal to compare the - * elements in two ranges modulo 2. - * - * \code - * #include - * - * __host__ __device__ - * struct compare_modulo_two - * { - * bool operator()(int x, int y) - * { - * return (x % 2) == (y % 2); - * } - * }; - * ... - * int x[5] = {0, 2, 4, 6, 8, 10}; - * int y[5] = {1, 3, 5, 7, 9, 11}; - * - * bool result = thrust::equal(x, x + 5, y, compare_modulo_two()); - * - * // result is true - * \endcode - * - * \see http://www.sgi.com/tech/stl/equal.html - */ -template -bool equal(InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, BinaryPredicate binary_pred); - - -/*! \} // end comparisons - * \} // end reductions - */ - -} // end namespace thrust - -#include - diff --git a/compat/thrust/execution_policy.h b/compat/thrust/execution_policy.h deleted file mode 100644 index a5b61e95b4..0000000000 --- a/compat/thrust/execution_policy.h +++ /dev/null @@ -1,351 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file thrust/execution_policy.h - * \brief Thrust execution policies. 
- */ - -#pragma once - -#include - -// get the definition of thrust::execution_policy -#include - -// #include the host system's execution_policy header -#define __THRUST_HOST_SYSTEM_EXECUTION_POLICY_HEADER <__THRUST_HOST_SYSTEM_ROOT/execution_policy.h> -#include __THRUST_HOST_SYSTEM_EXECUTION_POLICY_HEADER -#undef __THRUST_HOST_SYSTEM_EXECUTION_POLICY_HEADER - -// #include the device system's execution_policy.h header -#define __THRUST_DEVICE_SYSTEM_EXECUTION_POLICY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/execution_policy.h> -#include __THRUST_DEVICE_SYSTEM_EXECUTION_POLICY_HEADER -#undef __THRUST_DEVICE_SYSTEM_EXECUTION_POLICY_HEADER - -namespace thrust -{ - - -/*! \cond - */ - - -namespace detail -{ - - -typedef thrust::system::__THRUST_HOST_SYSTEM_NAMESPACE::detail::par_t host_t; - - -typedef thrust::system::__THRUST_DEVICE_SYSTEM_NAMESPACE::detail::par_t device_t; - - -} // end detail - - -/*! \endcond - */ - - -/*! \addtogroup execution_policies Parallel Execution Policies - * \{ - */ - - -// define execution_policy for the purpose of Doxygenating it -// it is actually defined elsewhere -#if 0 -/*! \p execution_policy is the base class for all Thrust parallel execution policies - * like \p thrust::host, \p thrust::device, and each backend system's tag type. - * - * Custom user-defined backends should derive a policy from this type in order to - * interoperate with Thrust algorithm dispatch. 
- * - * The following code snippet demonstrates how to derive a standalone custom execution policy - * from \p thrust::execution_policy to implement a backend which only implements \p for_each: - * - * \code - * #include - * #include - * - * // define a type derived from thrust::execution_policy to distinguish our custom execution policy: - * struct my_policy : thrust::execution_policy {}; - * - * // overload for_each on my_policy - * template - * Iterator for_each(my_policy, Iterator first, Iterator last, Function f) - * { - * std::cout << "Hello, world from for_each(my_policy)!" << std::endl; - * - * for(; first < last; ++first) - * { - * f(*first); - * } - * - * return first; - * } - * - * struct ignore_argument - * { - * void operator()(int) {} - * }; - * - * int main() - * { - * int data[4]; - * - * // dispatch thrust::for_each using our custom policy: - * my_policy exec; - * thrust::for_each(exec, data, data + 4, ignore_argument()); - * - * // can't dispatch thrust::transform because no overload exists for my_policy: - * //thrust::transform(exec, data, data, + 4, data, thrust::identity()); // error! - * - * return 0; - * } - * \endcode - * - * \see host_execution_policy - * \see device_execution_policy - */ -template -struct execution_policy : thrust::detail::execution_policy_base -{}; -#endif - - -/*! \p host_execution_policy is the base class for all Thrust parallel execution policies - * which are derived from Thrust's default host backend system configured with the \p THRUST_HOST_SYSTEM - * macro. - * - * Custom user-defined backends which wish to inherit the functionality of Thrust's host backend system - * should derive a policy from this type in order to interoperate with Thrust algorithm dispatch. 
- * - * The following code snippet demonstrates how to derive a standalone custom execution policy from - * \p thrust::host_execution_policy to implement a backend which specializes \p for_each while inheriting - * the behavior of every other algorithm from the host system: - * - * \code - * #include - * #include - * - * // define a type derived from thrust::host_execution_policy to distinguish our custom execution policy: - * struct my_policy : thrust::host_execution_policy {}; - * - * // overload for_each on my_policy - * template - * Iterator for_each(my_policy, Iterator first, Iterator last, Function f) - * { - * std::cout << "Hello, world from for_each(my_policy)!" << std::endl; - * - * for(; first < last; ++first) - * { - * f(*first); - * } - * - * return first; - * } - * - * struct ignore_argument - * { - * void operator()(int) {} - * }; - * - * int main() - * { - * int data[4]; - * - * // dispatch thrust::for_each using our custom policy: - * my_policy exec; - * thrust::for_each(exec, data, data + 4, ignore_argument()); - * - * // dispatch thrust::transform whose behavior our policy inherits - * thrust::transform(exec, data, data, + 4, data, thrust::identity()); - * - * return 0; - * } - * \endcode - * - * \see execution_policy - * \see device_execution_policy - */ -template - struct host_execution_policy - : thrust::system::__THRUST_HOST_SYSTEM_NAMESPACE::execution_policy -{}; - - -/*! \p device_execution_policy is the base class for all Thrust parallel execution policies - * which are derived from Thrust's default device backend system configured with the \p THRUST_DEVICE_SYSTEM - * macro. - * - * Custom user-defined backends which wish to inherit the functionality of Thrust's device backend system - * should derive a policy from this type in order to interoperate with Thrust algorithm dispatch. 
- * - * The following code snippet demonstrates how to derive a standalone custom execution policy from - * \p thrust::device_execution_policy to implement a backend which specializes \p for_each while inheriting - * the behavior of every other algorithm from the device system: - * - * \code - * #include - * #include - * - * // define a type derived from thrust::device_execution_policy to distinguish our custom execution policy: - * struct my_policy : thrust::device_execution_policy {}; - * - * // overload for_each on my_policy - * template - * Iterator for_each(my_policy, Iterator first, Iterator last, Function f) - * { - * std::cout << "Hello, world from for_each(my_policy)!" << std::endl; - * - * for(; first < last; ++first) - * { - * f(*first); - * } - * - * return first; - * } - * - * struct ignore_argument - * { - * void operator()(int) {} - * }; - * - * int main() - * { - * int data[4]; - * - * // dispatch thrust::for_each using our custom policy: - * my_policy exec; - * thrust::for_each(exec, data, data + 4, ignore_argument()); - * - * // dispatch thrust::transform whose behavior our policy inherits - * thrust::transform(exec, data, data, + 4, data, thrust::identity()); - * - * return 0; - * } - * \endcode - * - * \see execution_policy - * \see host_execution_policy - */ -template - struct device_execution_policy - : thrust::system::__THRUST_DEVICE_SYSTEM_NAMESPACE::execution_policy -{}; - - -/*! \p thrust::host is the default parallel execution policy associated with Thrust's host backend system - * configured by the \p THRUST_HOST_SYSTEM macro. - * - * Instead of relying on implicit algorithm dispatch through iterator system tags, users may directly target - * algorithm dispatch at Thrust's host system by providing \p thrust::host as an algorithm parameter. - * - * Explicit dispatch can be useful in avoiding the introduction of data copies into containers such as - * \p thrust::host_vector. 
- * - * Note that even though \p thrust::host targets the host CPU, it is a parallel execution policy. That is, - * the order that an algorithm invokes functors or dereferences iterators is not defined. - * - * The type of \p thrust::host is implementation-defined. - * - * The following code snippet demonstrates how to use \p thrust::host to explicitly dispatch an invocation - * of \p thrust::for_each to the host backend system: - * - * \code - * #include - * #include - * #include - * - * struct printf_functor - * { - * __host__ __device__ - * void operator()(int x) - * { - * printf("%d\n"); - * } - * }; - * ... - * int vec[3]; - * vec[0] = 0; vec[1] = 1; vec[2] = 2; - * - * thrust::for_each(thrust::host, vec.begin(), vec.end(), printf_functor()); - * - * // 0 1 2 is printed to standard output in some unspecified order - * \endcode - * - * \see host_execution_policy - * \see thrust::device - */ -static const detail::host_t host; - - -/*! \p thrust::device is the default parallel execution policy associated with Thrust's device backend system - * configured by the \p THRUST_DEVICE_SYSTEM macro. - * - * Instead of relying on implicit algorithm dispatch through iterator system tags, users may directly target - * algorithm dispatch at Thrust's device system by providing \p thrust::device as an algorithm parameter. - * - * Explicit dispatch can be useful in avoiding the introduction of data copies into containers such as - * \p thrust::device_vector or to avoid wrapping e.g. raw pointers allocated by the CUDA API with types - * such as \p thrust::device_ptr. - * - * The user must take care to guarantee that the iterators provided to an algorithm are compatible with - * the device backend system. For example, raw pointers allocated by std::malloc typically - * cannot be dereferenced by a GPU. For this reason, raw pointers allocated by host APIs should not be mixed - * with a \p thrust::device algorithm invocation when the device backend is CUDA. 
- * - * The type of \p thrust::device is implementation-defined. - * - * The following code snippet demonstrates how to use \p thrust::device to explicitly dispatch an invocation - * of \p thrust::for_each to the device backend system: - * - * \code - * #include - * #include - * #include - * #include - * - * struct printf_functor - * { - * __host__ __device__ - * void operator()(int x) - * { - * printf("%d\n"); - * } - * }; - * ... - * thrust::device_vector d_vec[3]; - * d_vec[0] = 0; d_vec[1] = 1; d_vec[2] = 2; - * - * thrust::for_each(thrust::device, vec.begin(), vec.end(), printf_functor()); - * - * // 0 1 2 is printed to standard output in some unspecified order - * \endcode - * - * \see host_execution_policy - * \see thrust::device - */ -static const detail::device_t device; - - -/*! \} - */ - - -} // end thrust - diff --git a/compat/thrust/extrema.h b/compat/thrust/extrema.h deleted file mode 100644 index 335bcd1e6b..0000000000 --- a/compat/thrust/extrema.h +++ /dev/null @@ -1,798 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file extrema.h - * \brief Functions for computing computing extremal values - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ - - -/*! This version of \p min returns the smaller of two values, given a comparison operation. - * \param lhs The first value to compare. - * \param rhs The second value to compare. 
- * \param comp A comparison operation. - * \return The smaller element. - * - * \tparam T is convertible to \p BinaryPredicate's first argument type and to its second argument type. - * \tparam BinaryPredicate is a model of BinaryPredicate. - * - * The following code snippet demonstrates how to use \p min to compute the smaller of two - * key-value objects. - * - * \code - * #include - * ... - * struct key_value - * { - * int key; - * int value; - * }; - * - * struct compare_key_value - * { - * __host__ __device__ - * bool operator()(key_value lhs, key_value rhs) - * { - * return lhs.key < rhs.key; - * } - * }; - * - * ... - * key_value a = {13, 0}; - * key_value b = { 7, 1); - * - * key_value smaller = thrust::min(a, b, compare_key_value()); - * - * // smaller is {7, 1} - * \endcode - * - * \note Returns the first argument when the arguments are equivalent. - * \see max - */ -template -__host__ __device__ - T min THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs, BinaryPredicate comp); - - -/*! This version of \p min returns the smaller of two values. - * \param lhs The first value to compare. - * \param rhs The second value to compare. - * \return The smaller element. - * - * \tparam T is a model of LessThan Comparable. - * - * The following code snippet demonstrates how to use \p min to compute the smaller of two - * integers. - * - * \code - * #include - * ... - * int a = 13; - * int b = 7; - * - * int smaller = thrust::min(a, b); - * - * // smaller is 7 - * \endcode - * - * \note Returns the first argument when the arguments are equivalent. - * \see max - */ -template -__host__ __device__ - T min THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs); - - -/*! This version of \p max returns the larger of two values, given a comparison operation. - * \param lhs The first value to compare. - * \param rhs The second value to compare. - * \param comp A comparison operation. - * \return The larger element. 
- * - * \tparam T is convertible to \p BinaryPredicate's first argument type and to its second argument type. - * \tparam BinaryPredicate is a model of BinaryPredicate. - * - * The following code snippet demonstrates how to use \p max to compute the larger of two - * key-value objects. - * - * \code - * #include - * ... - * struct key_value - * { - * int key; - * int value; - * }; - * - * struct compare_key_value - * { - * __host__ __device__ - * bool operator()(key_value lhs, key_value rhs) - * { - * return lhs.key < rhs.key; - * } - * }; - * - * ... - * key_value a = {13, 0}; - * key_value b = { 7, 1); - * - * key_value larger = thrust::max(a, b, compare_key_value()); - * - * // larger is {13, 0} - * \endcode - * - * \note Returns the first argument when the arguments are equivalent. - * \see min - */ -template -__host__ __device__ - T max THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs, BinaryPredicate comp); - - -/*! This version of \p max returns the larger of two values. - * \param lhs The first value to compare. - * \param rhs The second value to compare. - * \return The larger element. - * - * \tparam T is a model of LessThan Comparable. - * - * The following code snippet demonstrates how to use \p max to compute the larger of two - * integers. - * - * \code - * #include - * ... - * int a = 13; - * int b = 7; - * - * int larger = thrust::min(a, b); - * - * // larger is 13 - * \endcode - * - * \note Returns the first argument when the arguments are equivalent. - * \see min - */ -template -__host__ __device__ - T max THRUST_PREVENT_MACRO_SUBSTITUTION (const T &lhs, const T &rhs); - - -/*! \addtogroup reductions - * \{ - * \addtogroup extrema - * \ingroup reductions - * \{ - */ - -/*! \p min_element finds the smallest element in the range [first, last). - * It returns the first iterator \c i in [first, last) - * such that no other iterator in [first, last) points to a value smaller - * than \c *i. 
The return value is \p last if and only if [first, last) is an - * empty range. - * - * The two versions of \p min_element differ in how they define whether one element is - * less than another. This version compares objects using \c operator<. Specifically, - * this version of \p min_element returns the first iterator \c i in [first, last) - * such that, for every iterator \c j in [first, last), *j < *i is - * \c false. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \return An iterator pointing to the smallest element of the range [first, last), - * if it is not an empty range; \p last, otherwise. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \c ForwardIterator's \c value_type is a model of - * LessThan Comparable. - * - * \code - * #include - * #include - * ... - * int data[6] = {1, 0, 2, 2, 1, 3}; - * int *result = thrust::min_element(thrust::host, data, data + 6); - * - * // result is data + 1 - * // *result is 0 - * \endcode - * - * \see http://www.sgi.com/tech/stl/min_element.html - */ -template -ForwardIterator min_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last); - - -/*! \p min_element finds the smallest element in the range [first, last). - * It returns the first iterator \c i in [first, last) - * such that no other iterator in [first, last) points to a value smaller - * than \c *i. The return value is \p last if and only if [first, last) is an - * empty range. - * - * The two versions of \p min_element differ in how they define whether one element is - * less than another. This version compares objects using \c operator<. 
Specifically, - * this version of \p min_element returns the first iterator \c i in [first, last) - * such that, for every iterator \c j in [first, last), *j < *i is - * \c false. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \return An iterator pointing to the smallest element of the range [first, last), - * if it is not an empty range; \p last, otherwise. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \c ForwardIterator's \c value_type is a model of - * LessThan Comparable. - * - * \code - * #include - * ... - * int data[6] = {1, 0, 2, 2, 1, 3}; - * int *result = thrust::min_element(data, data + 6); - * - * // result is data + 1 - * // *result is 0 - * \endcode - * - * \see http://www.sgi.com/tech/stl/min_element.html - */ -template -ForwardIterator min_element(ForwardIterator first, ForwardIterator last); - - -/*! \p min_element finds the smallest element in the range [first, last). - * It returns the first iterator \c i in [first, last) - * such that no other iterator in [first, last) points to a value smaller - * than \c *i. The return value is \p last if and only if [first, last) is an - * empty range. - * - * The two versions of \p min_element differ in how they define whether one element is - * less than another. This version compares objects using a function object \p comp. - * Specifically, this version of \p min_element returns the first iterator \c i in [first, last) - * such that, for every iterator \c j in [first, last), comp(*j, *i) is - * \c false. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param comp A binary predicate used for comparison. - * \return An iterator pointing to the smallest element of the range [first, last), - * if it is not an empty range; \p last, otherwise. 
- * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator's \c value_type is convertible to both \p comp's - * \c first_argument_type and \c second_argument_type. - * \tparam BinaryPredicate is a model of Binary Predicate. - * - * The following code snippet demonstrates how to use \p min_element to find the smallest element - * of a collection of key-value pairs using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * - * struct key_value - * { - * int key; - * int value; - * }; - * - * struct compare_key_value - * { - * __host__ __device__ - * bool operator()(key_value lhs, key_value rhs) - * { - * return lhs.key < rhs.key; - * } - * }; - * - * ... - * key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} }; - * - * key_value *smallest = thrust::min_element(thrust::host, data, data + 4, compare_key_value()); - * - * // smallest == data + 1 - * // *smallest == {0,7} - * \endcode - * - * \see http://www.sgi.com/tech/stl/min_element.html - */ -template -ForwardIterator min_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp); - - -/*! \p min_element finds the smallest element in the range [first, last). - * It returns the first iterator \c i in [first, last) - * such that no other iterator in [first, last) points to a value smaller - * than \c *i. The return value is \p last if and only if [first, last) is an - * empty range. - * - * The two versions of \p min_element differ in how they define whether one element is - * less than another. This version compares objects using a function object \p comp. - * Specifically, this version of \p min_element returns the first iterator \c i in [first, last) - * such that, for every iterator \c j in [first, last), comp(*j, *i) is - * \c false. - * - * \param first The beginning of the sequence. 
- * \param last The end of the sequence. - * \param comp A binary predicate used for comparison. - * \return An iterator pointing to the smallest element of the range [first, last), - * if it is not an empty range; \p last, otherwise. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator's \c value_type is convertible to both \p comp's - * \c first_argument_type and \c second_argument_type. - * \tparam BinaryPredicate is a model of Binary Predicate. - * - * The following code snippet demonstrates how to use \p min_element to find the smallest element - * of a collection of key-value pairs. - * - * \code - * #include - * - * struct key_value - * { - * int key; - * int value; - * }; - * - * struct compare_key_value - * { - * __host__ __device__ - * bool operator()(key_value lhs, key_value rhs) - * { - * return lhs.key < rhs.key; - * } - * }; - * - * ... - * key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} }; - * - * key_value *smallest = thrust::min_element(data, data + 4, compare_key_value()); - * - * // smallest == data + 1 - * // *smallest == {0,7} - * \endcode - * - * \see http://www.sgi.com/tech/stl/min_element.html - */ -template -ForwardIterator min_element(ForwardIterator first, ForwardIterator last, - BinaryPredicate comp); - - -/*! \p max_element finds the largest element in the range [first, last). - * It returns the first iterator \c i in [first, last) - * such that no other iterator in [first, last) points to a value larger - * than \c *i. The return value is \p last if and only if [first, last) is an - * empty range. - * - * The two versions of \p max_element differ in how they define whether one element is - * greater than another. This version compares objects using \c operator<. Specifically, - * this version of \p max_element returns the first iterator \c i in [first, last) - * such that, for every iterator \c j in [first, last), *i < *j is - * \c false. 
- * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \return An iterator pointing to the largest element of the range [first, last), - * if it is not an empty range; \p last, otherwise. - * - * \tparam A Thrust backend system. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \c ForwardIterator's \c value_type is a model of - * LessThan Comparable. - * - * \code - * #include - * #include - * ... - * int data[6] = {1, 0, 2, 2, 1, 3}; - * int *result = thrust::max_element(thrust::host, data, data + 6); - * - * // *result == 3 - * \endcode - * - * \see http://www.sgi.com/tech/stl/max_element.html - */ -template -ForwardIterator max_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last); - - -/*! \p max_element finds the largest element in the range [first, last). - * It returns the first iterator \c i in [first, last) - * such that no other iterator in [first, last) points to a value larger - * than \c *i. The return value is \p last if and only if [first, last) is an - * empty range. - * - * The two versions of \p max_element differ in how they define whether one element is - * greater than another. This version compares objects using \c operator<. Specifically, - * this version of \p max_element returns the first iterator \c i in [first, last) - * such that, for every iterator \c j in [first, last), *i < *j is - * \c false. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \return An iterator pointing to the largest element of the range [first, last), - * if it is not an empty range; \p last, otherwise. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \c ForwardIterator's \c value_type is a model of - * LessThan Comparable. 
- * - * \code - * #include - * ... - * int data[6] = {1, 0, 2, 2, 1, 3}; - * int *result = thrust::max_element(data, data + 6); - * - * // *result == 3 - * \endcode - * - * \see http://www.sgi.com/tech/stl/max_element.html - */ -template -ForwardIterator max_element(ForwardIterator first, ForwardIterator last); - - -/*! \p max_element finds the largest element in the range [first, last). - * It returns the first iterator \c i in [first, last) - * such that no other iterator in [first, last) points to a value larger - * than \c *i. The return value is \p last if and only if [first, last) is an - * empty range. - * - * The two versions of \p max_element differ in how they define whether one element is - * less than another. This version compares objects using a function object \p comp. - * Specifically, this version of \p max_element returns the first iterator \c i in [first, last) - * such that, for every iterator \c j in [first, last), comp(*i, *j) is - * \c false. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param comp A binary predicate used for comparison. - * \return An iterator pointing to the largest element of the range [first, last), - * if it is not an empty range; \p last, otherwise. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator's \c value_type is convertible to both \p comp's - * \c first_argument_type and \c second_argument_type. - * \tparam BinaryPredicate is a model of Binary Predicate. - * - * The following code snippet demonstrates how to use \p max_element to find the largest element - * of a collection of key-value pairs using the \p thrust::host execution policy for parallelization. - * - * \code - * #include - * #include - * ... 
- * - * struct key_value - * { - * int key; - * int value; - * }; - * - * struct compare_key_value - * { - * __host__ __device__ - * bool operator()(key_value lhs, key_value rhs) - * { - * return lhs.key < rhs.key; - * } - * }; - * - * ... - * key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} }; - * - * key_value *largest = thrust::max_element(thrust::host, data, data + 4, compare_key_value()); - * - * // largest == data + 3 - * // *largest == {6,1} - * \endcode - * - * \see http://www.sgi.com/tech/stl/max_element.html - */ -template -ForwardIterator max_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp); - - -/*! \p max_element finds the largest element in the range [first, last). - * It returns the first iterator \c i in [first, last) - * such that no other iterator in [first, last) points to a value larger - * than \c *i. The return value is \p last if and only if [first, last) is an - * empty range. - * - * The two versions of \p max_element differ in how they define whether one element is - * less than another. This version compares objects using a function object \p comp. - * Specifically, this version of \p max_element returns the first iterator \c i in [first, last) - * such that, for every iterator \c j in [first, last), comp(*i, *j) is - * \c false. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param comp A binary predicate used for comparison. - * \return An iterator pointing to the largest element of the range [first, last), - * if it is not an empty range; \p last, otherwise. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator's \c value_type is convertible to both \p comp's - * \c first_argument_type and \c second_argument_type. - * \tparam BinaryPredicate is a model of Binary Predicate. 
- * - * The following code snippet demonstrates how to use \p max_element to find the largest element - * of a collection of key-value pairs. - * - * \code - * #include - * - * struct key_value - * { - * int key; - * int value; - * }; - * - * struct compare_key_value - * { - * __host__ __device__ - * bool operator()(key_value lhs, key_value rhs) - * { - * return lhs.key < rhs.key; - * } - * }; - * - * ... - * key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} }; - * - * key_value *largest = thrust::max_element(data, data + 4, compare_key_value()); - * - * // largest == data + 3 - * // *largest == {6,1} - * \endcode - * - * \see http://www.sgi.com/tech/stl/max_element.html - */ -template -ForwardIterator max_element(ForwardIterator first, ForwardIterator last, - BinaryPredicate comp); - - -/*! \p minmax_element finds the smallest and largest elements in the range [first, last). - * It returns a pair of iterators (imin, imax) where \c imin is the same iterator - * returned by \p min_element and \c imax is the same iterator returned by \p max_element. - * This function is potentially more efficient than separate calls to \p min_element and \p max_element. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \return A pair of iterator pointing to the smallest and largest elements of the range [first, last), - * if it is not an empty range; \p last, otherwise. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \c ForwardIterator's \c value_type is a model of - * LessThan Comparable. - * - * \code - * #include - * #include - * ... 
- * int data[6] = {1, 0, 2, 2, 1, 3}; - * thrust::pair result = thrust::minmax_element(thrust::host, data, data + 6); - * - * // result.first is data + 1 - * // result.second is data + 5 - * // *result.first is 0 - * // *result.second is 3 - * \endcode - * - * \see min_element - * \see max_element - * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1840.pdf - */ -template -thrust::pair minmax_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last); - - -/*! \p minmax_element finds the smallest and largest elements in the range [first, last). - * It returns a pair of iterators (imin, imax) where \c imin is the same iterator - * returned by \p min_element and \c imax is the same iterator returned by \p max_element. - * This function is potentially more efficient than separate calls to \p min_element and \p max_element. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \return A pair of iterator pointing to the smallest and largest elements of the range [first, last), - * if it is not an empty range; \p last, otherwise. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \c ForwardIterator's \c value_type is a model of - * LessThan Comparable. - * - * \code - * #include - * ... - * int data[6] = {1, 0, 2, 2, 1, 3}; - * thrust::pair result = thrust::minmax_element(data, data + 6); - * - * // result.first is data + 1 - * // result.second is data + 5 - * // *result.first is 0 - * // *result.second is 3 - * \endcode - * - * \see min_element - * \see max_element - * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1840.pdf - */ -template -thrust::pair minmax_element(ForwardIterator first, - ForwardIterator last); - - -/*! \p minmax_element finds the smallest and largest elements in the range [first, last). 
- * It returns a pair of iterators (imin, imax) where \c imin is the same iterator - * returned by \p min_element and \c imax is the same iterator returned by \p max_element. - * This function is potentially more efficient than separate calls to \p min_element and \p max_element. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param comp A binary predicate used for comparison. - * \return A pair of iterator pointing to the smallest and largest elements of the range [first, last), - * if it is not an empty range; \p last, otherwise. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator's \c value_type is convertible to both \p comp's - * \c first_argument_type and \c second_argument_type. - * \tparam BinaryPredicate is a model of Binary Predicate. - * - * The following code snippet demonstrates how to use \p minmax_element to find the smallest and largest elements - * of a collection of key-value pairs using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * - * struct key_value - * { - * int key; - * int value; - * }; - * - * struct compare_key_value - * { - * __host__ __device__ - * bool operator()(key_value lhs, key_value rhs) - * { - * return lhs.key < rhs.key; - * } - * }; - * - * ... 
- * key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} }; - * - * thrust::pair extrema = thrust::minmax_element(thrust::host, data, data + 4, compare_key_value()); - * - * // extrema.first == data + 1 - * // *extrema.first == {0,7} - * // extrema.second == data + 3 - * // *extrema.second == {6,1} - * \endcode - * - * \see min_element - * \see max_element - * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1840.pdf - */ -template -thrust::pair minmax_element(const thrust::detail::execution_policy_base &exec, ForwardIterator first, ForwardIterator last, BinaryPredicate comp); - - -/*! \p minmax_element finds the smallest and largest elements in the range [first, last). - * It returns a pair of iterators (imin, imax) where \c imin is the same iterator - * returned by \p min_element and \c imax is the same iterator returned by \p max_element. - * This function is potentially more efficient than separate calls to \p min_element and \p max_element. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param comp A binary predicate used for comparison. - * \return A pair of iterator pointing to the smallest and largest elements of the range [first, last), - * if it is not an empty range; \p last, otherwise. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator's \c value_type is convertible to both \p comp's - * \c first_argument_type and \c second_argument_type. - * \tparam BinaryPredicate is a model of Binary Predicate. - * - * The following code snippet demonstrates how to use \p minmax_element to find the smallest and largest elements - * of a collection of key-value pairs. - * - * \code - * #include - * #include - * - * struct key_value - * { - * int key; - * int value; - * }; - * - * struct compare_key_value - * { - * __host__ __device__ - * bool operator()(key_value lhs, key_value rhs) - * { - * return lhs.key < rhs.key; - * } - * }; - * - * ... 
- * key_value data[4] = { {4,5}, {0,7}, {2,3}, {6,1} }; - * - * thrust::pair extrema = thrust::minmax_element(data, data + 4, compare_key_value()); - * - * // extrema.first == data + 1 - * // *extrema.first == {0,7} - * // extrema.second == data + 3 - * // *extrema.second == {6,1} - * \endcode - * - * \see min_element - * \see max_element - * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1840.pdf - */ -template -thrust::pair minmax_element(ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp); - -/*! \} // end extrema - * \} // end reductions - */ - -} // end thrust - -#include -#include - diff --git a/compat/thrust/fill.h b/compat/thrust/fill.h deleted file mode 100644 index b492cec9dd..0000000000 --- a/compat/thrust/fill.h +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file fill.h - * \brief Fills a range with a constant value - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup transformations - * \addtogroup filling - * \ingroup transformations - * \{ - */ - - -/*! \p fill assigns the value \p value to every element in - * the range [first, last). That is, for every - * iterator \c i in [first, last), it performs - * the assignment *i = value. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. 
- * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param value The value to be copied. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable. - * \tparam T is a model of Assignable, - * and \p T's \c value_type is convertible to \p ForwardIterator's \c value_type. - * - * The following code snippet demonstrates how to use \p fill to set a thrust::device_vector's - * elements to a given value using the \p thrust::device execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * thrust::device_vector v(4); - * thrust::fill(thrust::device, v.begin(), v.end(), 137); - * - * // v[0] == 137, v[1] == 137, v[2] == 137, v[3] == 137 - * \endcode - * - * \see http://www.sgi.com/tech/stl/fill.html - * \see \c fill_n - * \see \c uninitialized_fill - */ -template - void fill(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const T &value); - - -/*! \p fill assigns the value \p value to every element in - * the range [first, last). That is, for every - * iterator \c i in [first, last), it performs - * the assignment *i = value. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param value The value to be copied. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable. - * \tparam T is a model of Assignable, - * and \p T's \c value_type is convertible to \p ForwardIterator's \c value_type. - * - * The following code snippet demonstrates how to use \p fill to set a thrust::device_vector's - * elements to a given value. - * - * \code - * #include - * #include - * ... 
- * thrust::device_vector v(4); - * thrust::fill(v.begin(), v.end(), 137); - * - * // v[0] == 137, v[1] == 137, v[2] == 137, v[3] == 137 - * \endcode - * - * \see http://www.sgi.com/tech/stl/fill.html - * \see \c fill_n - * \see \c uninitialized_fill - */ -template - void fill(ForwardIterator first, - ForwardIterator last, - const T &value); - - -/*! \p fill_n assigns the value \p value to every element in - * the range [first, first+n). That is, for every - * iterator \c i in [first, first+n), it performs - * the assignment *i = value. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param n The size of the sequence. - * \param value The value to be copied. - * \return first + n - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam T is a model of Assignable, - * and \p T's \c value_type is convertible to a type in \p OutputIterator's set of \c value_type. - * - * The following code snippet demonstrates how to use \p fill to set a thrust::device_vector's - * elements to a given value using the \p thrust::device execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * thrust::device_vector v(4); - * thrust::fill_n(thrust::device, v.begin(), v.size(), 137); - * - * // v[0] == 137, v[1] == 137, v[2] == 137, v[3] == 137 - * \endcode - * - * \see http://www.sgi.com/tech/stl/fill_n.html - * \see \c fill - * \see \c uninitialized_fill_n - */ -template - OutputIterator fill_n(const thrust::detail::execution_policy_base &exec, - OutputIterator first, - Size n, - const T &value); - - -/*! \p fill_n assigns the value \p value to every element in - * the range [first, first+n). That is, for every - * iterator \c i in [first, first+n), it performs - * the assignment *i = value. 
- * - * \param first The beginning of the sequence. - * \param n The size of the sequence. - * \param value The value to be copied. - * \return first + n - * - * \tparam OutputIterator is a model of Output Iterator. - * \tparam T is a model of Assignable, - * and \p T's \c value_type is convertible to a type in \p OutputIterator's set of \c value_type. - * - * The following code snippet demonstrates how to use \p fill to set a thrust::device_vector's - * elements to a given value. - * - * \code - * #include - * #include - * ... - * thrust::device_vector v(4); - * thrust::fill_n(v.begin(), v.size(), 137); - * - * // v[0] == 137, v[1] == 137, v[2] == 137, v[3] == 137 - * \endcode - * - * \see http://www.sgi.com/tech/stl/fill_n.html - * \see \c fill - * \see \c uninitialized_fill_n - */ -template - OutputIterator fill_n(OutputIterator first, - Size n, - const T &value); - - -/*! \} // end filling - * \} // transformations - */ - -} // end namespace thrust - -#include - diff --git a/compat/thrust/find.h b/compat/thrust/find.h deleted file mode 100644 index fa01ded500..0000000000 --- a/compat/thrust/find.h +++ /dev/null @@ -1,382 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file find.h - * \brief Locating values in (unsorted) ranges - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup algorithms - */ - -/*! 
\addtogroup searching - * \ingroup algorithms - * \{ - */ - - -/*! \p find returns the first iterator \c i in the range - * [first, last) such that *i == value - * or \c last if no such iterator exists. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first Beginning of the sequence to search. - * \param last End of the sequence to search. - * \param value The value to find. - * \return The first iterator \c i such that *i == value or \c last. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator - * and \p InputIterator's \c value_type is equality comparable to type \c T. - * \tparam T is a model of EqualityComparable. - * - * \code - * #include - * #include - * #include - * ... - * thrust::device_vector input(4); - * - * input[0] = 0; - * input[1] = 5; - * input[2] = 3; - * input[3] = 7; - * - * thrust::device_vector::iterator iter; - * - * iter = thrust::find(thrust::device, input.begin(), input.end(), 3); // returns input.first() + 2 - * iter = thrust::find(thrust::device, input.begin(), input.end(), 5); // returns input.first() + 1 - * iter = thrust::find(thrust::device, input.begin(), input.end(), 9); // returns input.end() - * \endcode - * - * \see find_if - * \see mismatch - */ -template -InputIterator find(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - const T& value); - - -/*! \p find returns the first iterator \c i in the range - * [first, last) such that *i == value - * or \c last if no such iterator exists. - * - * \param first Beginning of the sequence to search. - * \param last End of the sequence to search. - * \param value The value to find. - * \return The first iterator \c i such that *i == value or \c last. 
- * - * \tparam InputIterator is a model of Input Iterator - * and \p InputIterator's \c value_type is equality comparable to type \c T. - * \tparam T is a model of EqualityComparable. - * - * \code - * #include - * #include - * ... - * thrust::device_vector input(4); - * - * input[0] = 0; - * input[1] = 5; - * input[2] = 3; - * input[3] = 7; - * - * thrust::device_vector::iterator iter; - * - * iter = thrust::find(input.begin(), input.end(), 3); // returns input.first() + 2 - * iter = thrust::find(input.begin(), input.end(), 5); // returns input.first() + 1 - * iter = thrust::find(input.begin(), input.end(), 9); // returns input.end() - * \endcode - * - * \see find_if - * \see mismatch - */ -template -InputIterator find(InputIterator first, - InputIterator last, - const T& value); - - -/*! \p find_if returns the first iterator \c i in the range - * [first, last) such that pred(*i) is \c true - * or \c last if no such iterator exists. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first Beginning of the sequence to search. - * \param last End of the sequence to search. - * \param pred A predicate used to test range elements. - * \return The first iterator \c i such that pred(*i) is \c true, or \c last. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator. - * \tparam Predicate is a model of Predicate. - * - * \code - * #include - * #include - * #include - * ... - * - * struct greater_than_four - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x > 4; - * } - * }; - * - * struct greater_than_ten - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x > 10; - * } - * }; - * - * ... 
- * thrust::device_vector input(4); - * - * input[0] = 0; - * input[1] = 5; - * input[2] = 3; - * input[3] = 7; - * - * thrust::device_vector::iterator iter; - * - * iter = thrust::find_if(thrust::device, input.begin(), input.end(), greater_than_four()); // returns input.first() + 1 - * - * iter = thrust::find_if(thrust::device, input.begin(), input.end(), greater_than_ten()); // returns input.end() - * \endcode - * - * \see find - * \see find_if_not - * \see mismatch - */ -template -InputIterator find_if(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - Predicate pred); - - -/*! \p find_if returns the first iterator \c i in the range - * [first, last) such that pred(*i) is \c true - * or \c last if no such iterator exists. - * - * \param first Beginning of the sequence to search. - * \param last End of the sequence to search. - * \param pred A predicate used to test range elements. - * \return The first iterator \c i such that pred(*i) is \c true, or \c last. - * - * \tparam InputIterator is a model of Input Iterator. - * \tparam Predicate is a model of Predicate. - * - * \code - * #include - * #include - * - * struct greater_than_four - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x > 4; - * } - * }; - * - * struct greater_than_ten - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x > 10; - * } - * }; - * - * ... - * thrust::device_vector input(4); - * - * input[0] = 0; - * input[1] = 5; - * input[2] = 3; - * input[3] = 7; - * - * thrust::device_vector::iterator iter; - * - * iter = thrust::find_if(input.begin(), input.end(), greater_than_four()); // returns input.first() + 1 - * - * iter = thrust::find_if(input.begin(), input.end(), greater_than_ten()); // returns input.end() - * \endcode - * - * \see find - * \see find_if_not - * \see mismatch - */ -template -InputIterator find_if(InputIterator first, - InputIterator last, - Predicate pred); - - -/*! 
\p find_if_not returns the first iterator \c i in the range - * [first, last) such that pred(*i) is \c false - * or \c last if no such iterator exists. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first Beginning of the sequence to search. - * \param last End of the sequence to search. - * \param pred A predicate used to test range elements. - * \return The first iterator \c i such that pred(*i) is \c false, or \c last. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator. - * \tparam Predicate is a model of Predicate. - * - * \code - * #include - * #include - * #include - * ... - * - * struct greater_than_four - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x > 4; - * } - * }; - * - * struct greater_than_ten - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x > 10; - * } - * }; - * - * ... - * thrust::device_vector input(4); - * - * input[0] = 0; - * input[1] = 5; - * input[2] = 3; - * input[3] = 7; - * - * thrust::device_vector::iterator iter; - * - * iter = thrust::find_if_not(thrust::device, input.begin(), input.end(), greater_than_four()); // returns input.first() - * - * iter = thrust::find_if_not(thrust::device, input.begin(), input.end(), greater_than_ten()); // returns input.first() - * \endcode - * - * \see find - * \see find_if - * \see mismatch - */ -template -InputIterator find_if_not(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - Predicate pred); - - -/*! \p find_if_not returns the first iterator \c i in the range - * [first, last) such that pred(*i) is \c false - * or \c last if no such iterator exists. - * - * \param first Beginning of the sequence to search. - * \param last End of the sequence to search. - * \param pred A predicate used to test range elements. 
- * \return The first iterator \c i such that pred(*i) is \c false, or \c last. - * - * \tparam InputIterator is a model of Input Iterator. - * \tparam Predicate is a model of Predicate. - * - * \code - * #include - * #include - * - * struct greater_than_four - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x > 4; - * } - * }; - * - * struct greater_than_ten - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x > 10; - * } - * }; - * - * ... - * thrust::device_vector input(4); - * - * input[0] = 0; - * input[1] = 5; - * input[2] = 3; - * input[3] = 7; - * - * thrust::device_vector::iterator iter; - * - * iter = thrust::find_if_not(input.begin(), input.end(), greater_than_four()); // returns input.first() - * - * iter = thrust::find_if_not(input.begin(), input.end(), greater_than_ten()); // returns input.first() - * \endcode - * - * \see find - * \see find_if - * \see mismatch - */ -template -InputIterator find_if_not(InputIterator first, - InputIterator last, - Predicate pred); - -/*! \} // end searching - */ - - -} // end namespace thrust - -#include - diff --git a/compat/thrust/for_each.h b/compat/thrust/for_each.h deleted file mode 100644 index efab9d8fab..0000000000 --- a/compat/thrust/for_each.h +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file for_each.h - * \brief Applies a function to each element in a range - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ - - -/*! \addtogroup modifying - * \ingroup transformations - * \{ - */ - - -/*! \p for_each applies the function object \p f to each element - * in the range [first, last); \p f's return value, if any, - * is ignored. Unlike the C++ Standard Template Library function - * std::for_each, this version offers no guarantee on - * order of execution. For this reason, this version of \p for_each - * does not return a copy of the function object. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param f The function object to apply to the range [first, last). - * \return last - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type. - * \tparam UnaryFunction is a model of Unary Function, - * and \p UnaryFunction does not apply any non-constant operation through its argument. - * - * The following code snippet demonstrates how to use \p for_each to print the elements - * of a \p std::device_vector using the \p thrust::device parallelization policy: - * - * \code - * #include - * #include - * #include - * #include - * ... - * - * struct printf_functor - * { - * __host__ __device__ - * void operator()(int x) - * { - * // note that using printf in a __device__ function requires - * // code compiled for a GPU with compute capability 2.0 or - * // higher (nvcc --arch=sm_20) - * printf("%d\n"); - * } - * }; - * ... 
- * thrust::device_vector d_vec(3); - * d_vec[0] = 0; d_vec[1] = 1; d_vec[2] = 2; - * - * thrust::for_each(thrust::device, d_vec.begin(), d_vec.end(), printf_functor()); - * - * // 0 1 2 is printed to standard output in some unspecified order - * \endcode - * - * \see for_each_n - * \see http://www.sgi.com/tech/stl/for_each.html - */ -template -InputIterator for_each(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - UnaryFunction f); - - -/*! \p for_each_n applies the function object \p f to each element - * in the range [first, first + n); \p f's return value, if any, - * is ignored. Unlike the C++ Standard Template Library function - * std::for_each, this version offers no guarantee on - * order of execution. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param n The size of the input sequence. - * \param f The function object to apply to the range [first, first + n). - * \return first + n - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type. - * \tparam Size is an integral type. - * \tparam UnaryFunction is a model of Unary Function, - * and \p UnaryFunction does not apply any non-constant operation through its argument. - * - * The following code snippet demonstrates how to use \p for_each_n to print the elements - * of a \p device_vector using the \p thrust::device parallelization policy. 
- * - * \code - * #include - * #include - * #include - * #include - * - * struct printf_functor - * { - * __host__ __device__ - * void operator()(int x) - * { - * // note that using printf in a __device__ function requires - * // code compiled for a GPU with compute capability 2.0 or - * // higher (nvcc --arch=sm_20) - * printf("%d\n"); - * } - * }; - * ... - * thrust::device_vector d_vec(3); - * d_vec[0] = 0; d_vec[1] = 1; d_vec[2] = 2; - * - * thrust::for_each_n(thrust::device, d_vec.begin(), d_vec.size(), printf_functor()); - * - * // 0 1 2 is printed to standard output in some unspecified order - * \endcode - * - * \see for_each - * \see http://www.sgi.com/tech/stl/for_each.html - */ -template -InputIterator for_each_n(const thrust::detail::execution_policy_base &exec, - InputIterator first, - Size n, - UnaryFunction f); - -/*! \p for_each applies the function object \p f to each element - * in the range [first, last); \p f's return value, if any, - * is ignored. Unlike the C++ Standard Template Library function - * std::for_each, this version offers no guarantee on - * order of execution. For this reason, this version of \p for_each - * does not return a copy of the function object. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param f The function object to apply to the range [first, last). - * \return last - * - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type. - * \tparam UnaryFunction is a model of Unary Function, - * and \p UnaryFunction does not apply any non-constant operation through its argument. - * - * The following code snippet demonstrates how to use \p for_each to print the elements - * of a \p device_vector. 
- * - * \code - * #include - * #include - * #include - * - * struct printf_functor - * { - * __host__ __device__ - * void operator()(int x) - * { - * // note that using printf in a __device__ function requires - * // code compiled for a GPU with compute capability 2.0 or - * // higher (nvcc --arch=sm_20) - * printf("%d\n"); - * } - * }; - * ... - * thrust::device_vector d_vec(3); - * d_vec[0] = 0; d_vec[1] = 1; d_vec[2] = 2; - * - * thrust::for_each(d_vec.begin(), d_vec.end(), printf_functor()); - * - * // 0 1 2 is printed to standard output in some unspecified order - * \endcode - * - * \see for_each_n - * \see http://www.sgi.com/tech/stl/for_each.html - */ -template -InputIterator for_each(InputIterator first, - InputIterator last, - UnaryFunction f); - - -/*! \p for_each_n applies the function object \p f to each element - * in the range [first, first + n); \p f's return value, if any, - * is ignored. Unlike the C++ Standard Template Library function - * std::for_each, this version offers no guarantee on - * order of execution. - * - * \param first The beginning of the sequence. - * \param n The size of the input sequence. - * \param f The function object to apply to the range [first, first + n). - * \return first + n - * - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type. - * \tparam Size is an integral type. - * \tparam UnaryFunction is a model of Unary Function, - * and \p UnaryFunction does not apply any non-constant operation through its argument. - * - * The following code snippet demonstrates how to use \p for_each_n to print the elements - * of a \p device_vector. 
- * - * \code - * #include - * #include - * #include - * - * struct printf_functor - * { - * __host__ __device__ - * void operator()(int x) - * { - * // note that using printf in a __device__ function requires - * // code compiled for a GPU with compute capability 2.0 or - * // higher (nvcc --arch=sm_20) - * printf("%d\n"); - * } - * }; - * ... - * thrust::device_vector d_vec(3); - * d_vec[0] = 0; d_vec[1] = 1; d_vec[2] = 2; - * - * thrust::for_each_n(d_vec.begin(), d_vec.size(), printf_functor()); - * - * // 0 1 2 is printed to standard output in some unspecified order - * \endcode - * - * \see for_each - * \see http://www.sgi.com/tech/stl/for_each.html - */ -template -InputIterator for_each_n(InputIterator first, - Size n, - UnaryFunction f); - -/*! \} // end modifying - */ - -} // end namespace thrust - -#include - diff --git a/compat/thrust/functional.h b/compat/thrust/functional.h deleted file mode 100644 index b3d47f9179..0000000000 --- a/compat/thrust/functional.h +++ /dev/null @@ -1,1079 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file functional.h - * \brief Function objects and tools for manipulating them - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ - -/*! \addtogroup function_objects Function Objects - */ - -template struct unary_traits; - -template struct binary_traits; - -/*! 
\addtogroup function_object_adaptors Function Object Adaptors - * \ingroup function_objects - * \{ - */ - -/*! \p unary_function is an empty base class: it contains no member functions - * or member variables, but only type information. The only reason it exists - * is to make it more convenient to define types that are models of the - * concept Adaptable Unary Function. Specifically, any model of Adaptable - * Unary Function must define nested \c typedefs. Those \c typedefs are - * provided by the base class \p unary_function. - * - * The following code snippet demonstrates how to construct an - * Adaptable Unary Function using \p unary_function. - * - * \code - * struct sine : public thrust::unary_function - * { - * __host__ __device__ - * float operator()(float x) { return sinf(x); } - * }; - * \endcode - * - * \note unary_function is currently redundant with the C++ STL type - * \c std::unary_function. We reserve it here for potential additional - * functionality at a later date. - * - * \see http://www.sgi.com/tech/stl/unary_function.html - * \see binary_function - */ -template - struct unary_function - : public std::unary_function -{ -}; // end unary_function - -/*! \p binary_function is an empty base class: it contains no member functions - * or member variables, but only type information. The only reason it exists - * is to make it more convenient to define types that are models of the - * concept Adaptable Binary Function. Specifically, any model of Adaptable - * Binary Function must define nested \c typedefs. Those \c typedefs are - * provided by the base class \p binary_function. - * - * The following code snippet demonstrates how to construct an - * Adaptable Binary Function using \p binary_function. 
- * - * \code - * struct exponentiate : public thrust::binary_function - * { - * __host__ __device__ - * float operator()(float x, float y) { return powf(x,y); } - * }; - * \endcode - * - * \note binary_function is currently redundant with the C++ STL type - * \c std::binary_function. We reserve it here for potential additional - * functionality at a later date. - * - * \see http://www.sgi.com/tech/stl/binary_function.html - * \see unary_function - */ -template - struct binary_function - : public std::binary_function -{ -}; // end binary_function - -/*! \} - */ - - -/*! \addtogroup predefined_function_objects Predefined Function Objects - * \ingroup function_objects - */ - -/*! \addtogroup arithmetic_operations Arithmetic Operations - * \ingroup predefined_function_objects - * \{ - */ - -/*! \p plus is a function object. Specifically, it is an Adaptable Binary Function. - * If \c f is an object of class plus, and \c x and \c y are objects - * of class \c T, then f(x,y) returns x+y. - * - * \tparam T is a model of Assignable, - * and if \c x and \c y are objects of type \p T, then x+y must be defined and must have a return type that is convertible to \c T. - * - * The following code snippet demonstrates how to use plus to sum two - * device_vectors of \c floats. - * - * \code - * #include - * #include - * #include - * #include - * #include - * ... - * const int N = 1000; - * thrust::device_vector V1(N); - * thrust::device_vector V2(N); - * thrust::device_vector V3(N); - * - * thrust::sequence(V1.begin(), V1.end(), 1); - * thrust::fill(V2.begin(), V2.end(), 75); - * - * thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(), - * thrust::plus()); - * // V3 is now {76, 77, 78, ..., 1075} - * \endcode - * - * \see http://www.sgi.com/tech/stl/plus.html - * \see binary_function - */ -template - struct plus : public binary_function -{ - /*! Function call operator. The return value is lhs + rhs. 
- */ - __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs + rhs;} -}; // end plus - -/*! \p minus is a function object. Specifically, it is an Adaptable Binary Function. - * If \c f is an object of class minus, and \c x and \c y are objects - * of class \c T, then f(x,y) returns x-y. - * - * \tparam T is a model of Assignable, - * and if \c x and \c y are objects of type \p T, then x-y must be defined and must have a return type that is convertible to \c T. - * - * The following code snippet demonstrates how to use minus to subtract - * a device_vector of \c floats from another. - * - * \code - * #include - * #include - * #include - * #include - * #include - * ... - * const int N = 1000; - * thrust::device_vector V1(N); - * thrust::device_vector V2(N); - * thrust::device_vector V3(N); - * - * thrust::sequence(V1.begin(), V1.end(), 1); - * thrust::fill(V2.begin(), V2.end(), 75); - * - * thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(), - * thrust::minus()); - * // V3 is now {-74, -75, -76, ..., -925} - * \endcode - * - * \see http://www.sgi.com/tech/stl/minus.html - * \see binary_function - */ -template - struct minus : public binary_function -{ - /*! Function call operator. The return value is lhs - rhs. - */ - __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs - rhs;} -}; // end minus - -/*! \p multiplies is a function object. Specifically, it is an Adaptable Binary Function. - * If \c f is an object of class minus, and \c x and \c y are objects - * of class \c T, then f(x,y) returns x*y. - * - * \tparam T is a model of Assignable, - * and if \c x and \c y are objects of type \p T, then x*y must be defined and must have a return type that is convertible to \c T. - * - * The following code snippet demonstrates how to use multiplies to multiply - * two device_vectors of \c floats. - * - * \code - * #include - * #include - * #include - * #include - * #include - * ... 
- * const int N = 1000; - * thrust::device_vector V1(N); - * thrust::device_vector V2(N); - * thrust::device_vector V3(N); - * - * thrust::sequence(V1.begin(), V1.end(), 1); - * thrust::fill(V2.begin(), V2.end(), 75); - * - * thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(), - * thrust::multiplies()); - * // V3 is now {75, 150, 225, ..., 75000} - * \endcode - * - * \see http://www.sgi.com/tech/stl/multiplies.html - * \see binary_function - */ -template - struct multiplies : public binary_function -{ - /*! Function call operator. The return value is lhs * rhs. - */ - __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs * rhs;} -}; // end multiplies - -/*! \p divides is a function object. Specifically, it is an Adaptable Binary Function. - * If \c f is an object of class divides, and \c x and \c y are objects - * of class \c T, then f(x,y) returns x/y. - * - * \tparam T is a model of Assignable, - * and if \c x and \c y are objects of type \p T, then x/y must be defined and must have a return type that is convertible to \c T. - * - * The following code snippet demonstrates how to use divides to divide - * one device_vectors of \c floats by another. - * - * \code - * #include - * #include - * #include - * #include - * #include - * ... - * const int N = 1000; - * thrust::device_vector V1(N); - * thrust::device_vector V2(N); - * thrust::device_vector V3(N); - * - * thrust::sequence(V1.begin(), V1.end(), 1); - * thrust::fill(V2.begin(), V2.end(), 75); - * - * thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(), - * thrust::divides()); - * // V3 is now {1/75, 2/75, 3/75, ..., 1000/75} - * \endcode - * - * \see http://www.sgi.com/tech/stl/divides.html - * \see binary_function - */ -template - struct divides : public binary_function -{ - /*! Function call operator. The return value is lhs / rhs. - */ - __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs / rhs;} -}; // end divides - -/*! 
\p modulus is a function object. Specifically, it is an Adaptable Binary Function. - * If \c f is an object of class divides, and \c x and \c y are objects - * of class \c T, then f(x,y) returns x%y. - * - * \tparam T is a model of Assignable, - * and if \c x and \c y are objects of type \p T, then x%y must be defined and must have a return type that is convertible to \c T. - * - * The following code snippet demonstrates how to use modulus to take - * the modulus of one device_vectors of \c floats by another. - * - * \code - * #include - * #include - * #include - * #include - * #include - * ... - * const int N = 1000; - * thrust::device_vector V1(N); - * thrust::device_vector V2(N); - * thrust::device_vector V3(N); - * - * thrust::sequence(V1.begin(), V1.end(), 1); - * thrust::fill(V2.begin(), V2.end(), 75); - * - * thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(), - * thrust::modulus()); - * // V3 is now {1%75, 2%75, 3%75, ..., 1000%75} - * \endcode - * - * \see http://www.sgi.com/tech/stl/modulus.html - * \see binary_function - */ -template - struct modulus : public binary_function -{ - /*! Function call operator. The return value is lhs % rhs. - */ - __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs % rhs;} -}; // end modulus - -/*! \p negate is a function object. Specifically, it is an Adaptable Unary Function. - * If \c f is an object of class negate, and \c x is an object - * of class \c T, then f(x) returns -x. - * - * \tparam T is a model of Assignable, - * and if \c x is an object of type \p T, then -x must be defined and must have a return type that is convertible to \c T. - * - * The following code snippet demonstrates how to use negate to negate - * the element of a device_vector of \c floats. - * - * \code - * #include - * #include - * #include - * #include - * ... 
- * const int N = 1000; - * thrust::device_vector V1(N); - * thrust::device_vector V2(N); - * - * thrust::sequence(V1.begin(), V1.end(), 1); - * - * thrust::transform(V1.begin(), V1.end(), V2.begin(), - * thrust::negate()); - * // V2 is now {-1, -2, -3, ..., -1000} - * \endcode - * - * \see http://www.sgi.com/tech/stl/negate.html - * \see unary_function - */ -template - struct negate : public unary_function -{ - /*! Function call operator. The return value is -x. - */ - __host__ __device__ T operator()(const T &x) const {return -x;} -}; // end negate - -/*! \} - */ - -/*! \addtogroup comparison_operations Comparison Operations - * \ingroup predefined_function_objects - * \{ - */ - -/*! \p equal_to is a function object. Specifically, it is an Adaptable Binary - * Predicate, which means it is a function object that tests the truth or falsehood - * of some condition. If \c f is an object of class equal_to and \c x - * and \c y are objects of class \c T, then f(x,y) returns \c true if - * x == y and \c false otherwise. - * - * \tparam T is a model of Equality Comparable. - * - * \see http://www.sgi.com/tech/stl/equal_to.html - * \see binary_function - */ -template - struct equal_to : public binary_function -{ - /*! Function call operator. The return value is lhs == rhs. - */ - __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs == rhs;} -}; // end equal_to - -/*! \p not_equal_to is a function object. Specifically, it is an Adaptable Binary - * Predicate, which means it is a function object that tests the truth or falsehood - * of some condition. If \c f is an object of class not_equal_to and \c x - * and \c y are objects of class \c T, then f(x,y) returns \c true if - * x != y and \c false otherwise. - * - * \tparam T is a model of Equality Comparable. - * - * \see http://www.sgi.com/tech/stl/not_equal_to.html - * \see binary_function - */ -template - struct not_equal_to : public binary_function -{ - /*! Function call operator. 
The return value is lhs != rhs. - */ - __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs != rhs;} -}; // end not_equal_to - -/*! \p greater is a function object. Specifically, it is an Adaptable Binary - * Predicate, which means it is a function object that tests the truth or falsehood - * of some condition. If \c f is an object of class greater and \c x - * and \c y are objects of class \c T, then f(x,y) returns \c true if - * x > y and \c false otherwise. - * - * \tparam T is a model of LessThan Comparable. - * - * \see http://www.sgi.com/tech/stl/greater.html - * \see binary_function - */ -template - struct greater : public binary_function -{ - /*! Function call operator. The return value is lhs > rhs. - */ - __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs > rhs;} -}; // end greater - -/*! \p less is a function object. Specifically, it is an Adaptable Binary - * Predicate, which means it is a function object that tests the truth or falsehood - * of some condition. If \c f is an object of class less and \c x - * and \c y are objects of class \c T, then f(x,y) returns \c true if - * x < y and \c false otherwise. - * - * \tparam T is a model of LessThan Comparable. - * - * \see http://www.sgi.com/tech/stl/less.html - * \see binary_function - */ -template - struct less : public binary_function -{ - /*! Function call operator. The return value is lhs < rhs. - */ - __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs < rhs;} -}; // end less - -/*! \p greater_equal is a function object. Specifically, it is an Adaptable Binary - * Predicate, which means it is a function object that tests the truth or falsehood - * of some condition. If \c f is an object of class greater_equal and \c x - * and \c y are objects of class \c T, then f(x,y) returns \c true if - * x >= y and \c false otherwise. - * - * \tparam T is a model of LessThan Comparable. 
- * - * \see http://www.sgi.com/tech/stl/greater_equal.html - * \see binary_function - */ -template - struct greater_equal : public binary_function -{ - /*! Function call operator. The return value is lhs >= rhs. - */ - __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs >= rhs;} -}; // end greater_equal - -/*! \p less_equal is a function object. Specifically, it is an Adaptable Binary - * Predicate, which means it is a function object that tests the truth or falsehood - * of some condition. If \c f is an object of class less_equal and \c x - * and \c y are objects of class \c T, then f(x,y) returns \c true if - * x <= y and \c false otherwise. - * - * \tparam T is a model of LessThan Comparable. - * - * \see http://www.sgi.com/tech/stl/less_equal.html - * \see binary_function - */ -template - struct less_equal : public binary_function -{ - /*! Function call operator. The return value is lhs <= rhs. - */ - __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs <= rhs;} -}; // end less_equal - -/*! \} - */ - - -/*! \addtogroup logical_operations Logical Operations - * \ingroup predefined_function_objects - * \{ - */ - -/*! \p logical_and is a function object. Specifically, it is an Adaptable Binary Predicate, - * which means it is a function object that tests the truth or falsehood of some condition. - * If \c f is an object of class logical_and and \c x and \c y are objects of - * class \c T (where \c T is convertible to \c bool) then f(x,y) returns \c true - * if and only if both \c x and \c y are \c true. - * - * \tparam T must be convertible to \c bool. - * - * \see http://www.sgi.com/tech/stl/logical_and.html - * \see binary_function - */ -template - struct logical_and : public binary_function -{ - /*! Function call operator. The return value is lhs && rhs. - */ - __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs && rhs;} -}; // end logical_and - -/*! 
\p logical_or is a function object. Specifically, it is an Adaptable Binary Predicate, - * which means it is a function object that tests the truth or falsehood of some condition. - * If \c f is an object of class logical_or and \c x and \c y are objects of - * class \c T (where \c T is convertible to \c bool) then f(x,y) returns \c true - * if and only if either \c x or \c y are \c true. - * - * \tparam T must be convertible to \c bool. - * - * \see http://www.sgi.com/tech/stl/logical_or.html - * \see binary_function - */ -template - struct logical_or : public binary_function -{ - /*! Function call operator. The return value is lhs || rhs. - */ - __host__ __device__ bool operator()(const T &lhs, const T &rhs) const {return lhs || rhs;} -}; // end logical_or - -/*! \p logical_not is a function object. Specifically, it is an Adaptable Predicate, - * which means it is a function object that tests the truth or falsehood of some condition. - * If \c f is an object of class logical_not and \c x is an object of - * class \c T (where \c T is convertible to \c bool) then f(x) returns \c true - * if and only if \c x is \c false. - * - * \tparam T must be convertible to \c bool. - * - * The following code snippet demonstrates how to use \p logical_not to transform - * a device_vector of \c bools into its logical complement. - * - * \code - * #include - * #include - * #include - * ... - * thrust::device_vector V; - * ... - * thrust::transform(V.begin(), V.end(), V.begin(), thrust::logical_not()); - * // The elements of V are now the logical complement of what they were prior - * \endcode - * - * \see http://www.sgi.com/tech/stl/logical_not.html - * \see unary_function - */ -template - struct logical_not : public unary_function -{ - /*! Function call operator. The return value is !x. - */ - __host__ __device__ bool operator()(const T &x) const {return !x;} -}; // end logical_not - -/*! \} - */ - -/*! 
\addtogroup bitwise_operations Bitwise Operations - * \ingroup predefined_function_objects - * \{ - */ - -/*! \p bit_and is a function object. Specifically, it is an Adaptable Binary Function. - * If \c f is an object of class bit_and, and \c x and \c y are objects - * of class \c T, then f(x,y) returns x&y. - * - * \tparam T is a model of Assignable, - * and if \c x and \c y are objects of type \p T, then x&y must be defined and must have a return type that is convertible to \c T. - * - * The following code snippet demonstrates how to use bit_and to take - * the bitwise AND of one device_vector of \c ints by another. - * - * \code - * #include - * #include - * #include - * #include - * #include - * ... - * const int N = 1000; - * thrust::device_vector V1(N); - * thrust::device_vector V2(N); - * thrust::device_vector V3(N); - * - * thrust::sequence(V1.begin(), V1.end(), 1); - * thrust::fill(V2.begin(), V2.end(), 13); - * - * thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(), - * thrust::bit_and()); - * // V3 is now {1&13, 2&13, 3&13, ..., 1000%13} - * \endcode - * - * \see binary_function - */ -template - struct bit_and : public binary_function -{ - /*! Function call operator. The return value is lhs & rhs. - */ - __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs & rhs;} -}; // end bit_and - -/*! \p bit_or is a function object. Specifically, it is an Adaptable Binary Function. - * If \c f is an object of class bit_and, and \c x and \c y are objects - * of class \c T, then f(x,y) returns x|y. - * - * \tparam T is a model of Assignable, - * and if \c x and \c y are objects of type \p T, then x|y must be defined and must have a return type that is convertible to \c T. - * - * The following code snippet demonstrates how to use bit_or to take - * the bitwise OR of one device_vector of \c ints by another. - * - * \code - * #include - * #include - * #include - * #include - * #include - * ... 
- * const int N = 1000; - * thrust::device_vector V1(N); - * thrust::device_vector V2(N); - * thrust::device_vector V3(N); - * - * thrust::sequence(V1.begin(), V1.end(), 1); - * thrust::fill(V2.begin(), V2.end(), 13); - * - * thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(), - * thrust::bit_or()); - * // V3 is now {1|13, 2|13, 3|13, ..., 1000|13} - * \endcode - * - * \see binary_function - */ -template - struct bit_or : public binary_function -{ - /*! Function call operator. The return value is lhs | rhs. - */ - __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs | rhs;} -}; // end bit_or - -/*! \p bit_xor is a function object. Specifically, it is an Adaptable Binary Function. - * If \c f is an object of class bit_and, and \c x and \c y are objects - * of class \c T, then f(x,y) returns x^y. - * - * \tparam T is a model of Assignable, - * and if \c x and \c y are objects of type \p T, then x^y must be defined and must have a return type that is convertible to \c T. - * - * The following code snippet demonstrates how to use bit_xor to take - * the bitwise XOR of one device_vector of \c ints by another. - * - * \code - * #include - * #include - * #include - * #include - * #include - * ... - * const int N = 1000; - * thrust::device_vector V1(N); - * thrust::device_vector V2(N); - * thrust::device_vector V3(N); - * - * thrust::sequence(V1.begin(), V1.end(), 1); - * thrust::fill(V2.begin(), V2.end(), 13); - * - * thrust::transform(V1.begin(), V1.end(), V2.begin(), V3.begin(), - * thrust::bit_xor()); - * // V3 is now {1^13, 2^13, 3^13, ..., 1000^13} - * \endcode - * - * \see binary_function - */ -template - struct bit_xor : public binary_function -{ - /*! Function call operator. The return value is lhs ^ rhs. - */ - __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs ^ rhs;} -}; // end bit_xor - -/*! \} - */ - -/*! 
\addtogroup generalized_identity_operations Generalized Identity Operations - * \ingroup predefined_function_objects - * \{ - */ - -/*! \p identity is a Unary Function that represents the identity function: it takes - * a single argument \c x, and returns \c x. - * - * \tparam T No requirements on \p T. - * - * The following code snippet demonstrates that \p identity returns its - * argument. - * - * \code - * #include - * #include - * ... - * int x = 137; - * thrust::identity id; - * assert(x == id(x)); - * \endcode - * - * \see http://www.sgi.com/tech/stl/identity.html - * \see unary_function - */ -template - struct identity : public unary_function -{ - /*! Function call operator. The return value is x. - */ - __host__ __device__ const T &operator()(const T &x) const {return x;} -}; // end identity - -/*! \p maximum is a function object that takes two arguments and returns the greater - * of the two. Specifically, it is an Adaptable Binary Function. If \c f is an - * object of class maximum and \c x and \c y are objects of class \c T - * f(x,y) returns \c x if x > y and \c y, otherwise. - * - * \tparam T is a model of LessThan Comparable. - * - * The following code snippet demonstrates that \p maximum returns its - * greater argument. - * - * \code - * #include - * #include - * ... - * int x = 137; - * int y = -137; - * thrust::maximum mx; - * assert(x == mx(x,y)); - * \endcode - * - * \see minimum - * \see min - * \see binary_function - */ -template - struct maximum : public binary_function -{ - /*! Function call operator. The return value is rhs < lhs ? lhs : rhs. - */ - __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs < rhs ? rhs : lhs;} -}; // end maximum - -/*! \p minimum is a function object that takes two arguments and returns the lesser - * of the two. Specifically, it is an Adaptable Binary Function. 
If \c f is an - * object of class minimum and \c x and \c y are objects of class \c T - * f(x,y) returns \c x if x < y and \c y, otherwise. - * - * \tparam T is a model of LessThan Comparable. - * - * The following code snippet demonstrates that \p minimum returns its - * lesser argument. - * - * \code - * #include - * #include - * ... - * int x = 137; - * int y = -137; - * thrust::minimum mn; - * assert(y == mn(x,y)); - * \endcode - * - * \see maximum - * \see max - * \see binary_function - */ -template - struct minimum : public binary_function -{ - /*! Function call operator. The return value is lhs < rhs ? lhs : rhs. - */ - __host__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs < rhs ? lhs : rhs;} -}; // end minimum - -/*! \p project1st is a function object that takes two arguments and returns - * its first argument; the second argument is unused. It is essentially a - * generalization of identity to the case of a Binary Function. - * - * \code - * #include - * #include - * ... - * int x = 137; - * int y = -137; - * thrust::project1st pj1; - * assert(x == pj1(x,y)); - * \endcode - * - * \see identity - * \see project2nd - * \see binary_function - */ -template - struct project1st : public binary_function -{ - /*! Function call operator. The return value is lhs. - */ - __host__ __device__ const T1 &operator()(const T1 &lhs, const T2 &rhs) const {return lhs;} -}; // end project1st - -/*! \p project2nd is a function object that takes two arguments and returns - * its second argument; the first argument is unused. It is essentially a - * generalization of identity to the case of a Binary Function. - * - * \code - * #include - * #include - * ... - * int x = 137; - * int y = -137; - * thrust::project2nd pj2; - * assert(y == pj2(x,y)); - * \endcode - * - * \see identity - * \see project1st - * \see binary_function - */ -template - struct project2nd : public binary_function -{ - /*! Function call operator. The return value is rhs. 
- */ - __host__ __device__ const T2 &operator()(const T1 &lhs, const T2 &rhs) const {return rhs;} -}; // end project2nd - -/*! \} - */ - - -// odds and ends - -/*! \addtogroup function_object_adaptors - * \{ - */ - -/*! \p unary_negate is a function object adaptor: it is an Adaptable Predicate - * that represents the logical negation of some other Adaptable Predicate. - * That is: if \c f is an object of class unary_negate, - * then there exists an object \c pred of class \c AdaptablePredicate such - * that f(x) always returns the same value as !pred(x). - * There is rarely any reason to construct a unary_negate directly; - * it is almost always easier to use the helper function not1. - * - * \see http://www.sgi.com/tech/stl/unary_negate.html - * \see not1 - */ -template -struct unary_negate - : public thrust::unary_function -{ - /*! Constructor takes a \p Predicate object to negate. - * \param p The \p Predicate object to negate. - */ - __host__ __device__ - explicit unary_negate(Predicate p) : pred(p){} - - /*! Function call operator. The return value is !pred(x). - */ - __host__ __device__ - bool operator()(const typename Predicate::argument_type& x) { return !pred(x); } - - /*! \cond */ - Predicate pred; - /*! \endcond */ -}; // end unary_negate - -/*! \p not1 is a helper function to simplify the creation of Adaptable Predicates: - * it takes an Adaptable Predicate \p pred as an argument and returns a new Adaptable - * Predicate that represents the negation of \p pred. That is: if \c pred is an object - * of a type which models Adaptable Predicate, then the the type of the result - * \c npred of not1(pred) is also a model of Adaptable Predicate and - * npred(x) always returns the same value as !pred(x). - * - * \param pred The Adaptable Predicate to negate. - * \return A new object, npred such that npred(x) always returns - * the same value as !pred(x). - * - * \tparam Predicate is a model of Adaptable Predicate. 
- * - * \see unary_negate - * \see not2 - */ -template - __host__ __device__ - unary_negate not1(const Predicate &pred); - -/*! \p binary_negate is a function object adaptor: it is an Adaptable Binary - * Predicate that represents the logical negation of some other Adaptable - * Binary Predicate. That is: if \c f is an object of class binary_negate, - * then there exists an object \c pred of class \c AdaptableBinaryPredicate - * such that f(x,y) always returns the same value as !pred(x,y). - * There is rarely any reason to construct a binary_negate directly; - * it is almost always easier to use the helper function not2. - * - * \see http://www.sgi.com/tech/stl/binary_negate.html - */ -template -struct binary_negate - : public thrust::binary_function -{ - /*! Constructor takes a \p Predicate object to negate. - * \param p The \p Predicate object to negate. - */ - __host__ __device__ - explicit binary_negate(Predicate p) : pred(p){} - - /*! Function call operator. The return value is !pred(x,y). - */ - __host__ __device__ - bool operator()(const typename Predicate::first_argument_type& x, const typename Predicate::second_argument_type& y) - { - return !pred(x,y); - } - - /*! \cond */ - Predicate pred; - /*! \endcond */ -}; // end binary_negate - -/*! \p not2 is a helper function to simplify the creation of Adaptable Binary Predicates: - * it takes an Adaptable Binary Predicate \p pred as an argument and returns a new Adaptable - * Binary Predicate that represents the negation of \p pred. That is: if \c pred is an object - * of a type which models Adaptable Binary Predicate, then the the type of the result - * \c npred of not2(pred) is also a model of Adaptable Binary Predicate and - * npred(x,y) always returns the same value as !pred(x,y). - * - * \param pred The Adaptable Binary Predicate to negate. - * \return A new object, npred such that npred(x,y) always returns - * the same value as !pred(x,y). 
- * - * \tparam Binary Predicate is a model of Adaptable Binary Predicate. - * - * \see binary_negate - * \see not1 - */ -template - __host__ __device__ - binary_negate not2(const BinaryPredicate &pred); - -/*! \} - */ - - -/*! \addtogroup placeholder_objects Placeholder Objects - * \ingroup function_objects - * \{ - */ - - -/*! \namespace placeholders - * \brief Facilities for constructing simple functions inline. - * - * Objects in the \p thrust::placeholders namespace may be used to create simple arithmetic functions inline - * in an algorithm invocation. Combining placeholders such as \p _1 and \p _2 with arithmetic operations such as \c + - * creates an unnamed function object which applies the operation to their arguments. - * - * The type of placeholder objects is implementation-defined. - * - * The following code snippet demonstrates how to use the placeholders \p _1 and \p _2 with \p thrust::transform - * to implement the SAXPY computation: - * - * \code - * #include - * #include - * #include - * - * int main() - * { - * thrust::device_vector x(4), y(4); - * x[0] = 1; - * x[1] = 2; - * x[2] = 3; - * x[3] = 4; - * - * y[0] = 1; - * y[1] = 1; - * y[2] = 1; - * y[3] = 1; - * - * float a = 2.0f; - * - * using namespace thrust::placeholders; - * - * thrust::transform(x.begin(), x.end(), y.begin(), y.begin(), - * a * _1 + 2 - * ); - * - * // y is now {3, 5, 7, 9} - * } - * \endcode - */ -namespace placeholders -{ - - -/*! \p thrust::placeholders::_1 is the placeholder for the first function parameter. - */ -static const thrust::detail::functional::placeholder<0>::type _1; - - -/*! \p thrust::placeholders::_2 is the placeholder for the second function parameter. - */ -static const thrust::detail::functional::placeholder<1>::type _2; - - -/*! \p thrust::placeholders::_3 is the placeholder for the third function parameter. - */ -static const thrust::detail::functional::placeholder<2>::type _3; - - -/*! 
\p thrust::placeholders::_4 is the placeholder for the fourth function parameter. - */ -static const thrust::detail::functional::placeholder<3>::type _4; - - -/*! \p thrust::placeholders::_5 is the placeholder for the fifth function parameter. - */ -static const thrust::detail::functional::placeholder<4>::type _5; - - -/*! \p thrust::placeholders::_6 is the placeholder for the sixth function parameter. - */ -static const thrust::detail::functional::placeholder<5>::type _6; - - -/*! \p thrust::placeholders::_7 is the placeholder for the seventh function parameter. - */ -static const thrust::detail::functional::placeholder<6>::type _7; - - -/*! \p thrust::placeholders::_8 is the placeholder for the eighth function parameter. - */ -static const thrust::detail::functional::placeholder<7>::type _8; - - -/*! \p thrust::placeholders::_9 is the placeholder for the ninth function parameter. - */ -static const thrust::detail::functional::placeholder<8>::type _9; - - -/*! \p thrust::placeholders::_10 is the placeholder for the tenth function parameter. - */ -static const thrust::detail::functional::placeholder<9>::type _10; - - -} // end placeholders - - -/*! \} // placeholder_objects - */ - - -} // end thrust - -#include -#include - diff --git a/compat/thrust/gather.h b/compat/thrust/gather.h deleted file mode 100644 index f2b8233657..0000000000 --- a/compat/thrust/gather.h +++ /dev/null @@ -1,438 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file gather.h - * \brief Irregular copying from a source range - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup gathering - * \ingroup copying - * \{ - */ - - -/*! \p gather copies elements from a source array into a destination range according - * to a map. For each input iterator \c i in the range [map_first, map_last), the - * value input_first[\*i] is assigned to *(result + (i - map_first)). - * \p RandomAccessIterator must permit random access. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param map_first Beginning of the range of gather locations. - * \param map_last End of the range of gather locations. - * \param input_first Beginning of the source range. - * \param result Beginning of the destination range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. - * \tparam RandomAccessIterator must be a model of Random Access Iterator and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. - * \tparam OutputIterator must be a model of Output Iterator. - * - * \pre The range [map_first, map_last) shall not overlap the range [result, result + (map_last - map_first)). - * - * \remark \p gather is the inverse of thrust::scatter. - * - * The following code snippet demonstrates how to use \p gather to reorder - * a range using the \p thrust::device execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... 
- * // mark even indices with a 1; odd indices with a 0 - * int values[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; - * thrust::device_vector d_values(values, values + 10); - * - * // gather all even indices into the first half of the range - * // and odd indices to the last half of the range - * int map[10] = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9}; - * thrust::device_vector d_map(map, map + 10); - * - * thrust::device_vector d_output(10); - * thrust::gather(thrust::device, - * d_map.begin(), d_map.end(), - * d_values.begin(), - * d_output.begin()); - * // d_output is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0} - * \endcode - */ -template - OutputIterator gather(const thrust::detail::execution_policy_base &exec, - InputIterator map_first, - InputIterator map_last, - RandomAccessIterator input_first, - OutputIterator result); - - -/*! \p gather copies elements from a source array into a destination range according - * to a map. For each input iterator \c i in the range [map_first, map_last), the - * value input_first[\*i] is assigned to *(result + (i - map_first)). - * \p RandomAccessIterator must permit random access. - * - * \param map_first Beginning of the range of gather locations. - * \param map_last End of the range of gather locations. - * \param input_first Beginning of the source range. - * \param result Beginning of the destination range. - * - * \tparam InputIterator must be a model of Input Iterator and \c InputIterator's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. - * \tparam RandomAccessIterator must be a model of Random Access Iterator and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. - * \tparam OutputIterator must be a model of Output Iterator. - * - * \pre The range [map_first, map_last) shall not overlap the range [result, result + (map_last - map_first)). - * - * \remark \p gather is the inverse of thrust::scatter. 
- * - * The following code snippet demonstrates how to use \p gather to reorder - * a range. - * - * \code - * #include - * #include - * ... - * // mark even indices with a 1; odd indices with a 0 - * int values[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; - * thrust::device_vector d_values(values, values + 10); - * - * // gather all even indices into the first half of the range - * // and odd indices to the last half of the range - * int map[10] = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9}; - * thrust::device_vector d_map(map, map + 10); - * - * thrust::device_vector d_output(10); - * thrust::gather(d_map.begin(), d_map.end(), - * d_values.begin(), - * d_output.begin()); - * // d_output is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0} - * \endcode - */ -template - OutputIterator gather(InputIterator map_first, - InputIterator map_last, - RandomAccessIterator input_first, - OutputIterator result); - - -/*! \p gather_if conditionally copies elements from a source array into a destination - * range according to a map. For each input iterator \c i in the range [map_first, map_last), - * such that the value of \*(stencil + (i - map_first)) is \c true, the value - * input_first[\*i] is assigned to *(result + (i - map_first)). - * \p RandomAccessIterator must permit random access. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param map_first Beginning of the range of gather locations. - * \param map_last End of the range of gather locations. - * \param stencil Beginning of the range of predicate values. - * \param input_first Beginning of the source range. - * \param result Beginning of the destination range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. 
- * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c bool. - * \tparam RandomAccessIterator must be a model of Random Access iterator and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. - * \tparam OutputIterator must be a model of Output Iterator. - * - * \pre The range [map_first, map_last) shall not overlap the range [result, result + (map_last - map_first)). - * \pre The range [stencil, stencil + (map_last - map_first)) shall not overlap the range [result, result + (map_last - map_first)). - * - * \remark \p gather_if is the inverse of \p scatter_if. - * - * The following code snippet demonstrates how to use \p gather_if to gather selected values from - * an input range using the \p thrust::device execution policy: - * - * \code - * #include - * #include - * #include - * ... - * - * int values[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - * thrust::device_vector d_values(values, values + 10); - * - * // select elements at even-indexed locations - * int stencil[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; - * thrust::device_vector d_stencil(stencil, stencil + 10); - * - * // map all even indices into the first half of the range - * // and odd indices to the last half of the range - * int map[10] = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9}; - * thrust::device_vector d_map(map, map + 10); - * - * thrust::device_vector d_output(10, 7); - * thrust::gather_if(thrust::device, - * d_map.begin(), d_map.end(), - * d_stencil.begin(), - * d_values.begin(), - * d_output.begin()); - * // d_output is now {0, 7, 4, 7, 8, 7, 3, 7, 7, 7} - * \endcode - */ -template - OutputIterator gather_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 map_first, - InputIterator1 map_last, - InputIterator2 stencil, - RandomAccessIterator input_first, - OutputIterator result); - - -/*! 
\p gather_if conditionally copies elements from a source array into a destination - * range according to a map. For each input iterator \c i in the range [map_first, map_last), - * such that the value of \*(stencil + (i - map_first)) is \c true, the value - * input_first[\*i] is assigned to *(result + (i - map_first)). - * \p RandomAccessIterator must permit random access. - * - * \param map_first Beginning of the range of gather locations. - * \param map_last End of the range of gather locations. - * \param stencil Beginning of the range of predicate values. - * \param input_first Beginning of the source range. - * \param result Beginning of the destination range. - * - * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. - * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c bool. - * \tparam RandomAccessIterator must be a model of Random Access iterator and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. - * \tparam OutputIterator must be a model of Output Iterator. - * - * \pre The range [map_first, map_last) shall not overlap the range [result, result + (map_last - map_first)). - * \pre The range [stencil, stencil + (map_last - map_first)) shall not overlap the range [result, result + (map_last - map_first)). - * - * \remark \p gather_if is the inverse of \p scatter_if. - * - * The following code snippet demonstrates how to use \p gather_if to gather selected values from - * an input range. - * - * \code - * #include - * #include - * ... 
- * - * int values[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - * thrust::device_vector d_values(values, values + 10); - * - * // select elements at even-indexed locations - * int stencil[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; - * thrust::device_vector d_stencil(stencil, stencil + 10); - * - * // map all even indices into the first half of the range - * // and odd indices to the last half of the range - * int map[10] = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9}; - * thrust::device_vector d_map(map, map + 10); - * - * thrust::device_vector d_output(10, 7); - * thrust::gather_if(d_map.begin(), d_map.end(), - * d_stencil.begin(), - * d_values.begin(), - * d_output.begin()); - * // d_output is now {0, 7, 4, 7, 8, 7, 3, 7, 7, 7} - * \endcode - */ -template - OutputIterator gather_if(InputIterator1 map_first, - InputIterator1 map_last, - InputIterator2 stencil, - RandomAccessIterator input_first, - OutputIterator result); - - -/*! \p gather_if conditionally copies elements from a source array into a destination - * range according to a map. For each input iterator \c i in the range [map_first, map_last) - * such that the value of pred(\*(stencil + (i - map_first))) is \c true, - * the value input_first[\*i] is assigned to *(result + (i - map_first)). - * \p RandomAccessIterator must permit random access. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param map_first Beginning of the range of gather locations. - * \param map_last End of the range of gather locations. - * \param stencil Beginning of the range of predicate values. - * \param input_first Beginning of the source range. - * \param result Beginning of the destination range. - * \param pred Predicate to apply to the stencil values. - * - * \tparam DerivedPolicy The name of the derived execution policy. 
- * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. - * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c Predicate's \c argument_type. - * \tparam RandomAccessIterator must be a model of Random Access iterator and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. - * \tparam OutputIterator must be a model of Output Iterator. - * \tparam Predicate must be a model of Predicate. - * - * \pre The range [map_first, map_last) shall not overlap the range [result, result + (map_last - map_first)). - * \pre The range [stencil, stencil + (map_last - map_first)) shall not overlap the range [result, result + (map_last - map_first)). - * - * \remark \p gather_if is the inverse of \p scatter_if. - * - * The following code snippet demonstrates how to use \p gather_if to gather selected values from - * an input range based on an arbitrary selection function using the \p thrust::device execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int x) - * { - * return (x % 2) == 0; - * } - * }; - * ... 
- * - * int values[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - * thrust::device_vector d_values(values, values + 10); - * - * // we will select an element when our stencil is even - * int stencil[10] = {0, 3, 4, 1, 4, 1, 2, 7, 8, 9}; - * thrust::device_vector d_stencil(stencil, stencil + 10); - * - * // map all even indices into the first half of the range - * // and odd indices to the last half of the range - * int map[10] = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9}; - * thrust::device_vector d_map(map, map + 10); - * - * thrust::device_vector d_output(10, 7); - * thrust::gather_if(thrust::device, - * d_map.begin(), d_map.end(), - * d_stencil.begin(), - * d_values.begin(), - * d_output.begin(), - * is_even()); - * // d_output is now {0, 7, 4, 7, 8, 7, 3, 7, 7, 7} - * \endcode - */ -template - OutputIterator gather_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 map_first, - InputIterator1 map_last, - InputIterator2 stencil, - RandomAccessIterator input_first, - OutputIterator result, - Predicate pred); - - -/*! \p gather_if conditionally copies elements from a source array into a destination - * range according to a map. For each input iterator \c i in the range [map_first, map_last) - * such that the value of pred(\*(stencil + (i - map_first))) is \c true, - * the value input_first[\*i] is assigned to *(result + (i - map_first)). - * \p RandomAccessIterator must permit random access. - * - * \param map_first Beginning of the range of gather locations. - * \param map_last End of the range of gather locations. - * \param stencil Beginning of the range of predicate values. - * \param input_first Beginning of the source range. - * \param result Beginning of the destination range. - * \param pred Predicate to apply to the stencil values. - * - * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. 
- * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c Predicate's \c argument_type. - * \tparam RandomAccessIterator must be a model of Random Access iterator and \c RandomAccessIterator's \c value_type must be convertible to \c OutputIterator's \c value_type. - * \tparam OutputIterator must be a model of Output Iterator. - * \tparam Predicate must be a model of Predicate. - * - * \pre The range [map_first, map_last) shall not overlap the range [result, result + (map_last - map_first)). - * \pre The range [stencil, stencil + (map_last - map_first)) shall not overlap the range [result, result + (map_last - map_first)). - * - * \remark \p gather_if is the inverse of \p scatter_if. - * - * The following code snippet demonstrates how to use \p gather_if to gather selected values from - * an input range based on an arbitrary selection function. - * - * \code - * #include - * #include - * - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int x) - * { - * return (x % 2) == 0; - * } - * }; - * ... 
- * - * int values[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - * thrust::device_vector d_values(values, values + 10); - * - * // we will select an element when our stencil is even - * int stencil[10] = {0, 3, 4, 1, 4, 1, 2, 7, 8, 9}; - * thrust::device_vector d_stencil(stencil, stencil + 10); - * - * // map all even indices into the first half of the range - * // and odd indices to the last half of the range - * int map[10] = {0, 2, 4, 6, 8, 1, 3, 5, 7, 9}; - * thrust::device_vector d_map(map, map + 10); - * - * thrust::device_vector d_output(10, 7); - * thrust::gather_if(d_map.begin(), d_map.end(), - * d_stencil.begin(), - * d_values.begin(), - * d_output.begin(), - * is_even()); - * // d_output is now {0, 7, 4, 7, 8, 7, 3, 7, 7, 7} - * \endcode - */ -template - OutputIterator gather_if(InputIterator1 map_first, - InputIterator1 map_last, - InputIterator2 stencil, - RandomAccessIterator input_first, - OutputIterator result, - Predicate pred); - -/*! \} // gathering - */ - -} // end namespace thrust - -#include - diff --git a/compat/thrust/generate.h b/compat/thrust/generate.h deleted file mode 100644 index 1d52721a78..0000000000 --- a/compat/thrust/generate.h +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file generate.h - * \brief Fills a range with values "generated" from a function of no arguments - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup transformations - * \{ - */ - - -/*! \p generate assigns the result of invoking \p gen, a function object that takes no arguments, - * to each element in the range [first,last). - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The first element in the range of interest. - * \param last The last element in the range of interest. - * \param gen A function argument, taking no parameters, used to generate values to assign to - * elements in the range [first,last). - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable. - * \tparam Generator is a model of Generator, - * and \p Generator's \c result_type is convertible to \p ForwardIterator's \c value_type. - * - * The following code snippet demonstrates how to fill a \c host_vector with random numbers, - * using the standard C library function \c rand using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * #include - * ... - * thrust::host_vector v(10); - * srand(13); - * thrust::generate(thrust::host, v.begin(), v.end(), rand); - * - * // the elements of v are now pseudo-random numbers - * \endcode - * - * \see generate_n - * \see http://www.sgi.com/tech/stl/generate.html - */ -template - void generate(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - Generator gen); - - -/*! \p generate assigns the result of invoking \p gen, a function object that takes no arguments, - * to each element in the range [first,last). - * - * \param first The first element in the range of interest. 
- * \param last The last element in the range of interest. - * \param gen A function argument, taking no parameters, used to generate values to assign to - * elements in the range [first,last). - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable. - * \tparam Generator is a model of Generator, - * and \p Generator's \c result_type is convertible to \p ForwardIterator's \c value_type. - * - * The following code snippet demonstrates how to fill a \c host_vector with random numbers, - * using the standard C library function \c rand. - * - * \code - * #include - * #include - * #include - * #include - * ... - * thrust::host_vector v(10); - * srand(13); - * thrust::generate(v.begin(), v.end(), rand); - * - * // the elements of v are now pseudo-random numbers - * \endcode - * - * \see generate_n - * \see http://www.sgi.com/tech/stl/generate.html - */ -template - void generate(ForwardIterator first, - ForwardIterator last, - Generator gen); - - -/*! \p generate_n assigns the result of invoking \p gen, a function object that takes no arguments, - * to each element in the range [first,first + n). The return value is first + n. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The first element in the range of interest. - * \param n The size of the range of interest. - * \param gen A function argument, taking no parameters, used to generate values to assign to - * elements in the range [first,first + n). - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam Size is an integral type (either signed or unsigned). - * \tparam Generator is a model of Generator, - * and \p Generator's \c result_type is convertible to a type in \p OutputIterator's set of \c value_types. 
- * - * The following code snippet demonstrates how to fill a \c host_vector with random numbers, - * using the standard C library function \c rand using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * #include - * ... - * thrust::host_vector v(10); - * srand(13); - * thrust::generate_n(thrust::host, v.begin(), 10, rand); - * - * // the elements of v are now pseudo-random numbers - * \endcode - * - * \see generate - * \see http://www.sgi.com/tech/stl/generate.html - */ -template - OutputIterator generate_n(const thrust::detail::execution_policy_base &exec, - OutputIterator first, - Size n, - Generator gen); - - -/*! \p generate_n assigns the result of invoking \p gen, a function object that takes no arguments, - * to each element in the range [first,first + n). The return value is first + n. - * - * \param first The first element in the range of interest. - * \param n The size of the range of interest. - * \param gen A function argument, taking no parameters, used to generate values to assign to - * elements in the range [first,first + n). - * - * \tparam OutputIterator is a model of Output Iterator. - * \tparam Size is an integral type (either signed or unsigned). - * \tparam Generator is a model of Generator, - * and \p Generator's \c result_type is convertible to a type in \p OutputIterator's set of \c value_types. - * - * The following code snippet demonstrates how to fill a \c host_vector with random numbers, - * using the standard C library function \c rand. - * - * \code - * #include - * #include - * #include - * ... - * thrust::host_vector v(10); - * srand(13); - * thrust::generate_n(v.begin(), 10, rand); - * - * // the elements of v are now pseudo-random numbers - * \endcode - * - * \see generate - * \see http://www.sgi.com/tech/stl/generate.html - */ -template - OutputIterator generate_n(OutputIterator first, - Size n, - Generator gen); - - -/*! 
\} // end transformations - */ - -} // end namespace thrust - -#include - diff --git a/compat/thrust/host_vector.h b/compat/thrust/host_vector.h deleted file mode 100644 index 11b1ae0685..0000000000 --- a/compat/thrust/host_vector.h +++ /dev/null @@ -1,424 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file host_vector.h - * \brief A dynamically-sizable array of elements which reside in the "host" memory space - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -// forward declaration of device_vector -template class device_vector; - -/*! \addtogroup container_classes Container Classes - * \addtogroup host_containers Host Containers - * \ingroup container_classes - * \{ - */ - -/*! A \p host_vector is a container that supports random access to elements, - * constant time removal of elements at the end, and linear time insertion - * and removal of elements at the beginning or in the middle. The number of - * elements in a \p host_vector may vary dynamically; memory management is - * automatic. The memory associated with a \p host_vector resides in the memory - * space of the host associated with a parallel device. - * - * \see http://www.sgi.com/tech/stl/Vector.html - * \see device_vector - */ -template > - class host_vector - : public detail::vector_base -{ - private: - typedef detail::vector_base Parent; - - public: - /*! 
\cond */ - typedef typename Parent::size_type size_type; - typedef typename Parent::value_type value_type; - /*! \endcond */ - - /*! This constructor creates an empty \p host_vector. - */ - __host__ - host_vector(void) - :Parent() {} - - /*! This constructor creates a \p host_vector with the given - * size. - * \param n The number of elements to initially craete. - */ - __host__ - explicit host_vector(size_type n) - :Parent(n) {} - - /*! This constructor creates a \p host_vector with copies - * of an exemplar element. - * \param n The number of elements to initially create. - * \param value An element to copy. - */ - __host__ - explicit host_vector(size_type n, const value_type &value) - :Parent(n,value) {} - - /*! Copy constructor copies from an exemplar \p host_vector. - * \param v The \p host_vector to copy. - */ - __host__ - host_vector(const host_vector &v) - :Parent(v) {} - - /*! Assign operator copies from an exemplar \p host_vector. - * \param v The \p host_vector to copy. - */ - __host__ - host_vector &operator=(const host_vector &v) - { Parent::operator=(v); return *this; } - - /*! Copy constructor copies from an exemplar \p host_vector with different type. - * \param v The \p host_vector to copy. - */ - template - __host__ - host_vector(const host_vector &v) - :Parent(v) {} - - /*! Assign operator copies from an exemplar \p host_vector with different type. - * \param v The \p host_vector to copy. - */ - template - __host__ - host_vector &operator=(const host_vector &v) - { Parent::operator=(v); return *this; } - - /*! Copy constructor copies from an exemplar std::vector. - * \param v The std::vector to copy. - */ - template - __host__ - host_vector(const std::vector &v) - :Parent(v) {} - - /*! Assign operator copies from an exemplar std::vector. - * \param v The std::vector to copy. - */ - template - __host__ - host_vector &operator=(const std::vector &v) - { Parent::operator=(v); return *this;} - - /*! 
Copy constructor copies from an exemplar \p device_vector with possibly different type. - * \param v The \p device_vector to copy. - */ - template - __host__ - host_vector(const device_vector &v); - - /*! Assign operator copies from an exemplar \p device_vector. - * \param v The \p device_vector to copy. - */ - template - __host__ - host_vector &operator=(const device_vector &v) - { Parent::operator=(v); return *this; } - - /*! This constructor builds a \p host_vector from a range. - * \param first The beginning of the range. - * \param last The end of the range. - */ - template - __host__ - host_vector(InputIterator first, InputIterator last) - :Parent(first, last) {} - -// declare these members for the purpose of Doxygenating them -// they actually exist in a derived-from class -#if 0 - /*! \brief Resizes this vector to the specified number of elements. - * \param new_size Number of elements this vector should contain. - * \param x Data with which new elements should be populated. - * \throw std::length_error If n exceeds max_size(). - * - * This method will resize this vector to the specified number of - * elements. If the number is smaller than this vector's current - * size this vector is truncated, otherwise this vector is - * extended and new elements are populated with given data. - */ - void resize(size_type new_size, const value_type &x = value_type()); - - /*! Returns the number of elements in this vector. - */ - size_type size(void) const; - - /*! Returns the size() of the largest possible vector. - * \return The largest possible return value of size(). - */ - size_type max_size(void) const; - - /*! \brief If n is less than or equal to capacity(), this call has no effect. - * Otherwise, this method is a request for allocation of additional memory. If - * the request is successful, then capacity() is greater than or equal to - * n; otherwise, capacity() is unchanged. In either case, size() is unchanged. 
- * \throw std::length_error If n exceeds max_size(). - */ - void reserve(size_type n); - - /*! Returns the number of elements which have been reserved in this - * vector. - */ - size_type capacity(void) const; - - /*! This method shrinks the capacity of this vector to exactly - * fit its elements. - */ - void shrink_to_fit(void); - - /*! \brief Subscript access to the data contained in this vector_dev. - * \param n The index of the element for which data should be accessed. - * \return Read/write reference to data. - * - * This operator allows for easy, array-style, data access. - * Note that data access with this operator is unchecked and - * out_of_range lookups are not defined. - */ - reference operator[](size_type n); - - /*! \brief Subscript read access to the data contained in this vector_dev. - * \param n The index of the element for which data should be accessed. - * \return Read reference to data. - * - * This operator allows for easy, array-style, data access. - * Note that data access with this operator is unchecked and - * out_of_range lookups are not defined. - */ - const_reference operator[](size_type n) const; - - /*! This method returns an iterator pointing to the beginning of - * this vector. - * \return mStart - */ - iterator begin(void); - - /*! This method returns a const_iterator pointing to the beginning - * of this vector. - * \return mStart - */ - const_iterator begin(void) const; - - /*! This method returns a const_iterator pointing to the beginning - * of this vector. - * \return mStart - */ - const_iterator cbegin(void) const; - - /*! This method returns a reverse_iterator pointing to the beginning of - * this vector's reversed sequence. - * \return A reverse_iterator pointing to the beginning of this - * vector's reversed sequence. - */ - reverse_iterator rbegin(void); - - /*! This method returns a const_reverse_iterator pointing to the beginning of - * this vector's reversed sequence. 
- * \return A const_reverse_iterator pointing to the beginning of this - * vector's reversed sequence. - */ - const_reverse_iterator rbegin(void) const; - - /*! This method returns a const_reverse_iterator pointing to the beginning of - * this vector's reversed sequence. - * \return A const_reverse_iterator pointing to the beginning of this - * vector's reversed sequence. - */ - const_reverse_iterator crbegin(void) const; - - /*! This method returns an iterator pointing to one element past the - * last of this vector. - * \return begin() + size(). - */ - iterator end(void); - - /*! This method returns a const_iterator pointing to one element past the - * last of this vector. - * \return begin() + size(). - */ - const_iterator end(void) const; - - /*! This method returns a const_iterator pointing to one element past the - * last of this vector. - * \return begin() + size(). - */ - const_iterator cend(void) const; - - /*! This method returns a reverse_iterator pointing to one element past the - * last of this vector's reversed sequence. - * \return rbegin() + size(). - */ - reverse_iterator rend(void); - - /*! This method returns a const_reverse_iterator pointing to one element past the - * last of this vector's reversed sequence. - * \return rbegin() + size(). - */ - const_reverse_iterator rend(void) const; - - /*! This method returns a const_reverse_iterator pointing to one element past the - * last of this vector's reversed sequence. - * \return rbegin() + size(). - */ - const_reverse_iterator crend(void) const; - - /*! This method returns a const_reference referring to the first element of this - * vector. - * \return The first element of this vector. - */ - const_reference front(void) const; - - /*! This method returns a reference pointing to the first element of this - * vector. - * \return The first element of this vector. - */ - reference front(void); - - /*! This method returns a const reference pointing to the last element of - * this vector. 
- * \return The last element of this vector. - */ - const_reference back(void) const; - - /*! This method returns a reference referring to the last element of - * this vector_dev. - * \return The last element of this vector. - */ - reference back(void); - - /*! This method returns a pointer to this vector's first element. - * \return A pointer to the first element of this vector. - */ - pointer data(void); - - /*! This method returns a const_pointer to this vector's first element. - * \return a const_pointer to the first element of this vector. - */ - const_pointer data(void) const; - - /*! This method resizes this vector to 0. - */ - void clear(void); - - /*! This method returns true iff size() == 0. - * \return true if size() == 0; false, otherwise. - */ - bool empty(void) const; - - /*! This method appends the given element to the end of this vector. - * \param x The element to append. - */ - void push_back(const value_type &x); - - /*! This method erases the last element of this vector, invalidating - * all iterators and references to it. - */ - void pop_back(void); - - /*! This method swaps the contents of this vector_base with another vector. - * \param v The vector with which to swap. - */ - void swap(host_vector &v); - - /*! This method removes the element at position pos. - * \param pos The position of the element of interest. - * \return An iterator pointing to the new location of the element that followed the element - * at position pos. - */ - iterator erase(iterator pos); - - /*! This method removes the range of elements [first,last) from this vector. - * \param first The beginning of the range of elements to remove. - * \param last The end of the range of elements to remove. - * \return An iterator pointing to the new location of the element that followed the last - * element in the sequence [first,last). - */ - iterator erase(iterator first, iterator last); - - /*! 
This method inserts a single copy of a given exemplar value at the - * specified position in this vector. - * \param position The insertion position. - * \param x The exemplar element to copy & insert. - * \return An iterator pointing to the newly inserted element. - */ - iterator insert(iterator position, const T &x); - - /*! This method inserts a copy of an exemplar value to a range at the - * specified position in this vector. - * \param position The insertion position - * \param n The number of insertions to perform. - * \param x The value to replicate and insert. - */ - void insert(iterator position, size_type n, const T &x); - - /*! This method inserts a copy of an input range at the specified position - * in this vector. - * \param position The insertion position. - * \param first The beginning of the range to copy. - * \param last The end of the range to copy. - * - * \tparam InputIterator is a model of Assignable. - */ - template - void insert(iterator position, InputIterator first, InputIterator last); - - /*! This version of \p assign replicates a given exemplar - * \p n times into this vector. - * \param n The number of times to copy \p x. - * \param x The exemplar element to replicate. - */ - void assign(size_type n, const T &x); - - /*! This version of \p assign makes this vector a copy of a given input range. - * \param first The beginning of the range to copy. - * \param last The end of the range to copy. - * - * \tparam InputIterator is a model of Input Iterator. - */ - template - void assign(InputIterator first, InputIterator last); - - /*! This method returns a copy of this vector's allocator. - * \return A copy of the alloctor used by this vector. - */ - allocator_type get_allocator(void) const; -#endif // end doxygen-only members -}; // end host_vector - -/*! 
\} - */ - -} // end thrust - -#include - diff --git a/compat/thrust/inner_product.h b/compat/thrust/inner_product.h deleted file mode 100644 index 01f55414bd..0000000000 --- a/compat/thrust/inner_product.h +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file inner_product.h - * \brief Mathematical inner product between ranges - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup reductions - * \{ - * \addtogroup transformed_reductions Transformed Reductions - * \ingroup reductions - * \{ - */ - - -/*! \p inner_product calculates an inner product of the ranges - * [first1, last1) and [first2, first2 + (last1 - first1)). - * - * Specifically, this version of \p inner_product computes the sum - * init + (*first1 * *first2) + (*(first1+1) * *(first2+1)) + ... - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the first sequence. - * \param last1 The end of the first sequence. - * \param first2 The beginning of the second sequence. - * \param init Initial value of the result. - * \return The inner product of sequences [first1, last1) - * and [first2, last2) plus \p init. - * - * \tparam DerivedPolicy The name of the derived execution policy. 
- * \tparam InputIterator1 is a model of Input Iterator, - * \tparam InputIterator2 is a model of Input Iterator, - * \tparam OutputType is a model of Assignable, - * and if \c x is an object of type \p OutputType, and \c y is an object of \p InputIterator1's \c value_type, - * and \c z is an object of \p InputIterator2's \c value_type, then x + y * z is defined - * and is convertible to \p OutputType. - * - * The following code demonstrates how to use \p inner_product to - * compute the dot product of two vectors using the \p thrust::host execution policy for parallelization. - * - * \code - * #include - * #include - * ... - * float vec1[3] = {1.0f, 2.0f, 5.0f}; - * float vec2[3] = {4.0f, 1.0f, 5.0f}; - * - * float result = thrust::inner_product(thrust::host, vec1, vec1 + 3, vec2, 0.0f); - * - * // result == 31.0f - * \endcode - * - * \see http://www.sgi.com/tech/stl/inner_product.html - */ -template -OutputType inner_product(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputType init); - - -/*! \p inner_product calculates an inner product of the ranges - * [first1, last1) and [first2, first2 + (last1 - first1)). - * - * Specifically, this version of \p inner_product computes the sum - * init + (*first1 * *first2) + (*(first1+1) * *(first2+1)) + ... - * - * Unlike the C++ Standard Template Library function std::inner_product, - * this version offers no guarantee on order of execution. - * - * \param first1 The beginning of the first sequence. - * \param last1 The end of the first sequence. - * \param first2 The beginning of the second sequence. - * \param init Initial value of the result. - * \return The inner product of sequences [first1, last1) - * and [first2, last2) plus \p init. 
- * - * \tparam InputIterator1 is a model of Input Iterator, - * \tparam InputIterator2 is a model of Input Iterator, - * \tparam OutputType is a model of Assignable, - * and if \c x is an object of type \p OutputType, and \c y is an object of \p InputIterator1's \c value_type, - * and \c z is an object of \p InputIterator2's \c value_type, then x + y * z is defined - * and is convertible to \p OutputType. - * - * The following code demonstrates how to use \p inner_product to - * compute the dot product of two vectors. - * - * \code - * #include - * ... - * float vec1[3] = {1.0f, 2.0f, 5.0f}; - * float vec2[3] = {4.0f, 1.0f, 5.0f}; - * - * float result = thrust::inner_product(vec1, vec1 + 3, vec2, 0.0f); - * - * // result == 31.0f - * \endcode - * - * \see http://www.sgi.com/tech/stl/inner_product.html - */ -template -OutputType inner_product(InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, OutputType init); - - -/*! \p inner_product calculates an inner product of the ranges - * [first1, last1) and [first2, first2 + (last1 - first1)). - * - * This version of \p inner_product is identical to the first, except that is uses - * two user-supplied function objects instead of \c operator+ and \c operator*. - * - * Specifically, this version of \p inner_product computes the sum - * binary_op1( init, binary_op2(*first1, *first2) ), ... - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the first sequence. - * \param last1 The end of the first sequence. - * \param first2 The beginning of the second sequence. - * \param init Initial value of the result. - * \param binary_op1 Generalized addition operation. - * \param binary_op2 Generalized multiplication operation. - * \return The inner product of sequences [first1, last1) and [first2, last2). - * - * \tparam DerivedPolicy The name of the derived execution policy. 
- * \tparam InputIterator1 is a model of Input Iterator, - * and \p InputIterator1's \c value_type is convertible to \p BinaryFunction2's \c first_argument_type. - * \tparam InputIterator2 is a model of Input Iterator. - * and \p InputIterator2's \c value_type is convertible to \p BinaryFunction2's \c second_argument_type. - * \tparam OutputType is a model of Assignable, - * and \p OutputType is convertible to \p BinaryFunction1's \c first_argument_type. - * \tparam BinaryFunction1 is a model of Binary Function, - * and \p BinaryFunction1's \c return_type is convertible to \p OutputType. - * \tparam BinaryFunction2 is a model of Binary Function, - * and \p BinaryFunction2's \c return_type is convertible to \p BinaryFunction1's \c second_argument_type. - * - * \code - * #include - * #include - * ... - * float vec1[3] = {1.0f, 2.0f, 5.0f}; - * float vec2[3] = {4.0f, 1.0f, 5.0f}; - * - * float init = 0.0f; - * thrust::plus binary_op1; - * thrust::multiplies binary_op2; - * - * float result = thrust::inner_product(thrust::host, vec1, vec1 + 3, vec2, init, binary_op1, binary_op2); - * - * // result == 31.0f - * \endcode - * - * \see http://www.sgi.com/tech/stl/inner_product.html - */ -template -OutputType inner_product(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputType init, - BinaryFunction1 binary_op1, - BinaryFunction2 binary_op2); - - -/*! \p inner_product calculates an inner product of the ranges - * [first1, last1) and [first2, first2 + (last1 - first1)). - * - * This version of \p inner_product is identical to the first, except that is uses - * two user-supplied function objects instead of \c operator+ and \c operator*. - * - * Specifically, this version of \p inner_product computes the sum - * binary_op1( init, binary_op2(*first1, *first2) ), ... 
- * - * Unlike the C++ Standard Template Library function std::inner_product, - * this version offers no guarantee on order of execution. - * - * \param first1 The beginning of the first sequence. - * \param last1 The end of the first sequence. - * \param first2 The beginning of the second sequence. - * \param init Initial value of the result. - * \param binary_op1 Generalized addition operation. - * \param binary_op2 Generalized multiplication operation. - * \return The inner product of sequences [first1, last1) and [first2, last2). - * - * \tparam InputIterator1 is a model of Input Iterator, - * and \p InputIterator1's \c value_type is convertible to \p BinaryFunction2's \c first_argument_type. - * \tparam InputIterator2 is a model of Input Iterator. - * and \p InputIterator2's \c value_type is convertible to \p BinaryFunction2's \c second_argument_type. - * \tparam OutputType is a model of Assignable, - * and \p OutputType is convertible to \p BinaryFunction1's \c first_argument_type. - * \tparam BinaryFunction1 is a model of Binary Function, - * and \p BinaryFunction1's \c return_type is convertible to \p OutputType. - * \tparam BinaryFunction2 is a model of Binary Function, - * and \p BinaryFunction2's \c return_type is convertible to \p BinaryFunction1's \c second_argument_type. - * - * \code - * #include - * ... - * float vec1[3] = {1.0f, 2.0f, 5.0f}; - * float vec2[3] = {4.0f, 1.0f, 5.0f}; - * - * float init = 0.0f; - * thrust::plus binary_op1; - * thrust::multiplies binary_op2; - * - * float result = thrust::inner_product(vec1, vec1 + 3, vec2, init, binary_op1, binary_op2); - * - * // result == 31.0f - * \endcode - * - * \see http://www.sgi.com/tech/stl/inner_product.html - */ -template -OutputType inner_product(InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, OutputType init, - BinaryFunction1 binary_op1, BinaryFunction2 binary_op2); - - -/*! 
\} // end transformed_reductions - * \} // end reductions - */ - -} // end namespace thrust - -#include - diff --git a/compat/thrust/iterator/constant_iterator.h b/compat/thrust/iterator/constant_iterator.h deleted file mode 100644 index e9e03c18c2..0000000000 --- a/compat/thrust/iterator/constant_iterator.h +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file thrust/iterator/constant_iterator.h - * \brief An iterator which returns a constant value when - * dereferenced - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ - -/*! \addtogroup iterators - * \{ - */ - -/*! \addtogroup fancyiterator Fancy Iterators - * \ingroup iterators - * \{ - */ - -/*! \p constant_iterator is an iterator which represents a pointer into a range - * of constant values. This iterator is useful for creating a range filled with the same - * value without explicitly storing it in memory. Using \p constant_iterator saves both - * memory capacity and bandwidth. - * - * The following code snippet demonstrates how to create a \p constant_iterator whose - * \c value_type is \c int and whose value is \c 10. - * - * \code - * #include - * - * thrust::constant_iterator iter(10); - * - * *iter; // returns 10 - * iter[0]; // returns 10 - * iter[1]; // returns 10 - * iter[13]; // returns 10 - * - * // and so on... 
- * \endcode - * - * This next example demonstrates how to use a \p constant_iterator with the - * \p thrust::transform function to increment all elements of a sequence by the - * same value. We will create a temporary \p constant_iterator with the function - * \p make_constant_iterator function in order to avoid explicitly specifying - * its type: - * - * \code - * #include - * #include - * #include - * #include - * - * int main(void) - * { - * thrust::device_vector data(4); - * data[0] = 3; - * data[1] = 7; - * data[2] = 2; - * data[3] = 5; - * - * // add 10 to all values in data - * thrust::transform(data.begin(), data.end(), - * thrust::make_constant_iterator(10), - * data.begin(), - * thrust::plus()); - * - * // data is now [13, 17, 12, 15] - * - * return 0; - * } - * \endcode - * - * \see make_constant_iterator - */ -template - class constant_iterator - : public detail::constant_iterator_base::type -{ - /*! \cond - */ - friend class thrust::iterator_core_access; - typedef typename detail::constant_iterator_base::type super_t; - typedef typename detail::constant_iterator_base::incrementable incrementable; - typedef typename detail::constant_iterator_base::base_iterator base_iterator; - - public: - typedef typename super_t::reference reference; - typedef typename super_t::value_type value_type; - - /*! \endcond - */ - - /*! Null constructor initializes this \p constant_iterator's constant using its - * null constructor. - */ - __host__ __device__ - constant_iterator(void) - : super_t(), m_value(){}; - - /*! Copy constructor copies the value of another \p constant_iterator into this - * \p constant_iterator. - * - * \p rhs The constant_iterator to copy. - */ - __host__ __device__ - constant_iterator(constant_iterator const &rhs) - : super_t(rhs.base()), m_value(rhs.m_value) {} - - /*! Copy constructor copies the value of another \p constant_iterator with related - * System type. - * - * \param rhs The \p constant_iterator to copy. 
- */ - template - __host__ __device__ - constant_iterator(constant_iterator const &rhs, - typename thrust::detail::enable_if_convertible< - typename thrust::iterator_system >::type, - typename thrust::iterator_system::type - >::type * = 0) - : super_t(rhs.base()), m_value(rhs.value()) {} - - /*! This constructor receives a value to use as the constant value of this - * \p constant_iterator and an index specifying the location of this - * \p constant_iterator in a sequence. - * - * \p v The value of this \p constant_iterator's constant value. - * \p i The index of this \p constant_iterator in a sequence. Defaults to the - * value returned by \c Incrementable's null constructor. For example, - * when Incrementable == int, \c 0. - */ - __host__ __device__ - constant_iterator(value_type const& v, incrementable const &i = incrementable()) - : super_t(base_iterator(i)), m_value(v) {} - - /*! This constructor is templated to allow construction from a value type and - * incrementable type related this this \p constant_iterator's respective types. - * - * \p v The value of this \p constant_iterator's constant value. - * \p i The index of this \p constant_iterator in a sequence. Defaults to the - * value returned by \c Incrementable's null constructor. For example, - * when Incrementable == int, \c 0. - */ - template - __host__ __device__ - constant_iterator(OtherValue const& v, OtherIncrementable const& i = incrementable()) - : super_t(base_iterator(i)), m_value(v) {} - - /*! This method returns the value of this \p constant_iterator's constant value. - * \return A \c const reference to this \p constant_iterator's constant value. - */ - __host__ __device__ - Value const& value(void) const - { return m_value; } - - /*! 
\cond - */ - - protected: - __host__ __device__ - Value const& value_reference(void) const - { return m_value; } - - __host__ __device__ - Value & value_reference(void) - { return m_value; } - - private: // Core iterator interface - __host__ __device__ - reference dereference(void) const - { - return m_value; - } - - private: - Value m_value; - - /*! \endcond - */ -}; // end constant_iterator - - -/*! This version of \p make_constant_iterator creates a \p constant_iterator - * from values given for both value and index. The type of \p constant_iterator - * may be inferred by the compiler from the types of its parameters. - * - * \param x The value of the returned \p constant_iterator's constant value. - * \param i The index of the returned \p constant_iterator within a sequence. - * The type of this parameter defaults to \c int. In the default case, - * the value of this parameter is \c 0. - * - * \return A new \p constant_iterator with constant value & index as given - * by \p x & \p i. - * - * \see constant_iterator - */ -template -inline __host__ __device__ -constant_iterator make_constant_iterator(V x, I i = int()) -{ - return constant_iterator(x, i); -} // end make_constant_iterator() - - -/*! This version of \p make_constant_iterator creates a \p constant_iterator - * using only a parameter for the desired constant value. The value of the - * returned \p constant_iterator's index is set to \c 0. - * - * \param x The value of the returned \p constant_iterator's constant value. - * \return A new \p constant_iterator with constant value equal to \p x and - * index equal to \c 0. - * \see constant_iterator - */ -template -inline __host__ __device__ -constant_iterator make_constant_iterator(V x) -{ - return constant_iterator(x, 0); -} // end make_constant_iterator() - -/*! \} // end fancyiterators - */ - -/*! 
\} // end iterators - */ - -} // end namespace thrust - diff --git a/compat/thrust/iterator/counting_iterator.h b/compat/thrust/iterator/counting_iterator.h deleted file mode 100644 index 99812cae17..0000000000 --- a/compat/thrust/iterator/counting_iterator.h +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file thrust/iterator/counting_iterator.h - * \brief An iterator which returns an increasing incrementable value - * when dereferenced - */ - -/* - * Copyright David Abrahams 2003. - * - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying NOTICE file for the complete license) - * - * For more information, see http://www.boost.org - */ - -#pragma once - -#include -#include -#include -#include - -// #include the details first -#include - -namespace thrust -{ - -/*! \addtogroup iterators - * \{ - */ - -/*! \addtogroup fancyiterator Fancy Iterators - * \ingroup iterators - * \{ - */ - -/*! \p counting_iterator is an iterator which represents a pointer into a range - * of sequentially changing values. This iterator is useful for creating a range - * filled with a sequence without explicitly storing it in memory. Using - * \p counting_iterator saves memory capacity and bandwidth. - * - * The following code snippet demonstrates how to create a \p counting_iterator whose - * \c value_type is \c int and which sequentially increments by \c 1. 
- * - * \code - * #include - * ... - * // create iterators - * thrust::counting_iterator first(10); - * thrust::counting_iterator last = first + 3; - * - * first[0] // returns 10 - * first[1] // returns 11 - * first[100] // returns 110 - * - * // sum of [first, last) - * thrust::reduce(first, last); // returns 33 (i.e. 10 + 11 + 12) - * - * // initialize vector to [0,1,2,..] - * thrust::counting_iterator iter(0); - * thrust::device_vector vec(500); - * thrust::copy(iter, iter + vec.size(), vec.begin()); - * \endcode - * - * This next example demonstrates how to use a \p counting_iterator with the - * \p thrust::copy_if function to compute the indices of the non-zero elements - * of a \p device_vector. In this example, we use the \p make_counting_iterator - * function to avoid specifying the type of the \p counting_iterator. - * - * \code - * #include - * #include - * #include - * #include - * - * int main(void) - * { - * // this example computes indices for all the nonzero values in a sequence - * - * // sequence of zero and nonzero values - * thrust::device_vector stencil(8); - * stencil[0] = 0; - * stencil[1] = 1; - * stencil[2] = 1; - * stencil[3] = 0; - * stencil[4] = 0; - * stencil[5] = 1; - * stencil[6] = 0; - * stencil[7] = 1; - * - * // storage for the nonzero indices - * thrust::device_vector indices(8); - * - * // compute indices of nonzero elements - * typedef thrust::device_vector::iterator IndexIterator; - * - * // use make_counting_iterator to define the sequence [0, 8) - * IndexIterator indices_end = thrust::copy_if(thrust::make_counting_iterator(0), - * thrust::make_counting_iterator(8), - * stencil.begin(), - * indices.begin(), - * thrust::identity()); - * // indices now contains [1,2,5,7] - * - * return 0; - * } - * \endcode - * - * \see make_counting_iterator - */ -template - class counting_iterator - : public detail::counting_iterator_base::type -{ - /*! 
\cond - */ - typedef typename detail::counting_iterator_base::type super_t; - - friend class thrust::iterator_core_access; - - public: - typedef typename super_t::reference reference; - typedef typename super_t::difference_type difference_type; - - /*! \endcond - */ - - /*! Null constructor initializes this \p counting_iterator's \c Incrementable - * counter using its null constructor. - */ - __host__ __device__ - counting_iterator(void){}; - - /*! Copy constructor copies the value of another \p counting_iterator into a - * new \p counting_iterator. - * - * \p rhs The \p counting_iterator to copy. - */ - __host__ __device__ - counting_iterator(counting_iterator const &rhs):super_t(rhs.base()){} - - /*! Copy constructor copies the value of another counting_iterator - * with related System type. - * - * \param rhs The \p counting_iterator to copy. - */ - template - __host__ __device__ - counting_iterator(counting_iterator const &rhs, - typename thrust::detail::enable_if_convertible< - typename thrust::iterator_system >::type, - typename thrust::iterator_system::type - >::type * = 0) - : super_t(rhs.base()){} - - /*! This \c explicit constructor copies the value of an \c Incrementable - * into a new \p counting_iterator's \c Incrementable counter. - * - * \param x The initial value of the new \p counting_iterator's \c Incrementable - * counter. - */ - __host__ __device__ - explicit counting_iterator(Incrementable x):super_t(x){} - - /*! 
\cond - */ - private: - __host__ __device__ - reference dereference(void) const - { - return this->base_reference(); - } - - // note that we implement equal specially for floating point counting_iterator - template - __host__ __device__ - bool equal(counting_iterator const& y) const - { - typedef thrust::detail::counting_iterator_equal e; - return e::equal(this->base(), y.base()); - } - - template - __host__ __device__ - difference_type - distance_to(counting_iterator const& y) const - { - typedef typename - thrust::detail::eval_if< - thrust::detail::is_numeric::value, - thrust::detail::identity_ >, - thrust::detail::identity_ > - >::type d; - - return d::distance(this->base(), y.base()); - } - - /*! \endcond - */ -}; // end counting_iterator - - -/*! \p make_counting_iterator creates a \p counting_iterator - * using an initial value for its \c Incrementable counter. - * - * \param x The initial value of the new \p counting_iterator's counter. - * \return A new \p counting_iterator whose counter has been initialized to \p x. - */ -template -inline __host__ __device__ -counting_iterator make_counting_iterator(Incrementable x) -{ - return counting_iterator(x); -} - -/*! \} // end fancyiterators - */ - -/*! \} // end iterators - */ - -} // end thrust - diff --git a/compat/thrust/iterator/detail/any_assign.h b/compat/thrust/iterator/detail/any_assign.h deleted file mode 100644 index e08a829ec0..0000000000 --- a/compat/thrust/iterator/detail/any_assign.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -namespace thrust -{ -namespace detail -{ - - -// a type which may be assigned any other type -struct any_assign -{ - inline __host__ __device__ any_assign(void) - {} - - template - inline __host__ __device__ any_assign(T) - {} - - template - inline __host__ __device__ - any_assign &operator=(T) - { - if(0) - { - // trick the compiler into silencing "warning: this expression has no effect" - int *x = 0; - *x = 13; - } // end if - - return *this; - } -}; - - -} // end detail -} // end thrust - diff --git a/compat/thrust/iterator/detail/any_system_tag.h b/compat/thrust/iterator/detail/any_system_tag.h deleted file mode 100644 index fc6417ad8a..0000000000 --- a/compat/thrust/iterator/detail/any_system_tag.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ - -struct any_system_tag - : thrust::execution_policy -{ - // allow any_system_tag to convert to any type at all - // XXX make this safer using enable_if> upon c++11 - template operator T () const {return T();} -}; - -// TODO remove this in 1.7.0 -typedef THRUST_DEPRECATED any_system_tag any_space_tag; - -} // end thrust - diff --git a/compat/thrust/iterator/detail/constant_iterator_base.h b/compat/thrust/iterator/detail/constant_iterator_base.h deleted file mode 100644 index 276e5ff0ef..0000000000 --- a/compat/thrust/iterator/detail/constant_iterator_base.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ - -// forward declaration of constant_iterator -template class constant_iterator; - -namespace detail -{ - -template - struct constant_iterator_base -{ - typedef Value value_type; - - // the reference type is the same as the value_type. - // we wish to avoid returning a reference to the internal state - // of the constant_iterator, which is prone to subtle bugs. 
- // consider the temporary iterator created in the expression - // *(iter + i) - typedef value_type reference; - - // the incrementable type is int unless otherwise specified - typedef typename thrust::detail::ia_dflt_help< - Incrementable, - thrust::detail::identity_ - >::type incrementable; - - typedef typename thrust::counting_iterator< - incrementable, - System, - thrust::random_access_traversal_tag - > base_iterator; - - typedef typename thrust::iterator_adaptor< - constant_iterator, - base_iterator, - value_type, // XXX we may need to pass const value_type here as boost counting_iterator does - typename thrust::iterator_system::type, - typename thrust::iterator_traversal::type, - reference - > type; -}; // end constant_iterator_base - -} // end detail - -} // end thrust - diff --git a/compat/thrust/iterator/detail/counting_iterator.inl b/compat/thrust/iterator/detail/counting_iterator.inl deleted file mode 100644 index ad4fcffaa6..0000000000 --- a/compat/thrust/iterator/detail/counting_iterator.inl +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ - -// forward declaration of counting_iterator -template - class counting_iterator; - -namespace detail -{ - -template - struct counting_iterator_base -{ - typedef typename thrust::detail::eval_if< - // use any_system_tag if we are given use_default - thrust::detail::is_same::value, - thrust::detail::identity_, - thrust::detail::identity_ - >::type system; - - typedef typename thrust::detail::ia_dflt_help< - Traversal, - thrust::detail::eval_if< - thrust::detail::is_numeric::value, - thrust::detail::identity_, - thrust::iterator_traversal - > - >::type traversal; - - // unlike Boost, we explicitly use std::ptrdiff_t as the difference type - // for floating point counting_iterators - typedef typename thrust::detail::ia_dflt_help< - Difference, - thrust::detail::eval_if< - thrust::detail::is_numeric::value, - thrust::detail::eval_if< - thrust::detail::is_integral::value, - thrust::detail::numeric_difference, - thrust::detail::identity_ - >, - thrust::iterator_difference - > - >::type difference; - - // our implementation departs from Boost's in that counting_iterator::dereference - // returns a copy of its counter, rather than a reference to it. 
returning a reference - // to the internal state of an iterator causes subtle bugs (consider the temporary - // iterator created in the expression *(iter + i) ) and has no compelling use case - typedef thrust::iterator_adaptor< - counting_iterator, // self - Incrementable, // Base - Incrementable, // XXX we may need to pass const here as Boost does - system, - traversal, - Incrementable, - difference - > type; -}; // end counting_iterator_base - - -template - struct iterator_distance -{ - __host__ __device__ - static Difference distance(Incrementable1 x, Incrementable2 y) - { - return y - x; - } -}; - - -template - struct number_distance -{ - __host__ __device__ - static Difference distance(Incrementable1 x, Incrementable2 y) - { - return static_cast(numeric_distance(x,y)); - } -}; - - -template - struct counting_iterator_equal -{ - __host__ __device__ - static bool equal(Incrementable1 x, Incrementable2 y) - { - return x == y; - } -}; - - -// specialization for floating point equality -template - struct counting_iterator_equal< - Difference, - Incrementable1, - Incrementable2, - typename thrust::detail::enable_if< - thrust::detail::is_floating_point::value || - thrust::detail::is_floating_point::value - >::type - > -{ - __host__ __device__ - static bool equal(Incrementable1 x, Incrementable2 y) - { - typedef number_distance d; - return d::distance(x,y) == 0; - } -}; - - -} // end detail -} // end thrust - diff --git a/compat/thrust/iterator/detail/device_system_tag.h b/compat/thrust/iterator/detail/device_system_tag.h deleted file mode 100644 index ab66fb48bf..0000000000 --- a/compat/thrust/iterator/detail/device_system_tag.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// #include the device system's execution_policy header -#define __THRUST_DEVICE_SYSTEM_TAG_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/execution_policy.h> -#include __THRUST_DEVICE_SYSTEM_TAG_HEADER -#undef __THRUST_DEVICE_SYSTEM_TAG_HEADER - -namespace thrust -{ - -typedef thrust::system::__THRUST_DEVICE_SYSTEM_NAMESPACE::tag device_system_tag; - -} // end thrust - -// TODO remove this in 1.8.0 -namespace thrust -{ - -typedef THRUST_DEPRECATED device_system_tag device_space_tag; - -} // end thrust - diff --git a/compat/thrust/iterator/detail/discard_iterator_base.h b/compat/thrust/iterator/detail/discard_iterator_base.h deleted file mode 100644 index 1909ca8239..0000000000 --- a/compat/thrust/iterator/detail/discard_iterator_base.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include // for std::ptrdiff_t - -namespace thrust -{ - -// forward declaration of discard_iterator -template class discard_iterator; - -namespace detail -{ - - -template - struct discard_iterator_base -{ - // XXX value_type should actually be void - // but this interferes with zip_iterator - typedef any_assign value_type; - typedef any_assign& reference; - typedef std::ptrdiff_t incrementable; - - typedef typename thrust::counting_iterator< - incrementable, - System, - thrust::random_access_traversal_tag - > base_iterator; - - typedef typename thrust::iterator_adaptor< - discard_iterator, - base_iterator, - value_type, - typename thrust::iterator_system::type, - typename thrust::iterator_traversal::type, - reference - > type; -}; // end discard_iterator_base - - -} // end detail - -} // end thrust - - diff --git a/compat/thrust/iterator/detail/distance_from_result.h b/compat/thrust/iterator/detail/distance_from_result.h deleted file mode 100644 index bf83e6ca44..0000000000 --- a/compat/thrust/iterator/detail/distance_from_result.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ - -namespace detail -{ - -// since both arguments are known to be specializations of iterator_facade, -// it's legal to access IteratorFacade2::difference_type -template - struct distance_from_result - : eval_if< - is_convertible::value, - identity_, - identity_ - > -{}; - -} // end detail - -} // end thrust - diff --git a/compat/thrust/iterator/detail/host_system_tag.h b/compat/thrust/iterator/detail/host_system_tag.h deleted file mode 100644 index 26d3f7d73f..0000000000 --- a/compat/thrust/iterator/detail/host_system_tag.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// #include the host system's execution_policy header -#define __THRUST_HOST_SYSTEM_TAG_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/execution_policy.h> -#include __THRUST_HOST_SYSTEM_TAG_HEADER -#undef __THRUST_HOST_SYSTEM_TAG_HEADER - -namespace thrust -{ - -typedef thrust::system::__THRUST_HOST_SYSTEM_NAMESPACE::tag host_system_tag; - -} // end thrust - -// TODO remove this in 1.8.0 -namespace thrust -{ - -typedef THRUST_DEPRECATED host_system_tag host_space_tag; - -} // end thrust - diff --git a/compat/thrust/iterator/detail/is_iterator_category.h b/compat/thrust/iterator/detail/is_iterator_category.h deleted file mode 100644 index 95f14d558c..0000000000 --- a/compat/thrust/iterator/detail/is_iterator_category.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ - -namespace detail -{ - -template - struct is_host_iterator_category - : thrust::detail::or_< - thrust::detail::is_convertible, - thrust::detail::is_convertible - > -{ -}; // end is_host_iterator_category - -template - struct is_device_iterator_category - : thrust::detail::or_< - thrust::detail::is_convertible, - thrust::detail::is_convertible - > -{ -}; // end is_device_iterator_category - - -template - struct is_iterator_category - : thrust::detail::or_< - is_host_iterator_category, - is_device_iterator_category - > -{ -}; // end is_iterator_category - -} // end detail - -} // end thrust - diff --git a/compat/thrust/iterator/detail/is_trivial_iterator.h b/compat/thrust/iterator/detail/is_trivial_iterator.h deleted file mode 100644 index ca37e74e64..0000000000 --- a/compat/thrust/iterator/detail/is_trivial_iterator.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -#if __GNUC__ -// forward declaration of gnu's __normal_iterator -namespace __gnu_cxx -{ - -template class __normal_iterator; - -} // end __gnu_cxx -#endif // __GNUC__ - -#if _MSC_VER -// forward declaration of MSVC's "normal iterators" -namespace std -{ - -template struct _Ranit; - -} // end std -#endif // _MSC_VER - -namespace thrust -{ -namespace detail -{ - -#ifdef __GNUC__ -template - struct is_gnu_normal_iterator - : false_type -{}; - - -// catch gnu __normal_iterators -template - struct is_gnu_normal_iterator< __gnu_cxx::__normal_iterator > - : true_type -{}; -#endif // __GNUC__ - - -#ifdef _MSC_VER -// catch msvc _Ranit -template - struct is_convertible_to_msvc_Ranit : - is_convertible< - Iterator, - std::_Ranit< - typename iterator_value::type, - typename iterator_difference::type, - typename iterator_pointer::type, - typename iterator_reference::type - > - > -{}; -#endif // _MSC_VER - - -template - struct is_trivial_iterator : - integral_constant< - bool, - is_pointer::value - | thrust::detail::is_thrust_pointer::value -#if __GNUC__ - | is_gnu_normal_iterator::value -#endif // __GNUC__ -#ifdef _MSC_VER - | is_convertible_to_msvc_Ranit::value -#endif // _MSC_VER - > -{}; - -} // end detail -} // end thrust - diff --git a/compat/thrust/iterator/detail/iterator_adaptor_base.h b/compat/thrust/iterator/detail/iterator_adaptor_base.h deleted file mode 100644 index 8b77f05d81..0000000000 --- a/compat/thrust/iterator/detail/iterator_adaptor_base.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - - -// forward declaration of iterator_adaptor for iterator_adaptor_base below -template -class iterator_adaptor; - - -namespace detail -{ - -// If T is use_default, return the result of invoking -// DefaultNullaryFn, otherwise return T. -// XXX rename to dflt_help -template -struct ia_dflt_help - : thrust::detail::eval_if< - thrust::detail::is_same::value - , DefaultNullaryFn - , thrust::detail::identity_ - > -{ -}; // end ia_dflt_help - - -// A metafunction which computes an iterator_adaptor's base class, -// a specialization of iterator_facade. 
-template - struct iterator_adaptor_base -{ - typedef typename ia_dflt_help< - Value, - iterator_value - >::type value; - - typedef typename ia_dflt_help< - System, - thrust::iterator_system - >::type system; - - typedef typename ia_dflt_help< - Traversal, - thrust::iterator_traversal - >::type traversal; - - typedef typename ia_dflt_help< - Reference, - thrust::detail::eval_if< - thrust::detail::is_same::value, - thrust::iterator_reference, - thrust::detail::add_reference - > - >::type reference; - - typedef typename ia_dflt_help< - Difference, - iterator_difference - >::type difference; - - typedef thrust::iterator_facade< - Derived, - value, - system, - traversal, - reference, - difference - > type; -}; // end iterator_adaptor_base - - -} // end detail -} // end thrust - diff --git a/compat/thrust/iterator/detail/iterator_category_to_system.h b/compat/thrust/iterator/detail/iterator_category_to_system.h deleted file mode 100644 index 17e7d78c8f..0000000000 --- a/compat/thrust/iterator/detail/iterator_category_to_system.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - -// XXX WAR circular #inclusion with forward declarations -struct random_access_universal_iterator_tag; -struct input_universal_iterator_tag; -struct output_universal_iterator_tag; - -namespace detail -{ - -// forward declaration -template struct is_iterator_system; - -template struct device_iterator_category_to_backend_system; - -// XXX this should work entirely differently -// we should just specialize this metafunction for iterator_category_with_system_and_traversal -template - struct iterator_category_to_system - // convertible to any iterator? - : eval_if< - or_< - is_convertible, - is_convertible - >::value, - - detail::identity_, - - // convertible to host iterator? - eval_if< - or_< - is_convertible, - is_convertible - >::value, - - detail::identity_, - - // convertible to device iterator? - eval_if< - or_< - is_convertible, - is_convertible - >::value, - - detail::identity_, - - // unknown system - detail::identity_ - > // if device - > // if host - > // if any -{ -}; // end iterator_category_to_system - - -template - struct iterator_category_or_traversal_to_system - : eval_if< - is_iterator_system::value, - detail::identity_, - iterator_category_to_system - > -{ -}; // end iterator_category_or_traversal_to_system - -} // end detail -} // end thrust - diff --git a/compat/thrust/iterator/detail/iterator_category_to_traversal.h b/compat/thrust/iterator/detail/iterator_category_to_traversal.h deleted file mode 100644 index 04ef60c0c2..0000000000 --- a/compat/thrust/iterator/detail/iterator_category_to_traversal.h +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ - -// XXX WAR circular #inclusion with these forward declarations -struct bidirectional_universal_iterator_tag; -struct forward_universal_iterator_tag; - -namespace detail -{ - -// forward declarations -template struct is_iterator_system; -template struct is_iterator_traversal; - -// make type_traits easy to access -using namespace thrust::detail; - -template - struct host_system_category_to_traversal - : eval_if< - is_convertible::value, - detail::identity_, - eval_if< - is_convertible::value, - detail::identity_, - eval_if< - is_convertible::value, - detail::identity_, - eval_if< - is_convertible::value, - detail::identity_, - eval_if< - is_convertible::value, - detail::identity_, - void - > - > - > - > - > -{ -}; // end host_system_category_to_traversal - - - -template - struct device_system_category_to_traversal - : eval_if< - is_convertible::value, - detail::identity_, - eval_if< - is_convertible::value, - detail::identity_, - eval_if< - is_convertible::value, - detail::identity_, - eval_if< - is_convertible::value, - detail::identity_, - eval_if< - is_convertible::value, - detail::identity_, - void - > - > - > - > - > -{ -}; // end device_system_category_to_traversal - - - -template - struct any_system_category_to_traversal - : eval_if< - is_convertible::value, - identity_, - eval_if< - is_convertible::value, - identity_, - eval_if< - is_convertible::value, - identity_, - eval_if< - is_convertible::value, - identity_, - eval_if< - 
is_convertible::value, - identity_, - - // unknown traversal - void - > - > - > - > - > -{ -}; // end any_system_category_to_traversal - - -template - struct category_to_traversal - // check for any system - : eval_if< - or_< - is_convertible, - is_convertible - >::value, - - any_system_category_to_traversal, - - // check for host system - eval_if< - or_< - is_convertible, - is_convertible - >::value, - - host_system_category_to_traversal, - - // check for device system - eval_if< - or_< - is_convertible, - is_convertible - >::value, - - device_system_category_to_traversal, - - // unknown category - void - > - > - > -{}; - - -template - struct iterator_category_to_traversal - : eval_if< - is_iterator_traversal::value, - detail::identity_, - category_to_traversal - > -{ -}; // end iterator_category_to_traversal - - -} // end detail - -} // end thrust - diff --git a/compat/thrust/iterator/detail/iterator_facade_category.h b/compat/thrust/iterator/detail/iterator_facade_category.h deleted file mode 100644 index fbb8bd6451..0000000000 --- a/compat/thrust/iterator/detail/iterator_facade_category.h +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - -namespace detail -{ - -template - struct iterator_category_with_system_and_traversal - : Category -{ -}; // end iterator_category_with_system_and_traversal - -// specialize iterator_category_to_system for iterator_category_with_system_and_traversal -template struct iterator_category_to_system; - -template - struct iterator_category_to_system > -{ - typedef System type; -}; // end iterator_category_with_system_and_traversal - - -// adapted from http://www.boost.org/doc/libs/1_37_0/libs/iterator/doc/iterator_facade.html#iterator-category -// -// in our implementation, R need not be a reference type to result in a category -// derived from forward_XXX_iterator_tag -// -// iterator-category(T,V,R) := -// if(T is convertible to input_host_iterator_tag -// || T is convertible to output_host_iterator_tag -// || T is convertible to input_device_iterator_tag -// || T is convertible to output_device_iterator_tag -// ) -// return T -// -// else if (T is not convertible to incrementable_traversal_tag) -// the program is ill-formed -// -// else return a type X satisfying the following two constraints: -// -// 1. X is convertible to X1, and not to any more-derived -// type, where X1 is defined by: -// -// if (T is convertible to forward_traversal_tag) -// { -// if (T is convertible to random_access_traversal_tag) -// X1 = random_access_host_iterator_tag -// else if (T is convertible to bidirectional_traversal_tag) -// X1 = bidirectional_host_iterator_tag -// else -// X1 = forward_host_iterator_tag -// } -// else -// { -// if (T is convertible to single_pass_traversal_tag -// && R is convertible to V) -// X1 = input_host_iterator_tag -// else -// X1 = T -// } -// -// 2. category-to-traversal(X) is convertible to the most -// derived traversal tag type to which X is also convertible, -// and not to any more-derived traversal tag type. 
- - -template - struct iterator_facade_default_category; - - -// Thrust's implementation of iterator_facade_default_category is slightly -// different from Boost's equivalent. -// Thrust does not check is_convertible because Reference -// may not be a complete type at this point, and implementations of is_convertible -// typically require that both types be complete. -// Instead, it simply assumes that if is_convertible, -// then the category is input_iterator_tag - - -// this is the function for standard system iterators -template - struct iterator_facade_default_category_std : - thrust::detail::eval_if< - thrust::detail::is_convertible::value, - thrust::detail::eval_if< - thrust::detail::is_convertible::value, - thrust::detail::identity_, - thrust::detail::eval_if< - thrust::detail::is_convertible::value, - thrust::detail::identity_, - thrust::detail::identity_ - > - >, - thrust::detail::eval_if< // XXX note we differ from Boost here - thrust::detail::is_convertible::value, - thrust::detail::identity_, - thrust::detail::identity_ - > - > -{ -}; // end iterator_facade_default_category_std - - -// this is the function for host system iterators -template - struct iterator_facade_default_category_host : - thrust::detail::eval_if< - thrust::detail::is_convertible::value, - thrust::detail::eval_if< - thrust::detail::is_convertible::value, - thrust::detail::identity_, - thrust::detail::eval_if< - thrust::detail::is_convertible::value, - thrust::detail::identity_, - thrust::detail::identity_ - > - >, - thrust::detail::eval_if< // XXX note we differ from Boost here - thrust::detail::is_convertible::value, - thrust::detail::identity_, - thrust::detail::identity_ - > - > -{ -}; // end iterator_facade_default_category_host - - -// this is the function for device system iterators -template - struct iterator_facade_default_category_device : - thrust::detail::eval_if< - thrust::detail::is_convertible::value, - thrust::detail::eval_if< - thrust::detail::is_convertible::value, - 
thrust::detail::identity_, - thrust::detail::eval_if< - thrust::detail::is_convertible::value, - thrust::detail::identity_, - thrust::detail::identity_ - > - >, - thrust::detail::eval_if< - thrust::detail::is_convertible::value, // XXX note we differ from Boost here - thrust::detail::identity_, - thrust::detail::identity_ - > - > -{ -}; // end iterator_facade_default_category_device - - -// this is the function for any system iterators -template - struct iterator_facade_default_category_any : - thrust::detail::eval_if< - - thrust::detail::is_convertible::value, - - thrust::detail::eval_if< - thrust::detail::is_convertible::value, - thrust::detail::identity_, - - thrust::detail::eval_if< - thrust::detail::is_convertible::value, - thrust::detail::identity_, - thrust::detail::identity_ - > - >, - - thrust::detail::eval_if< - thrust::detail::is_convertible::value, // XXX note we differ from Boost here - thrust::detail::identity_, - thrust::detail::identity_ - > - > -{ -}; // end iterator_facade_default_category_any - - -template - struct iterator_facade_default_category - // check for any system - : thrust::detail::eval_if< - thrust::detail::is_convertible::value, - iterator_facade_default_category_any, - - // check for host system - thrust::detail::eval_if< - thrust::detail::is_convertible::value, - iterator_facade_default_category_host, - - // check for device system - thrust::detail::eval_if< - thrust::detail::is_convertible::value, - iterator_facade_default_category_device, - - // if we don't recognize the system, get a standard iterator category - // and combine it with System & Traversal - thrust::detail::identity_< - thrust::detail::iterator_category_with_system_and_traversal< - typename iterator_facade_default_category_std::type, - System, - Traversal - > - > - > - > - > -{}; - - -template - struct iterator_facade_category_impl -{ - typedef typename iterator_facade_default_category< - System,Traversal,ValueParam,Reference - >::type category; - - // we must be 
able to deduce both Traversal & System from category - // otherwise, munge them all together - typedef typename thrust::detail::eval_if< - thrust::detail::and_< - thrust::detail::is_same< - Traversal, - typename thrust::detail::iterator_category_to_traversal::type - >, - thrust::detail::is_same< - System, - typename thrust::detail::iterator_category_to_system::type - > - >::value, - thrust::detail::identity_, - thrust::detail::identity_ > - >::type type; -}; // end iterator_facade_category_impl - - -template - struct iterator_facade_category -{ - typedef typename - thrust::detail::eval_if< - thrust::detail::is_iterator_category::value, - thrust::detail::identity_, // categories are fine as-is - iterator_facade_category_impl - >::type type; -}; // end iterator_facade_category - - -} // end detail -} // end thrust - diff --git a/compat/thrust/iterator/detail/iterator_traits.inl b/compat/thrust/iterator/detail/iterator_traits.inl deleted file mode 100644 index 924eabb187..0000000000 --- a/compat/thrust/iterator/detail/iterator_traits.inl +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file iterator_traits.inl - * \brief Inline file for iterator_traits.h. 
- */ - -#include -#include -#include - -namespace thrust -{ - -template - struct iterator_value -{ - typedef typename thrust::iterator_traits::value_type type; -}; // end iterator_value - - -template - struct iterator_pointer -{ - typedef typename thrust::iterator_traits::pointer type; -}; // end iterator_pointer - - -template - struct iterator_reference -{ - typedef typename iterator_traits::reference type; -}; // end iterator_reference - - -template - struct iterator_difference -{ - typedef typename thrust::iterator_traits::difference_type type; -}; // end iterator_difference - - -template - struct iterator_system - : detail::iterator_category_to_system< - typename thrust::iterator_traits::iterator_category - > -{ -}; // end iterator_system - -// specialize iterator_system for void *, which has no category -template<> - struct iterator_system -{ - typedef thrust::iterator_system::type type; -}; // end iterator_system - -template<> - struct iterator_system -{ - typedef thrust::iterator_system::type type; -}; // end iterator_system - - -template - struct iterator_traversal - : detail::iterator_category_to_traversal< - typename thrust::iterator_traits::iterator_category - > -{ -}; // end iterator_traversal - -namespace detail -{ - -template - struct is_iterator_traversal - : thrust::detail::is_convertible -{ -}; // end is_iterator_traversal - - -template - struct is_iterator_system - : detail::or_< - detail::is_convertible, - detail::or_< - detail::is_convertible, - detail::is_convertible - > - > -{ -}; // end is_iterator_system - - -} // end namespace detail -} // end namespace thrust - diff --git a/compat/thrust/iterator/detail/iterator_traversal_tags.h b/compat/thrust/iterator/detail/iterator_traversal_tags.h deleted file mode 100644 index dcbebf3fda..0000000000 --- a/compat/thrust/iterator/detail/iterator_traversal_tags.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the 
"License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -namespace thrust -{ - -// define Boost's traversal tags -struct no_traversal_tag {}; - -struct incrementable_traversal_tag - : no_traversal_tag {}; - -struct single_pass_traversal_tag - : incrementable_traversal_tag {}; - -struct forward_traversal_tag - : single_pass_traversal_tag {}; - -struct bidirectional_traversal_tag - : forward_traversal_tag {}; - -struct random_access_traversal_tag - : bidirectional_traversal_tag {}; - -} // end thrust - diff --git a/compat/thrust/iterator/detail/minimum_category.h b/compat/thrust/iterator/detail/minimum_category.h deleted file mode 100644 index e07e09636e..0000000000 --- a/compat/thrust/iterator/detail/minimum_category.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -namespace thrust -{ - -namespace detail -{ - -template - struct minimum_category - : minimum_type -{ -}; // end minimum_category - -} // end detail - -} // end thrust - - diff --git a/compat/thrust/iterator/detail/minimum_system.h b/compat/thrust/iterator/detail/minimum_system.h deleted file mode 100644 index 5448a0d1f0..0000000000 --- a/compat/thrust/iterator/detail/minimum_system.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -namespace thrust -{ -namespace detail -{ - -template - struct minimum_system - : minimum_type -{ -}; // end minimum_system - -} // end detail -} // end thrust - diff --git a/compat/thrust/iterator/detail/normal_iterator.h b/compat/thrust/iterator/detail/normal_iterator.h deleted file mode 100644 index 7fe61bfed3..0000000000 --- a/compat/thrust/iterator/detail/normal_iterator.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file normal_iterator.h - * \brief Defines the interface to an iterator class - * which adapts a pointer type. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - - -template - class normal_iterator - : public iterator_adaptor< - normal_iterator, - Pointer - > -{ - typedef iterator_adaptor, Pointer> super_t; - - public: - __host__ __device__ - normal_iterator() {} - - __host__ __device__ - normal_iterator(Pointer p) - : super_t(p) {} - - template - __host__ __device__ - normal_iterator(const normal_iterator &other, - typename thrust::detail::enable_if_convertible< - OtherPointer, - Pointer - >::type * = 0) - : super_t(other.base()) {} - -}; // end normal_iterator - - -template - inline __host__ __device__ normal_iterator make_normal_iterator(Pointer ptr) -{ - return normal_iterator(ptr); -} - - -template struct is_trivial_iterator< normal_iterator > : public true_type {}; - - -} // end detail -} // end thrust - diff --git a/compat/thrust/iterator/detail/permutation_iterator_base.h b/compat/thrust/iterator/detail/permutation_iterator_base.h deleted file mode 100644 index a145b88aec..0000000000 --- a/compat/thrust/iterator/detail/permutation_iterator_base.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -template class permutation_iterator; - - -namespace detail -{ - -template - struct permutation_iterator_base -{ - typedef typename thrust::iterator_system::type System1; - typedef typename thrust::iterator_system::type System2; - - typedef thrust::iterator_adaptor< - permutation_iterator, - IndexIterator, - typename thrust::iterator_value::type, - typename detail::minimum_system::type, - thrust::use_default, - typename thrust::iterator_reference::type - > type; -}; // end permutation_iterator_base - -} // end detail - -} // end thrust - diff --git a/compat/thrust/iterator/detail/retag.h b/compat/thrust/iterator/detail/retag.h deleted file mode 100644 index 4417fa5604..0000000000 --- a/compat/thrust/iterator/detail/retag.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - - -// we can retag an iterator if FromTag converts to ToTag -// or vice versa -template - struct is_retaggable - : integral_constant< - bool, - (is_convertible::value || is_convertible::value) - > -{}; - - -template - struct enable_if_retaggable - : enable_if< - is_retaggable::value, - Result - > -{}; // end enable_if_retaggable - - -} // end detail - - -template - thrust::detail::tagged_iterator - reinterpret_tag(Iterator iter) -{ - return thrust::detail::tagged_iterator(iter); -} // end reinterpret_tag() - - -// specialization for raw pointer -template - thrust::pointer - reinterpret_tag(T *ptr) -{ - return thrust::pointer(ptr); -} // end reinterpret_tag() - - -// specialization for thrust::pointer -template - thrust::pointer - reinterpret_tag(thrust::pointer ptr) -{ - return reinterpret_tag(ptr.get()); -} // end reinterpret_tag() - - -// avoid deeply-nested tagged_iterator -template - thrust::detail::tagged_iterator - reinterpret_tag(thrust::detail::tagged_iterator iter) -{ - return reinterpret_tag(iter.base()); -} // end reinterpret_tag() - - -template - typename thrust::detail::enable_if_retaggable< - typename thrust::iterator_system::type, - Tag, - thrust::detail::tagged_iterator - >::type - retag(Iterator iter) -{ - return reinterpret_tag(iter); -} // end retag() - - -// specialization for raw pointer -template - typename thrust::detail::enable_if_retaggable< - typename thrust::iterator_system::type, - Tag, - thrust::pointer - >::type - retag(T *ptr) -{ - return reinterpret_tag(ptr); -} // end retag() - - -// specialization for thrust::pointer -template - typename thrust::detail::enable_if_retaggable< - OtherTag, - Tag, - thrust::pointer - >::type - retag(thrust::pointer ptr) -{ - return reinterpret_tag(ptr); -} // end retag() - - -// avoid deeply-nested tagged_iterator -template - typename thrust::detail::enable_if_retaggable< - OtherTag, - Tag, - 
thrust::detail::tagged_iterator - >::type - retag(thrust::detail::tagged_iterator iter) -{ - return reinterpret_tag(iter); -} // end retag() - - -} // end thrust - diff --git a/compat/thrust/iterator/detail/reverse_iterator.inl b/compat/thrust/iterator/detail/reverse_iterator.inl deleted file mode 100644 index 03e9032130..0000000000 --- a/compat/thrust/iterator/detail/reverse_iterator.inl +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -namespace thrust -{ - -namespace detail -{ - -__thrust_hd_warning_disable__ -template -__host__ __device__ - Iterator prior(Iterator x) -{ - return --x; -} // end prior() - -} // end detail - -template - reverse_iterator - ::reverse_iterator(BidirectionalIterator x) - :super_t(x) -{ -} // end reverse_iterator::reverse_iterator() - -template - template - reverse_iterator - ::reverse_iterator(reverse_iterator const &r -// XXX msvc screws this up -#ifndef _MSC_VER - , typename thrust::detail::enable_if< - thrust::detail::is_convertible< - OtherBidirectionalIterator, - BidirectionalIterator - >::value - >::type * -#endif // _MSC_VER - ) - :super_t(r.base()) -{ -} // end reverse_iterator::reverse_iterator() - -template - typename reverse_iterator::super_t::reference - reverse_iterator - ::dereference(void) const -{ - return *thrust::detail::prior(this->base()); -} // end reverse_iterator::increment() - -template - void reverse_iterator - ::increment(void) -{ - --this->base_reference(); -} // end reverse_iterator::increment() - -template - void reverse_iterator - ::decrement(void) -{ - ++this->base_reference(); -} // end reverse_iterator::decrement() - -template - void reverse_iterator - ::advance(typename super_t::difference_type n) -{ - this->base_reference() += -n; -} // end reverse_iterator::advance() - -template - template - typename reverse_iterator::super_t::difference_type - reverse_iterator - ::distance_to(reverse_iterator const &y) const -{ - return this->base_reference() - y.base(); -} // end reverse_iterator::distance_to() - -template -__host__ __device__ -reverse_iterator make_reverse_iterator(BidirectionalIterator x) -{ - return reverse_iterator(x); -} // end make_reverse_iterator() - - -} // end thrust - diff --git a/compat/thrust/iterator/detail/reverse_iterator_base.h b/compat/thrust/iterator/detail/reverse_iterator_base.h deleted file mode 100644 index c10c5b73ff..0000000000 --- 
a/compat/thrust/iterator/detail/reverse_iterator_base.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ - -template class reverse_iterator; - -namespace detail -{ - -template - struct reverse_iterator_base -{ - typedef thrust::iterator_adaptor< - thrust::reverse_iterator, - BidirectionalIterator - > type; -}; // end reverse_iterator_base - -} // end detail - -} // end thrust - diff --git a/compat/thrust/iterator/detail/tagged_iterator.h b/compat/thrust/iterator/detail/tagged_iterator.h deleted file mode 100644 index 69e6445183..0000000000 --- a/compat/thrust/iterator/detail/tagged_iterator.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - -template class tagged_iterator; - -template - struct tagged_iterator_base -{ - typedef thrust::iterator_adaptor< - tagged_iterator, - Iterator, - typename thrust::iterator_value::type, - Tag, - typename thrust::iterator_traversal::type, - typename thrust::iterator_reference::type, - typename thrust::iterator_difference::type - > type; -}; // end tagged_iterator_base - -template - class tagged_iterator - : public tagged_iterator_base::type -{ - private: - typedef typename tagged_iterator_base::type super_t; - - public: - __host__ __device__ - tagged_iterator(void) {} - - __host__ __device__ - explicit tagged_iterator(Iterator x) - : super_t(x) {} -}; // end tagged_iterator - - -// specialize is_trivial_iterator for tagged_iterator -template struct is_trivial_iterator; - -// tagged_iterator is trivial if its base iterator is -template - struct is_trivial_iterator > - : is_trivial_iterator -{}; - - -} // end detail -} // end thrust - diff --git a/compat/thrust/iterator/detail/transform_iterator.inl b/compat/thrust/iterator/detail/transform_iterator.inl deleted file mode 100644 index a5a36a78be..0000000000 --- a/compat/thrust/iterator/detail/transform_iterator.inl +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ - -template - class transform_iterator; - -namespace detail -{ - -// Compute the iterator_adaptor instantiation to be used for transform_iterator -template -struct transform_iterator_base -{ - private: - // By default, dereferencing the iterator yields the same as the function. - typedef typename thrust::detail::ia_dflt_help< - Reference, - thrust::detail::result_of::type)> - >::type reference; - - // To get the default for Value: remove any reference on the - // result type, but retain any constness to signal - // non-writability. Note that if we adopt Thomas' suggestion - // to key non-writability *only* on the Reference argument, - // we'd need to strip constness here as well. - typedef typename thrust::detail::ia_dflt_help< - Value, - thrust::detail::remove_reference - >::type cv_value_type; - - public: - typedef thrust::iterator_adaptor - < - transform_iterator - , Iterator - , cv_value_type - , thrust::use_default // Leave the system alone - //, thrust::use_default // Leave the traversal alone - // use the Iterator's category to let any system iterators remain random access even though - // transform_iterator's reference type may not be a reference - // XXX figure out why only iterators whose reference types are true references are random access - , typename thrust::iterator_traits::iterator_category - , reference - > type; -}; - - -} // end detail -} // end thrust - diff --git a/compat/thrust/iterator/detail/tuple_of_iterator_references.h b/compat/thrust/iterator/detail/tuple_of_iterator_references.h deleted file mode 100644 index fdbf6b8f66..0000000000 --- a/compat/thrust/iterator/detail/tuple_of_iterator_references.h +++ /dev/null @@ -1,246 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - - -template< - typename T0, typename T1, typename T2, - typename T3, typename T4, typename T5, - typename T6, typename T7, typename T8, - typename T9 -> - class tuple_of_iterator_references - : public thrust::tuple -{ - private: - typedef thrust::tuple super_t; - - public: - // allow implicit construction from tuple - inline __host__ __device__ - tuple_of_iterator_references(const super_t &other) - : super_t(other) - {} - - // allow assignment from tuples - // XXX might be worthwhile to guard this with an enable_if is_assignable - template - inline __host__ __device__ - tuple_of_iterator_references &operator=(const detail::cons &other) - { - super_t::operator=(other); - return *this; - } - - // allow assignment from pairs - // XXX might be worthwhile to guard this with an enable_if is_assignable - template - inline __host__ __device__ - tuple_of_iterator_references &operator=(const thrust::pair &other) - { - super_t::operator=(other); - return *this; - } - - // allow assignment from reference - // XXX perhaps we should generalize to reference - // we could captures reference this way - template - inline __host__ __device__ -// XXX gcc-4.2 crashes on is_assignable -// typename thrust::detail::enable_if< -// thrust::detail::is_assignable< -// super_t, -// const thrust::tuple -// >::value, -// tuple_of_iterator_references & -// >::type - tuple_of_iterator_references & - operator=(const thrust::reference, Pointer, Derived> &other) - { - typedef 
thrust::tuple tuple_type; - - // XXX perhaps this could be accelerated - tuple_type other_tuple = other; - super_t::operator=(other_tuple); - return *this; - } - - - // duplicate thrust::tuple's constructors - inline __host__ __device__ - tuple_of_iterator_references() {} - - inline __host__ __device__ - tuple_of_iterator_references(typename access_traits::parameter_type t0) - : super_t(t0, - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type())) - {} - - inline __host__ __device__ - tuple_of_iterator_references(typename access_traits::parameter_type t0, - typename access_traits::parameter_type t1) - : super_t(t0, t1, - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type())) - {} - - inline __host__ __device__ - tuple_of_iterator_references(typename access_traits::parameter_type t0, - typename access_traits::parameter_type t1, - typename access_traits::parameter_type t2) - : super_t(t0, t1, t2, - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type())) - {} - - inline __host__ __device__ - tuple_of_iterator_references(typename access_traits::parameter_type t0, - typename access_traits::parameter_type t1, - typename access_traits::parameter_type t2, - typename access_traits::parameter_type t3) - : super_t(t0, t1, t2, t3, - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type())) - {} - - inline __host__ __device__ - tuple_of_iterator_references(typename 
access_traits::parameter_type t0, - typename access_traits::parameter_type t1, - typename access_traits::parameter_type t2, - typename access_traits::parameter_type t3, - typename access_traits::parameter_type t4) - : super_t(t0, t1, t2, t3, t4, - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type())) - {} - - inline __host__ __device__ - tuple_of_iterator_references(typename access_traits::parameter_type t0, - typename access_traits::parameter_type t1, - typename access_traits::parameter_type t2, - typename access_traits::parameter_type t3, - typename access_traits::parameter_type t4, - typename access_traits::parameter_type t5) - : super_t(t0, t1, t2, t3, t4, t5, - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type())) - {} - - inline __host__ __device__ - tuple_of_iterator_references(typename access_traits::parameter_type t0, - typename access_traits::parameter_type t1, - typename access_traits::parameter_type t2, - typename access_traits::parameter_type t3, - typename access_traits::parameter_type t4, - typename access_traits::parameter_type t5, - typename access_traits::parameter_type t6) - : super_t(t0, t1, t2, t3, t4, t5, t6, - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type())) - {} - - inline __host__ __device__ - tuple_of_iterator_references(typename access_traits::parameter_type t0, - typename access_traits::parameter_type t1, - typename access_traits::parameter_type t2, - typename access_traits::parameter_type t3, - typename access_traits::parameter_type t4, - typename access_traits::parameter_type t5, - typename access_traits::parameter_type t6, - typename access_traits::parameter_type t7) - : super_t(t0, t1, t2, t3, t4, t5, t6, t7, - static_cast(null_type()), - static_cast(null_type())) - {} - - inline __host__ __device__ - tuple_of_iterator_references(typename 
access_traits::parameter_type t0, - typename access_traits::parameter_type t1, - typename access_traits::parameter_type t2, - typename access_traits::parameter_type t3, - typename access_traits::parameter_type t4, - typename access_traits::parameter_type t5, - typename access_traits::parameter_type t6, - typename access_traits::parameter_type t7, - typename access_traits::parameter_type t8) - : super_t(t0, t1, t2, t3, t4, t5, t6, t7, t8, - static_cast(null_type())) - {} - - inline __host__ __device__ - tuple_of_iterator_references(typename access_traits::parameter_type t0, - typename access_traits::parameter_type t1, - typename access_traits::parameter_type t2, - typename access_traits::parameter_type t3, - typename access_traits::parameter_type t4, - typename access_traits::parameter_type t5, - typename access_traits::parameter_type t6, - typename access_traits::parameter_type t7, - typename access_traits::parameter_type t8, - typename access_traits::parameter_type t9) - : super_t(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9) - {} -}; - - -} // end detail -} // end thrust - diff --git a/compat/thrust/iterator/detail/universal_categories.h b/compat/thrust/iterator/detail/universal_categories.h deleted file mode 100644 index 7c3922210c..0000000000 --- a/compat/thrust/iterator/detail/universal_categories.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ - -// define these types without inheritance to avoid ambiguous conversion to base classes - -struct input_universal_iterator_tag -{ - operator input_host_iterator_tag () {return input_host_iterator_tag();} - - operator input_device_iterator_tag () {return input_device_iterator_tag();} -}; - -struct output_universal_iterator_tag -{ - operator output_host_iterator_tag () {return output_host_iterator_tag();} - - operator output_device_iterator_tag () {return output_device_iterator_tag();} -}; - -struct forward_universal_iterator_tag - : input_universal_iterator_tag -{ - operator forward_host_iterator_tag () {return forward_host_iterator_tag();}; - - operator forward_device_iterator_tag () {return forward_device_iterator_tag();}; -}; - -struct bidirectional_universal_iterator_tag - : forward_universal_iterator_tag -{ - operator bidirectional_host_iterator_tag () {return bidirectional_host_iterator_tag();}; - - operator bidirectional_device_iterator_tag () {return bidirectional_device_iterator_tag();}; -}; - - -namespace detail -{ - -// create this struct to control conversion precedence in random_access_universal_iterator_tag -template -struct one_degree_of_separation - : T -{ -}; - -} // end detail - - -struct random_access_universal_iterator_tag -{ - // these conversions are all P0 - operator random_access_host_iterator_tag () {return random_access_host_iterator_tag();}; - - operator random_access_device_iterator_tag () {return random_access_device_iterator_tag();}; - - // bidirectional_universal_iterator_tag is P1 - operator detail::one_degree_of_separation () {return detail::one_degree_of_separation();} - -}; - - -} // end thrust - diff --git a/compat/thrust/iterator/detail/zip_iterator.inl b/compat/thrust/iterator/detail/zip_iterator.inl deleted file mode 100644 index fddd0ada11..0000000000 --- a/compat/thrust/iterator/detail/zip_iterator.inl +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright 
2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -template - zip_iterator - ::zip_iterator(void) -{ -} // end zip_iterator::zip_iterator() - - -template - zip_iterator - ::zip_iterator(IteratorTuple iterator_tuple) - :m_iterator_tuple(iterator_tuple) -{ -} // end zip_iterator::zip_iterator() - - -template - template - zip_iterator - ::zip_iterator(const zip_iterator &other, - typename thrust::detail::enable_if_convertible< - OtherIteratorTuple, - IteratorTuple - >::type *) - :m_iterator_tuple(other.get_iterator_tuple()) -{ -} // end zip_iterator::zip_iterator() - - -template -const IteratorTuple &zip_iterator - ::get_iterator_tuple(void) const -{ - return m_iterator_tuple; -} // end zip_iterator::get_iterator_tuple() - - -template - typename zip_iterator::super_t::reference - zip_iterator - ::dereference(void) const -{ - using namespace detail::tuple_impl_specific; - - return thrust::detail::tuple_host_device_transform(get_iterator_tuple(), detail::dereference_iterator()); -} // end zip_iterator::dereference() - - -__thrust_hd_warning_disable__ -template - template - bool zip_iterator - ::equal(const zip_iterator &other) const -{ - return get<0>(get_iterator_tuple()) == get<0>(other.get_iterator_tuple()); -} // end zip_iterator::equal() - - -template - void zip_iterator - ::advance(typename super_t::difference_type n) -{ - using namespace 
detail::tuple_impl_specific; - - // XXX note that we use a pointer to System to dispatch to avoid - // default construction of a System - typename thrust::iterator_system::type *use_me_to_dispatch = 0; - - // dispatch on system - tuple_for_each(m_iterator_tuple, - detail::advance_iterator(n), - use_me_to_dispatch); -} // end zip_iterator::advance() - - -template - void zip_iterator - ::increment(void) -{ - using namespace detail::tuple_impl_specific; - - // XXX note that we use a pointer to System to dispatch to avoid - // default construction of a System - typename thrust::iterator_system::type *use_me_to_dispatch = 0; - - // dispatch on system - tuple_for_each(m_iterator_tuple, detail::increment_iterator(), - use_me_to_dispatch); -} // end zip_iterator::increment() - - -template - void zip_iterator - ::decrement(void) -{ - using namespace detail::tuple_impl_specific; - - // XXX note that we use a pointer to System to dispatch to avoid - // default construction of a System - typename thrust::iterator_system::type *use_me_to_dispatch = 0; - - // dispatch on system - tuple_for_each(m_iterator_tuple, detail::decrement_iterator(), - use_me_to_dispatch); -} // end zip_iterator::decrement() - - -__thrust_hd_warning_disable__ -template - template - typename zip_iterator::super_t::difference_type - zip_iterator - ::distance_to(const zip_iterator &other) const -{ - return get<0>(other.get_iterator_tuple()) - get<0>(get_iterator_tuple()); -} // end zip_iterator::distance_to() - - -template - zip_iterator make_zip_iterator(IteratorTuple t) -{ - return zip_iterator(t); -} // end make_zip_iterator() - - -} // end thrust - diff --git a/compat/thrust/iterator/detail/zip_iterator_base.h b/compat/thrust/iterator/detail/zip_iterator_base.h deleted file mode 100644 index 9dd7789e52..0000000000 --- a/compat/thrust/iterator/detail/zip_iterator_base.h +++ /dev/null @@ -1,418 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 
(the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - -// forward declare zip_iterator for zip_iterator_base -template class zip_iterator; - -namespace detail -{ - - -// Functors to be used with tuple algorithms -// -template -class advance_iterator -{ -public: - inline __host__ __device__ - advance_iterator(DiffType step) : m_step(step) {} - - template - inline __host__ __device__ - void operator()(Iterator& it) const - { it += m_step; } - -private: - DiffType m_step; -}; // end advance_iterator - - -struct increment_iterator -{ - template - inline __host__ __device__ - void operator()(Iterator& it) - { ++it; } -}; // end increment_iterator - - -struct decrement_iterator -{ - template - inline __host__ __device__ - void operator()(Iterator& it) - { --it; } -}; // end decrement_iterator - - -struct dereference_iterator -{ - template - struct apply - { - typedef typename - iterator_traits::reference - type; - }; // end apply - - // XXX silence warnings of the form "calling a __host__ function from a __host__ __device__ function is not allowed - __thrust_hd_warning_disable__ - template - __host__ __device__ - typename apply::type operator()(Iterator const& it) - { - return *it; - } -}; // end dereference_iterator - - -// The namespace tuple_impl_specific provides two meta- -// algorithms and two algorithms for tuples. 
-namespace tuple_impl_specific -{ - -// define apply1 for tuple_meta_transform_impl -template - struct apply1 - : UnaryMetaFunctionClass::template apply -{ -}; // end apply1 - - -// define apply2 for tuple_meta_accumulate_impl -template - struct apply2 - : UnaryMetaFunctionClass::template apply -{ -}; // end apply2 - - -// Meta-accumulate algorithm for tuples. Note: The template -// parameter StartType corresponds to the initial value in -// ordinary accumulation. -// -template - struct tuple_meta_accumulate; - -template< - typename Tuple - , class BinaryMetaFun - , typename StartType -> - struct tuple_meta_accumulate_impl -{ - typedef typename apply2< - BinaryMetaFun - , typename Tuple::head_type - , typename tuple_meta_accumulate< - typename Tuple::tail_type - , BinaryMetaFun - , StartType - >::type - >::type type; -}; - - -template< - typename Tuple - , class BinaryMetaFun - , typename StartType -> -struct tuple_meta_accumulate - : thrust::detail::eval_if< - thrust::detail::is_same::value - , thrust::detail::identity_ - , tuple_meta_accumulate_impl< - Tuple - , BinaryMetaFun - , StartType - > - > // end eval_if -{ -}; // end tuple_meta_accumulate - - -// transform algorithm for tuples. The template parameter Fun -// must be a unary functor which is also a unary metafunction -// class that computes its return type based on its argument -// type. For example: -// -// struct to_ptr -// { -// template -// struct apply -// { -// typedef Arg* type; -// } -// -// template -// Arg* operator()(Arg x); -// }; - - - -// for_each algorithm for tuples. 
-// -template -inline __host__ __device__ -Fun tuple_for_each(thrust::null_type, Fun f, System *) -{ - return f; -} // end tuple_for_each() - - -template -inline __host__ __device__ -Fun tuple_for_each(Tuple& t, Fun f, System *dispatch_tag) -{ - f( t.get_head() ); - return tuple_for_each(t.get_tail(), f, dispatch_tag); -} // end tuple_for_each() - - -template -inline __host__ __device__ -Fun tuple_for_each(Tuple& t, Fun f, thrust::host_system_tag *dispatch_tag) -{ -// XXX this path is required in order to accomodate pure host iterators -// (such as std::vector::iterator) in a zip_iterator -#ifndef __CUDA_ARCH__ - f( t.get_head() ); - return tuple_for_each(t.get_tail(), f, dispatch_tag); -#else - // this code will never be called - return f; -#endif -} // end tuple_for_each() - - -// Equality of tuples. NOTE: "==" for tuples currently (7/2003) -// has problems under some compilers, so I just do my own. -// No point in bringing in a bunch of #ifdefs here. This is -// going to go away with the next tuple implementation anyway. -// -__host__ __device__ -inline bool tuple_equal(thrust::null_type, thrust::null_type) -{ return true; } - - -template -__host__ __device__ -bool tuple_equal(Tuple1 const& t1, Tuple2 const& t2) -{ - return t1.get_head() == t2.get_head() && - tuple_equal(t1.get_tail(), t2.get_tail()); -} // end tuple_equal() - -} // end end tuple_impl_specific - - -// Metafunction to obtain the type of the tuple whose element types -// are the value_types of an iterator tupel. -// -template - struct tuple_of_value_types - : tuple_meta_transform< - IteratorTuple, - iterator_value - > -{ -}; // end tuple_of_value_types - - -struct minimum_category_lambda -{ - template - struct apply : minimum_category - {}; -}; - - - -// Metafunction to obtain the minimal traversal tag in a tuple -// of iterators. 
-// -template -struct minimum_traversal_category_in_iterator_tuple -{ - typedef typename tuple_meta_transform< - IteratorTuple - , thrust::iterator_traversal - >::type tuple_of_traversal_tags; - - typedef typename tuple_impl_specific::tuple_meta_accumulate< - tuple_of_traversal_tags - , minimum_category_lambda - , thrust::random_access_traversal_tag - >::type type; -}; - - -struct minimum_system_lambda -{ - template - struct apply : minimum_system - {}; -}; - - - -// Metafunction to obtain the minimal system tag in a tuple -// of iterators. -template -struct minimum_system_in_iterator_tuple -{ - typedef typename thrust::detail::tuple_meta_transform< - IteratorTuple, - thrust::iterator_system - >::type tuple_of_system_tags; - - typedef typename tuple_impl_specific::tuple_meta_accumulate< - tuple_of_system_tags, - minimum_system_lambda, - thrust::any_system_tag - >::type type; -}; - -namespace zip_iterator_base_ns -{ - - -template - struct tuple_elements_helper - : eval_if< - (i < tuple_size::value), - tuple_element, - identity_ - > -{}; - - -template - struct tuple_elements -{ - typedef typename tuple_elements_helper<0,Tuple>::type T0; - typedef typename tuple_elements_helper<1,Tuple>::type T1; - typedef typename tuple_elements_helper<2,Tuple>::type T2; - typedef typename tuple_elements_helper<3,Tuple>::type T3; - typedef typename tuple_elements_helper<4,Tuple>::type T4; - typedef typename tuple_elements_helper<5,Tuple>::type T5; - typedef typename tuple_elements_helper<6,Tuple>::type T6; - typedef typename tuple_elements_helper<7,Tuple>::type T7; - typedef typename tuple_elements_helper<8,Tuple>::type T8; - typedef typename tuple_elements_helper<9,Tuple>::type T9; -}; - - -template - struct tuple_of_iterator_references -{ - // get a thrust::tuple of the iterators' references - typedef typename tuple_meta_transform< - IteratorTuple, - iterator_reference - >::type tuple_of_references; - - // get at the individual tuple element types by name - typedef tuple_elements 
elements; - - // map thrust::tuple to tuple_of_iterator_references - typedef thrust::detail::tuple_of_iterator_references< - typename elements::T0, - typename elements::T1, - typename elements::T2, - typename elements::T3, - typename elements::T4, - typename elements::T5, - typename elements::T6, - typename elements::T7, - typename elements::T8, - typename elements::T9 - > type; -}; - - -} // end zip_iterator_base_ns - -/////////////////////////////////////////////////////////////////// -// -// Class zip_iterator_base -// -// Builds and exposes the iterator facade type from which the zip -// iterator will be derived. -// -template - struct zip_iterator_base -{ - //private: - // reference type is the type of the tuple obtained from the - // iterators' reference types. - typedef typename zip_iterator_base_ns::tuple_of_iterator_references::type reference; - - // Boost's Value type is the same as reference type. - //typedef reference value_type; - typedef typename tuple_of_value_types::type value_type; - - // Difference type is the first iterator's difference type - typedef typename thrust::iterator_traits< - typename thrust::tuple_element<0, IteratorTuple>::type - >::difference_type difference_type; - - // Iterator system is the minimum system tag in the - // iterator tuple - typedef typename - minimum_system_in_iterator_tuple::type system; - - // Traversal category is the minimum traversal category in the - // iterator tuple - typedef typename - minimum_traversal_category_in_iterator_tuple::type traversal_category; - - public: - - // The iterator facade type from which the zip iterator will - // be derived. 
- typedef thrust::iterator_facade< - zip_iterator, - value_type, - system, - traversal_category, - reference, - difference_type - > type; -}; // end zip_iterator_base - -} // end detail - -} // end thrust - - diff --git a/compat/thrust/iterator/discard_iterator.h b/compat/thrust/iterator/discard_iterator.h deleted file mode 100644 index 6e089b567e..0000000000 --- a/compat/thrust/iterator/discard_iterator.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file thrust/iterator/discard_iterator.h - * \brief An iterator which "discards" (ignores) values assigned to it upon dereference - */ - -#pragma once - -#include -#include -#include - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN - -namespace thrust -{ - -/*! \addtogroup iterators - * \{ - */ - -/*! \addtogroup fancyiterator Fancy Iterators - * \ingroup iterators - * \{ - */ - -/*! \p discard_iterator is an iterator which represents a special kind of pointer that - * ignores values written to it upon dereference. This iterator is useful for ignoring - * the output of certain algorithms without wasting memory capacity or bandwidth. - * \p discard_iterator may also be used to count the size of an algorithm's output which - * may not be known a priori. 
- * - * The following code snippet demonstrates how to use \p discard_iterator to ignore - * ignore one of the output ranges of reduce_by_key - * - * \code - * #include - * #include - * #include - * - * int main(void) - * { - * thrust::device_vector keys(7), values(7); - * - * keys[0] = 1; - * keys[1] = 3; - * keys[2] = 3; - * keys[3] = 3; - * keys[4] = 2; - * keys[5] = 2; - * keys[6] = 1; - * - * values[0] = 9; - * values[1] = 8; - * values[2] = 7; - * values[3] = 6; - * values[4] = 5; - * values[5] = 4; - * values[6] = 3; - * - * thrust::device_vector result(4); - * - * // we are only interested in the reduced values - * // use discard_iterator to ignore the output keys - * thrust::reduce_by_key(keys.begin(), keys.end(), - * values.begin(), values.end(), - * thrust::make_discard_iterator(), - * result.begin()); - * - * // result is now [9, 21, 9, 3] - * - * return 0; - * } - * \endcode - * - * \see make_discard_iterator - */ -template - class discard_iterator - : public detail::discard_iterator_base::type -{ - /*! \cond - */ - friend class thrust::iterator_core_access; - typedef typename detail::discard_iterator_base::type super_t; - typedef typename detail::discard_iterator_base::incrementable incrementable; - typedef typename detail::discard_iterator_base::base_iterator base_iterator; - - public: - typedef typename super_t::reference reference; - typedef typename super_t::value_type value_type; - - /*! \endcond - */ - - /*! Copy constructor copies from a source discard_iterator. - * - * \p rhs The discard_iterator to copy. - */ - __host__ __device__ - discard_iterator(discard_iterator const &rhs) - : super_t(rhs.base()) {} - - /*! This constructor receives an optional index specifying the position of this - * \p discard_iterator in a range. - * - * \p i The index of this \p discard_iterator in a range. Defaults to the - * value returned by \c Incrementable's null constructor. For example, - * when Incrementable == int, \c 0. 
- */ - __host__ __device__ - discard_iterator(incrementable const &i = incrementable()) - : super_t(base_iterator(i)) {} - - /*! \cond - */ - - private: // Core iterator interface - __host__ __device__ - reference dereference(void) const - { - return m_element; - } - - mutable value_type m_element; - - /*! \endcond - */ -}; // end constant_iterator - - -/*! \p make_discard_iterator creates a \p discard_iterator from an optional index parameter. - * - * \param i The index of the returned \p discard_iterator within a range. - * In the default case, the value of this parameter is \c 0. - * - * \return A new \p discard_iterator with index as given by \p i. - * - * \see constant_iterator - */ -inline __host__ __device__ -discard_iterator<> make_discard_iterator(discard_iterator<>::difference_type i = discard_iterator<>::difference_type(0)) -{ - return discard_iterator<>(i); -} // end make_discard_iterator() - -/*! \} // end fancyiterators - */ - -/*! \} // end iterators - */ - -} // end namespace thrust - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END - diff --git a/compat/thrust/iterator/iterator_adaptor.h b/compat/thrust/iterator/iterator_adaptor.h deleted file mode 100644 index 7b9cca308a..0000000000 --- a/compat/thrust/iterator/iterator_adaptor.h +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file thrust/iterator/iterator_adaptor.h - * \brief An iterator which adapts a base iterator - */ - -/* - * (C) Copyright David Abrahams 2002. - * (C) Copyright Jeremy Siek 2002. - * (C) Copyright Thomas Witt 2002. - * - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying NOTICE file for the complete license) - * - * For more information, see http://www.boost.org - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -/*! \addtogroup iterators - * \{ - */ - -/*! \addtogroup fancyiterator Fancy Iterators - * \ingroup iterators - * \{ - */ - -/*! \p iterator_adaptor is an iterator which adapts an existing type of iterator to create a new type of - * iterator. Most of Thrust's fancy iterators are defined via inheritance from \p iterator_adaptor. - * While composition of these existing Thrust iterators is often sufficient for expressing the desired - * functionality, it is occasionally more straightforward to derive from \p iterator_adaptor directly. 
- * - * To see how to use \p iterator_adaptor to create a novel iterator type, let's examine how to use it to - * define \p repeat_iterator, a fancy iterator which repeats elements from another range a given number of time: - * - * \code - * #include - * - * // derive repeat_iterator from iterator_adaptor - * template - * class repeat_iterator - * : public thrust::iterator_adaptor< - * repeat_iterator, // the first template parameter is the name of the iterator we're creating - * Iterator // the second template parameter is the name of the iterator we're adapting - * // we can use the default for the additional template parameters - * > - * { - * public: - * // shorthand for the name of the iterator_adaptor we're deriving from - * typedef thrust::iterator_adaptor< - * repeat_iterator, - * Iterator - * > super_t; - * - * __host__ __device__ - * repeat_iterator(const Iterator &x, int n) : super_t(x), begin(x), n(n) {} - * - * // befriend thrust::iterator_core_access to allow it access to the private interface below - * friend class thrust::iterator_core_access; - * - * private: - * // repeat each element of the adapted range n times - * unsigned int n; - * - * // used to keep track of where we began - * const Iterator begin; - * - * // it is private because only thrust::iterator_core_access needs access to it - * __host__ __device__ - * typename super_t::reference dereference() const - * { - * return *(begin + (this->base() - begin) / n); - * } - * }; - * \endcode - * - * Except for the first two, \p iterator_adaptor's template parameters are optional. When omitted, or when the - * user specifies \p thrust::use_default in its place, \p iterator_adaptor will use a default type inferred from \p Base. - * - * \p iterator_adaptor's functionality is derived from and generally equivalent to \p boost::iterator_adaptor. 
- * The exception is Thrust's addition of the template parameter \p System, which is necessary to allow Thrust - * to dispatch an algorithm to one of several parallel backend systems. - * - * \p iterator_adaptor is a powerful tool for creating custom iterators directly. However, the large set of iterator semantics which must be satisfied - * for algorithm compatibility can make \p iterator_adaptor difficult to use correctly. Unless you require the full expressivity of \p iterator_adaptor, - * consider building a custom iterator through composition of existing higher-level fancy iterators instead. - * - * Interested users may refer to boost::iterator_adaptor's documentation for further usage examples. - */ -template - class iterator_adaptor: - public detail::iterator_adaptor_base< - Derived, Base, Value, System, Traversal, Reference, Difference - >::type -{ - /*! \cond - */ - - friend class thrust::iterator_core_access; - - protected: - typedef typename detail::iterator_adaptor_base< - Derived, Base, Value, System, Traversal, Reference, Difference - >::type super_t; - - /*! \endcond - */ - - public: - /*! \p iterator_adaptor's default constructor does nothing. - */ - __host__ __device__ - iterator_adaptor(){} - - /*! This constructor copies from a given instance of the \p Base iterator. - */ - __host__ __device__ - explicit iterator_adaptor(Base const& iter) - : m_iterator(iter) - {} - - /*! The type of iterator this \p iterator_adaptor's \p adapts. - */ - typedef Base base_type; - - /*! \cond - */ - typedef typename super_t::reference reference; - - typedef typename super_t::difference_type difference_type; - /*! \endcond - */ - - /*! \return A \p const reference to the \p Base iterator this \p iterator_adaptor adapts. - */ - __host__ __device__ - Base const& base() const - { return m_iterator; } - - protected: - /*! \return A \p const reference to the \p Base iterator this \p iterator_adaptor adapts. 
- */ - __host__ __device__ - Base const& base_reference() const - { return m_iterator; } - - /*! \return A mutable reference to the \p Base iterator this \p iterator_adaptor adapts. - */ - __host__ __device__ - Base& base_reference() - { return m_iterator; } - - /*! \cond - */ - private: // Core iterator interface for iterator_facade - - __thrust_hd_warning_disable__ - __host__ __device__ - typename iterator_adaptor::reference dereference() const - { return *m_iterator; } - - __thrust_hd_warning_disable__ - template - __host__ __device__ - bool equal(iterator_adaptor const& x) const - { return m_iterator == x.base(); } - - __thrust_hd_warning_disable__ - __host__ __device__ - void advance(typename iterator_adaptor::difference_type n) - { - // XXX statically assert on random_access_traversal_tag - m_iterator += n; - } - - __thrust_hd_warning_disable__ - __host__ __device__ - void increment() - { ++m_iterator; } - - __thrust_hd_warning_disable__ - __host__ __device__ - void decrement() - { - // XXX statically assert on bidirectional_traversal_tag - --m_iterator; - } - - __thrust_hd_warning_disable__ - template - __host__ __device__ - typename iterator_adaptor::difference_type distance_to(iterator_adaptor const& y) const - { return y.base() - m_iterator; } - - private: - Base m_iterator; - - /*! \endcond - */ -}; // end iterator_adaptor - -/*! \} // end fancyiterators - */ - -/*! \} // end iterators - */ - -} // end thrust - diff --git a/compat/thrust/iterator/iterator_categories.h b/compat/thrust/iterator/iterator_categories.h deleted file mode 100644 index 81601b4a40..0000000000 --- a/compat/thrust/iterator/iterator_categories.h +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file thrust/iterator/iterator_categories.h - * \brief Types for reasoning about the categories of iterators - */ - -/* - * (C) Copyright Jeremy Siek 2002. - * - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying NOTICE file for the complete license) - * - * For more information, see http://www.boost.org - */ - - -#pragma once - -#include - -// #include this for stl's iterator tags -#include - -namespace thrust -{ - -/*! \addtogroup iterators - * \addtogroup iterator_tags Iterator Tags - * \ingroup iterators - * \addtogroup iterator_tag_classes Iterator Tag Classes - * \ingroup iterator_tags - * \{ - */ - -/*! \p input_device_iterator_tag is an empty class: it has no member functions, - * member variables, or nested types. It is used solely as a "tag": a - * representation of the Input Device Iterator concept within the C++ type - * system. - * - * \see http://www.sgi.com/tech/sgi/input_iterator_tag.html, iterator_traits, - * output_device_iterator_tag, forward_device_iterator_tag, - * bidirectional_device_iterator_tag, random_access_device_iterator_tag, - * input_host_iterator_tag, output_host_iterator_tag, forward_host_iterator_tag, - * bidirectional_host_iterator_tag, random_access_host_iterator_tag - */ -struct input_device_iterator_tag {}; - -/*! \p output_device_iterator_tag is an empty class: it has no member functions, - * member variables, or nested types. It is used solely as a "tag": a - * representation of the Output Device Iterator concept within the C++ type - * system. 
- * - * \see http://www.sgi.com/tech/sgi/output_iterator_tag.html, iterator_traits, - * input_device_iterator_tag, forward_device_iterator_tag, - * bidirectional_device_iterator_tag, random_access_device_iterator_tag, - * input_host_iterator_tag, output_host_iterator_tag, forward_host_iterator_tag, - * bidirectional_host_iterator_tag, random_access_host_iterator_tag - */ -struct output_device_iterator_tag {}; - -/*! \p forward_device_iterator_tag is an empty class: it has no member functions, - * member variables, or nested types. It is used solely as a "tag": a - * representation of the Forward Device Iterator concept within the C++ type - * system. - * - * \see http://www.sgi.com/tech/sgi/forward_iterator_tag.html, iterator_traits, - * input_device_iterator_tag, output_device_iterator_tag, - * bidirectional_device_iterator_tag, random_access_device_iterator_tag, - * input_host_iterator_tag, output_host_iterator_tag, forward_host_iterator_tag, - * bidirectional_host_iterator_tag, random_access_host_iterator_tag - */ -struct forward_device_iterator_tag : public input_device_iterator_tag {}; - -/*! \p bidirectional_device_iterator_tag is an empty class: it has no member - * functions, member variables, or nested types. It is used solely as a "tag": a - * representation of the Bidirectional Device Iterator concept within the C++ - * type system. - * - * \see http://www.sgi.com/tech/sgi/bidirectional_iterator_tag.html, - * iterator_traits, input_device_iterator_tag, output_device_iterator_tag, - * forward_device_iterator_tag, random_access_device_iterator_tag, - * input_host_iterator_tag, output_host_iterator_tag, forward_host_iterator_tag, - * bidirectional_host_iterator_tag, random_access_host_iterator_tag - */ -struct bidirectional_device_iterator_tag : public forward_device_iterator_tag {}; - -/*! \p random_access_device_iterator_tag is an empty class: it has no member - * functions, member variables, or nested types. 
It is used solely as a "tag": a - * representation of the Random Access Device Iterator concept within the C++ - * type system. - * - * \see http://www.sgi.com/tech/sgi/random_access_iterator_tag.html, - * iterator_traits, input_device_iterator_tag, output_device_iterator_tag, - * forward_device_iterator_tag, bidirectional_device_iterator_tag, - * input_host_iterator_tag, output_host_iterator_tag, forward_host_iterator_tag, - * bidirectional_host_iterator_tag, random_access_host_iterator_tag - */ -struct random_access_device_iterator_tag : public bidirectional_device_iterator_tag {}; - -/*! \p input_host_iterator_tag is an empty class: it has no member - * functions, member variables, or nested types. It is used solely as a "tag": a - * representation of the Input Host Iterator concept within the C++ - * type system. - * - * \see http://www.sgi.com/tech/sgi/input_iterator_tag.html, - * iterator_traits, input_device_iterator_tag, output_device_iterator_tag, - * forward_device_iterator_tag, bidirectional_device_iterator_tag, - * random_access_device_iterator_tag, - * output_host_iterator_tag, forward_host_iterator_tag, - * bidirectional_host_iterator_tag, random_access_host_iterator_tag - */ -typedef std::input_iterator_tag input_host_iterator_tag; - -/*! \p output_host_iterator_tag is an empty class: it has no member - * functions, member variables, or nested types. It is used solely as a "tag": a - * representation of the Output Host Iterator concept within the C++ - * type system. - * - * \see http://www.sgi.com/tech/sgi/output_iterator_tag.html, - * iterator_traits, input_device_iterator_tag, output_device_iterator_tag, - * forward_device_iterator_tag, bidirectional_device_iterator_tag, - * random_access_device_iterator_tag, - * input_host_iterator_tag, forward_host_iterator_tag, - * bidirectional_host_iterator_tag, random_access_host_iterator_tag - */ -typedef std::output_iterator_tag output_host_iterator_tag; - -/*! 
\p forward_host_iterator_tag is an empty class: it has no member - * functions, member variables, or nested types. It is used solely as a "tag": a - * representation of the Forward Host Iterator concept within the C++ - * type system. - * - * \see http://www.sgi.com/tech/sgi/forward_iterator_tag.html, - * iterator_traits, input_device_iterator_tag, output_device_iterator_tag, - * forward_device_iterator_tag, bidirectional_device_iterator_tag, - * random_access_device_iterator_tag, - * input_host_iterator_tag, output_host_iterator_tag, - * bidirectional_host_iterator_tag, random_access_host_iterator_tag - */ -typedef std::forward_iterator_tag forward_host_iterator_tag; - -/*! \p bidirectional_host_iterator_tag is an empty class: it has no member - * functions, member variables, or nested types. It is used solely as a "tag": a - * representation of the Forward Host Iterator concept within the C++ - * type system. - * - * \see http://www.sgi.com/tech/sgi/bidirectional_iterator_tag.html, - * iterator_traits, input_device_iterator_tag, output_device_iterator_tag, - * forward_device_iterator_tag, bidirectional_device_iterator_tag, - * random_access_device_iterator_tag, - * input_host_iterator_tag, output_host_iterator_tag, - * forward_host_iterator_tag, random_access_host_iterator_tag - */ -typedef std::bidirectional_iterator_tag bidirectional_host_iterator_tag; - -/*! \p random_access_host_iterator_tag is an empty class: it has no member - * functions, member variables, or nested types. It is used solely as a "tag": a - * representation of the Forward Host Iterator concept within the C++ - * type system. 
- * - * \see http://www.sgi.com/tech/sgi/random_access_iterator_tag.html, - * iterator_traits, input_device_iterator_tag, output_device_iterator_tag, - * forward_device_iterator_tag, bidirectional_device_iterator_tag, - * random_access_device_iterator_tag, - * input_host_iterator_tag, output_host_iterator_tag, - * forward_host_iterator_tag, bidirectional_host_iterator_tag - */ -typedef std::random_access_iterator_tag random_access_host_iterator_tag; - -/*! \} // end iterator_tag_classes - */ - -} // end namespace thrust - -#include - diff --git a/compat/thrust/iterator/iterator_facade.h b/compat/thrust/iterator/iterator_facade.h deleted file mode 100644 index 232c150a6f..0000000000 --- a/compat/thrust/iterator/iterator_facade.h +++ /dev/null @@ -1,538 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file thrust/iterator/iterator_facade.h - * \brief A class which exposes a public interface for iterators - */ - -/* - * (C) Copyright David Abrahams 2002. - * (C) Copyright Jeremy Siek 2002. - * (C) Copyright Thomas Witt 2002. - * - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying NOTICE file for the complete license) - * - * For more information, see http://www.boost.org - */ - - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -/*! \addtogroup iterators - * \{ - */ - -/*! 
\addtogroup fancyiterator Fancy Iterators - * \ingroup iterators - * \{ - */ - - -// This forward declaration is required for the friend declaration -// in iterator_core_access -template class iterator_facade; - - -/*! \p iterator_core_access is the class which user iterator types derived from \p thrust::iterator_adaptor - * or \p thrust::iterator_facade must befriend to allow it to access their private interface. - */ -class iterator_core_access -{ - /*! \cond - */ - - // declare our friends - template friend class iterator_facade; - - // iterator comparisons are our friends - template - inline __host__ __device__ - friend bool - operator ==(iterator_facade const& lhs, - iterator_facade const& rhs); - - template - inline __host__ __device__ - friend bool - operator !=(iterator_facade const& lhs, - iterator_facade const& rhs); - - template - inline __host__ __device__ - friend bool - operator <(iterator_facade const& lhs, - iterator_facade const& rhs); - - template - inline __host__ __device__ - friend bool - operator >(iterator_facade const& lhs, - iterator_facade const& rhs); - - template - inline __host__ __device__ - friend bool - operator <=(iterator_facade const& lhs, - iterator_facade const& rhs); - - template - inline __host__ __device__ - friend bool - operator >=(iterator_facade const& lhs, - iterator_facade const& rhs); - - // iterator difference is our friend - template - inline __host__ __device__ - friend - typename thrust::detail::distance_from_result< - iterator_facade, - iterator_facade - >::type - operator-(iterator_facade const& lhs, - iterator_facade const& rhs); - - template - __host__ __device__ - static typename Facade::reference dereference(Facade const& f) - { - return f.dereference(); - } - - template - __host__ __device__ - static void increment(Facade& f) - { - f.increment(); - } - - template - __host__ __device__ - static void decrement(Facade& f) - { - f.decrement(); - } - - template - __host__ __device__ - static bool equal(Facade1 
const& f1, Facade2 const& f2) - { - return f1.equal(f2); - } - - // XXX TODO: Investigate whether we need both of these cases - //template - //__host__ __device__ - //static bool equal(Facade1 const& f1, Facade2 const& f2, mpl::true_) - //{ - // return f1.equal(f2); - //} - - //template - //__host__ __device__ - //static bool equal(Facade1 const& f1, Facade2 const& f2, mpl::false_) - //{ - // return f2.equal(f1); - //} - - template - __host__ __device__ - static void advance(Facade& f, typename Facade::difference_type n) - { - f.advance(n); - } - - // Facade2 is convertible to Facade1, - // so return Facade1's difference_type - template - __host__ __device__ - static typename Facade1::difference_type - distance_from(Facade1 const& f1, Facade2 const& f2, thrust::detail::true_type) - { - return -f1.distance_to(f2); - } - - // Facade2 is not convertible to Facade1, - // so return Facade2's difference_type - template - __host__ __device__ - static typename Facade2::difference_type - distance_from(Facade1 const& f1, Facade2 const& f2, thrust::detail::false_type) - { - return f2.distance_to(f1); - } - - template - __host__ __device__ - static typename thrust::detail::distance_from_result::type - distance_from(Facade1 const& f1, Facade2 const& f2) - { - // dispatch the implementation of this method upon whether or not - // Facade2 is convertible to Facade1 - return distance_from(f1, f2, - typename thrust::detail::is_convertible::type()); - } - - // - // Curiously Recurring Template interface. - // - template - __host__ __device__ - static Derived& derived(iterator_facade& facade) - { - return *static_cast(&facade); - } - - template - __host__ __device__ - static Derived const& derived(iterator_facade const& facade) - { - return *static_cast(&facade); - } - - /*! \endcond - */ -}; // end iterator_core_access - - -/*! 
\p iterator_facade is a template which allows the programmer to define a novel iterator with a standards-conforming interface - * which Thrust can use to reason about algorithm acceleration opportunities. - * - * Because most of a standard iterator's interface is defined in terms of a small set of core primitives, \p iterator_facade - * defines the non-primitive portion mechanically. In principle a novel iterator could explicitly provide the entire interface in - * an ad hoc fashion but doing so might be tedious and prone to subtle errors. - * - * Often \p iterator_facade is too primitive a tool to use for defining novel iterators. In these cases, \p iterator_adaptor - * or a specific fancy iterator should be used instead. - * - * \p iterator_facade's functionality is derived from and generally equivalent to \p boost::iterator_facade. - * The exception is Thrust's addition of the template parameter \p System, which is necessary to allow Thrust - * to dispatch an algorithm to one of several parallel backend systems. An additional exception is Thrust's omission - * of the \c operator-> member function. - * - * Interested users may refer to boost::iterator_facade's documentation for usage examples. - * - * \note \p iterator_facade's arithmetic operator free functions exist with the usual meanings but are omitted here for brevity. - */ -template - class iterator_facade -{ - private: - /*! \cond - */ - - // - // Curiously Recurring Template interface. - // - __host__ __device__ - Derived& derived() - { - return *static_cast(this); - } - - __host__ __device__ - Derived const& derived() const - { - return *static_cast(this); - } - /*! \endcond - */ - - public: - /*! The type of element pointed to by \p iterator_facade. - */ - typedef typename thrust::detail::remove_const::type value_type; - - /*! The return type of \p iterator_facade::operator*(). - */ - typedef Reference reference; - - /*! 
The return type of \p iterator_facade's non-existent \c operator->() - * member function. Unlike \c boost::iterator_facade, \p iterator_facade - * disallows access to the \p value_type's members through expressions of the - * form iter->member. \p pointer is defined to \c void to indicate - * that these expressions are not allowed. This limitation may be relaxed in a - * future version of Thrust. - */ - typedef void pointer; - - /*! The type of expressions of the form x - y where x and y - * are of type \p iterator_facade. - */ - typedef Difference difference_type; - - /*! The type of iterator category of \p iterator_facade. - */ - typedef typename thrust::detail::iterator_facade_category< - System, Traversal, Value, Reference - >::type iterator_category; - - /*! \p operator*() dereferences this \p iterator_facade. - * \return A reference to the element pointed to by this \p iterator_facade. - */ - __host__ __device__ - reference operator*() const - { - return iterator_core_access::dereference(this->derived()); - } - - // XXX unimplemented for now, consider implementing it later - //pointer operator->() const - //{ - // return; - //} - - // XXX investigate whether or not we need to go to the lengths - // boost does to determine the return type - - /*! \p operator[] performs indexed dereference. - * \return A reference to the element \p n distance away from this \p iterator_facade. - */ - __host__ __device__ - reference operator[](difference_type n) const - { - return *(this->derived() + n); - } - - /*! \p operator++ pre-increments this \p iterator_facade to refer to the element in the next position. - * \return *this - */ - __host__ __device__ - Derived& operator++() - { - iterator_core_access::increment(this->derived()); - return this->derived(); - } - - /*! \p operator++ post-increments this \p iterator_facade and returns a new \p iterator_facade referring to the element in the next position. - * \return A copy of *this before increment. 
- */ - __host__ __device__ - Derived operator++(int) - { - Derived tmp(this->derived()); - ++*this; - return tmp; - } - - /*! \p operator-- pre-decrements this \p iterator_facade to refer to the element in the previous position. - * \return *this - */ - __host__ __device__ - Derived& operator--() - { - iterator_core_access::decrement(this->derived()); - return this->derived(); - } - - /*! \p operator-- post-decrements this \p iterator_facade and returns a new \p iterator_facade referring to the element in the previous position. - * \return A copy of *this before decrement. - */ - __host__ __device__ - Derived operator--(int) - { - Derived tmp(this->derived()); - --*this; - return tmp; - } - - /*! \p operator+= increments this \p iterator_facade to refer to an element a given distance after its current position. - * \param n The quantity to increment. - * \return *this - */ - __host__ __device__ - Derived& operator+=(difference_type n) - { - iterator_core_access::advance(this->derived(), n); - return this->derived(); - } - - /*! \p operator-= decrements this \p iterator_facade to refer to an element a given distance before its current postition. - * \param n The quantity to decrement. - * \return *this - */ - __host__ __device__ - Derived& operator-=(difference_type n) - { - iterator_core_access::advance(this->derived(), -n); - return this->derived(); - } - - /*! \p operator- subtracts a given quantity from this \p iterator_facade and returns a new \p iterator_facade referring to the element at the given position before this \p iterator_facade. - * \param n The quantity to decrement - * \return An \p iterator_facade pointing \p n elements before this \p iterator_facade. - */ - __host__ __device__ - Derived operator-(difference_type n) const - { - Derived result(this->derived()); - return result -= n; - } -}; // end iterator_facade - -/*! 
\cond - */ - -// Comparison operators -template -inline __host__ __device__ -// XXX it might be nice to implement this at some point -//typename enable_if_interoperable::type // exposition -bool -operator ==(iterator_facade const& lhs, - iterator_facade const& rhs) -{ - return iterator_core_access - ::equal(*static_cast(&lhs), - *static_cast(&rhs)); -} - -template -inline __host__ __device__ -// XXX it might be nice to implement this at some point -//typename enable_if_interoperable::type // exposition -bool -operator !=(iterator_facade const& lhs, - iterator_facade const& rhs) -{ - return !iterator_core_access - ::equal(*static_cast(&lhs), - *static_cast(&rhs)); -} - -template -inline __host__ __device__ -// XXX it might be nice to implement this at some point -//typename enable_if_interoperable::type // exposition -bool -operator <(iterator_facade const& lhs, - iterator_facade const& rhs) -{ - return 0 > iterator_core_access - ::distance_from(*static_cast(&lhs), - *static_cast(&rhs)); -} - -template -inline __host__ __device__ -// XXX it might be nice to implement this at some point -//typename enable_if_interoperable::type // exposition -bool -operator >(iterator_facade const& lhs, - iterator_facade const& rhs) -{ - return 0 < iterator_core_access - ::distance_from(*static_cast(&lhs), - *static_cast(&rhs)); -} - -template -inline __host__ __device__ -// XXX it might be nice to implement this at some point -//typename enable_if_interoperable::type // exposition -bool -operator <=(iterator_facade const& lhs, - iterator_facade const& rhs) -{ - return 0 >= iterator_core_access - ::distance_from(*static_cast(&lhs), - *static_cast(&rhs)); -} - -template -inline __host__ __device__ -// XXX it might be nice to implement this at some point -//typename enable_if_interoperable::type // exposition -bool -operator >=(iterator_facade const& lhs, - iterator_facade const& rhs) -{ - return 0 <= iterator_core_access - ::distance_from(*static_cast(&lhs), - *static_cast(&rhs)); -} 
- -// Iterator difference -template -inline __host__ __device__ - -// divine the type this operator returns -typename thrust::detail::distance_from_result< - iterator_facade, - iterator_facade ->::type - -operator-(iterator_facade const& lhs, - iterator_facade const& rhs) -{ - return iterator_core_access - ::distance_from(*static_cast(&lhs), - *static_cast(&rhs)); -} - -// Iterator addition -template -inline __host__ __device__ -Derived operator+ (iterator_facade const& i, - typename Derived::difference_type n) -{ - Derived tmp(static_cast(i)); - return tmp += n; -} - -template -inline __host__ __device__ -Derived operator+ (typename Derived::difference_type n, - iterator_facade const& i) -{ - Derived tmp(static_cast(i)); - return tmp += n; -} - -/*! \endcond - */ - -/*! \} // end fancyiterators - */ - -/*! \} // end iterators - */ - -} // end thrust - diff --git a/compat/thrust/iterator/iterator_traits.h b/compat/thrust/iterator/iterator_traits.h deleted file mode 100644 index a16f219b07..0000000000 --- a/compat/thrust/iterator/iterator_traits.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file thrust/iterator/iterator_traits.h - * \brief Traits and metafunctions for reasoning about the traits of iterators - */ - -/* - * (C) Copyright David Abrahams 2003. - * - * Distributed under the Boost Software License, Version 1.0. 
- * (See accompanying NOTICE file for the complete license) - * - * For more information, see http://www.boost.org - */ - -#pragma once - -#include -#include - -namespace thrust -{ - -/*! \p iterator_traits is a type trait class that provides a uniform - * interface for querying the properties of iterators at compile-time. - */ -template - struct iterator_traits - : public std::iterator_traits -{ -}; // end iterator_traits - - -template struct iterator_value; - -template struct iterator_pointer; - -template struct iterator_reference; - -template struct iterator_difference; - -template struct iterator_traversal; - -template struct iterator_system; - -// TODO remove this in Thrust v1.7.0 -template - struct THRUST_DEPRECATED iterator_space -{ - typedef THRUST_DEPRECATED typename iterator_system::type type; -}; - - -} // end thrust - -#include -#include -#include -#include -#include - diff --git a/compat/thrust/iterator/permutation_iterator.h b/compat/thrust/iterator/permutation_iterator.h deleted file mode 100644 index 509097b347..0000000000 --- a/compat/thrust/iterator/permutation_iterator.h +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file thrust/iterator/permutation_iterator.h - * \brief An iterator which performs a gather or scatter operation when dereferenced - */ - -/* - * (C) Copyright Toon Knapen 2001. - * (C) Copyright David Abrahams 2003. 
- * (C) Copyright Roland Richter 2003. - * - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying NOTICE file for the complete license) - * - * For more information, see http://www.boost.org - */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ - - -/*! \addtogroup iterators - * \{ - */ - -/*! \addtogroup fancyiterator Fancy Iterators - * \ingroup iterators - * \{ - */ - -/*! \p permutation_iterator is an iterator which represents a pointer into a - * reordered view of a given range. \p permutation_iterator is an imprecise name; - * the reordered view need not be a strict permutation. This iterator is useful - * for fusing a scatter or gather operation with other algorithms. - * - * This iterator takes two arguments: - * - * - an iterator to the range \c V on which the "permutation" will be applied - * - the reindexing scheme that defines how the elements of \c V will be permuted. - * - * Note that \p permutation_iterator is not limited to strict permutations of the - * given range \c V. The distance between begin and end of the reindexing iterators - * is allowed to be smaller compared to the size of the range \c V, in which case - * the \p permutation_iterator only provides a "permutation" of a subrange of \c V. - * The indices neither need to be unique. In this same context, it must be noted - * that the past-the-end \p permutation_iterator is completely defined by means of - * the past-the-end iterator to the indices. - * - * The following code snippet demonstrates how to create a \p permutation_iterator - * which represents a reordering of the contents of a \p device_vector. - * - * \code - * #include - * #include - * ... 
- * thrust::device_vector values(4); - * values[0] = 10.0f; - * values[1] = 20.0f; - * values[2] = 30.0f; - * values[3] = 40.0f; - * values[4] = 50.0f; - * values[5] = 60.0f; - * values[6] = 70.0f; - * values[7] = 80.0f; - * - * thrust::device_vector indices(4); - * indices[0] = 2; - * indices[1] = 6; - * indices[2] = 1; - * indices[3] = 3; - * - * typedef thrust::device_vector::iterator ElementIterator; - * typedef thrust::device_vector::iterator IndexIterator; - * - * thrust::permutation_iterator iter(values.begin(), indices.begin()); - * - * *iter; // returns 30.0f; - * iter[0]; // returns 30.0f; - * iter[1]; // returns 70.0f; - * iter[2]; // returns 20.0f; - * iter[3]; // returns 40.0f; - * - * // iter[4] is an out-of-bounds error - * - * *iter = -1.0f; // sets values[2] to -1.0f; - * iter[0] = -1.0f; // sets values[2] to -1.0f; - * iter[1] = -1.0f; // sets values[6] to -1.0f; - * iter[2] = -1.0f; // sets values[1] to -1.0f; - * iter[3] = -1.0f; // sets values[3] to -1.0f; - * - * // values is now {10, -1, -1, -1, 50, 60, -1, 80} - * \endcode - * - * \see make_permutation_iterator - */ -template - class permutation_iterator - : public thrust::detail::permutation_iterator_base< - ElementIterator, - IndexIterator - >::type -{ - /*! \cond - */ - private: - typedef typename detail::permutation_iterator_base::type super_t; - - friend class thrust::iterator_core_access; - /*! \endcond - */ - - public: - /*! Null constructor calls the null constructor of this \p permutation_iterator's - * element iterator. - */ - __host__ __device__ - permutation_iterator() - : m_element_iterator() {} - - /*! Constructor accepts an \c ElementIterator into a range of values and an - * \c IndexIterator into a range of indices defining the indexing scheme on the - * values. - * - * \param x An \c ElementIterator pointing this \p permutation_iterator's range of values. - * \param y An \c IndexIterator pointing to an indexing scheme to use on \p x. 
- */ - __host__ __device__ - explicit permutation_iterator(ElementIterator x, IndexIterator y) - : super_t(y), m_element_iterator(x) {} - - /*! Copy constructor accepts a related \p permutation_iterator. - * \param r A compatible \p permutation_iterator to copy from. - */ - template - __host__ __device__ - permutation_iterator(permutation_iterator const &r - // XXX remove these guards when we have static_assert - , typename detail::enable_if_convertible::type* = 0 - , typename detail::enable_if_convertible::type* = 0 - ) - : super_t(r.base()), m_element_iterator(r.m_element_iterator) - {} - - /*! \cond - */ - private: - __thrust_hd_warning_disable__ - __host__ __device__ - typename super_t::reference dereference() const - { - return *(m_element_iterator + *this->base()); - } - - // make friends for the copy constructor - template friend class permutation_iterator; - - ElementIterator m_element_iterator; - /*! \endcond - */ -}; // end permutation_iterator - - -/*! \p make_permutation_iterator creates a \p permutation_iterator - * from an \c ElementIterator pointing to a range of elements to "permute" - * and an \c IndexIterator pointing to a range of indices defining an indexing - * scheme on the values. - * - * \param e An \c ElementIterator pointing to a range of values. - * \param i An \c IndexIterator pointing to an indexing scheme to use on \p e. - * \return A new \p permutation_iterator which permutes the range \p e by \p i. - * \see permutation_iterator - */ -template -__host__ __device__ -permutation_iterator make_permutation_iterator(ElementIterator e, IndexIterator i) -{ - return permutation_iterator(e,i); -} - -/*! \} // end fancyiterators - */ - -/*! 
\} // end iterators - */ - -} // end thrust - diff --git a/compat/thrust/iterator/retag.h b/compat/thrust/iterator/retag.h deleted file mode 100644 index 660da8f2fd..0000000000 --- a/compat/thrust/iterator/retag.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file thrust/iterator/retag.h - * \brief Functionality for altering an iterator's associated system. - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \ingroup iterator_tags - * \{ - */ - -#if 0 -/*! \p reinterpret_tag returns a copy of an iterator and changes the type of the result's system tag. - * \tparam Tag Any system tag. - * \tparam Iterator Any iterator type. - * \param iter The iterator of interest. - * \return An iterator of unspecified type whose system tag is \p Tag and whose behavior is otherwise - * equivalent to \p iter. - * \note Unlike \p retag, \p reinterpret_tag does not enforce that the converted-to system tag be - * related to the converted-from system tag. - * \see retag - */ -template -unspecified_iterator_type reinterpret_tag(Iterator iter); - -/*! \p retag returns a copy of an iterator and changes the type of the result's system tag. - * \tparam Tag \p Tag shall be convertible to thrust::iterator_system::type, - * or thrust::iterator_system::type is a base type of \p Tag. - * \tparam Iterator Any iterator type. - * \param iter The iterator of interest. 
- * \return An iterator of unspecified type whose system tag is \p Tag and whose behavior is - * otherwise equivalent to \p iter. - * \note Unlike \p reinterpret_tag, \p retag enforces that the converted-to system tag be - * related to the converted-from system tag. - * \see reinterpret_tag - */ -template -unspecified_iterator_type retag(Iterator iter); -#endif - -/*! \} // iterator_tags - */ - - -} // end thrust - diff --git a/compat/thrust/iterator/reverse_iterator.h b/compat/thrust/iterator/reverse_iterator.h deleted file mode 100644 index 03f03396d3..0000000000 --- a/compat/thrust/iterator/reverse_iterator.h +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file thrust/iterator/reverse_iterator.h - * \brief An iterator adaptor which adapts another iterator to traverse backwards - */ - -/* - * (C) Copyright David Abrahams 2002. - * (C) Copyright Jeremy Siek 2002. - * (C) Copyright Thomas Witt 2002. - * - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying NOTICE file for the complete license) - * - * For more information, see http://www.boost.org - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -/*! \addtogroup iterators - * \{ - */ - -/*! \addtogroup fancyiterator Fancy Iterators - * \ingroup iterators - * \{ - */ - -/*! 
\p reverse_iterator is an iterator which represents a pointer into a - * reversed view of a given range. In this way, \p reverse_iterator allows - * backwards iteration through a bidirectional input range. - * - * It is important to note that although \p reverse_iterator is constructed - * from a given iterator, it points to the element preceding it. In this way, - * the past-the-end \p reverse_iterator of a given range points to the element - * preceding the first element of the input range. By the same token, the first - * \p reverse_iterator of a given range is constructed from a past-the-end iterator - * of the original range yet points to the last element of the input. - * - * The following code snippet demonstrates how to create a \p reverse_iterator - * which represents a reversed view of the contents of a \p device_vector. - * - * \code - * #include - * #include - * ... - * thrust::device_vector v(4); - * v[0] = 0.0f; - * v[1] = 1.0f; - * v[2] = 2.0f; - * v[3] = 3.0f; - * - * typedef thrust::device_vector::iterator Iterator; - * - * // note that we point the iterator to the *end* of the device_vector - * thrust::reverse_iterator iter(values.end()); - * - * *iter; // returns 3.0f; - * iter[0]; // returns 3.0f; - * iter[1]; // returns 2.0f; - * iter[2]; // returns 1.0f; - * iter[3]; // returns 0.0f; - * - * // iter[4] is an out-of-bounds error - * \endcode - * - * Since reversing a range is a common operation, containers like \p device_vector - * have nested typedefs for declaration shorthand and methods for constructing - * reverse_iterators. The following code snippet is equivalent to the previous: - * - * \code - * #include - * ... 
- * thrust::device_vector v(4); - * v[0] = 0.0f; - * v[1] = 1.0f; - * v[2] = 2.0f; - * v[3] = 3.0f; - * - * // we use the nested type reverse_iterator to refer to a reversed view of - * // a device_vector and the method rbegin() to create a reverse_iterator pointing - * // to the beginning of the reversed device_vector - * thrust::device_iterator::reverse_iterator iter = values.rbegin(); - * - * *iter; // returns 3.0f; - * iter[0]; // returns 3.0f; - * iter[1]; // returns 2.0f; - * iter[2]; // returns 1.0f; - * iter[3]; // returns 0.0f; - * - * // iter[4] is an out-of-bounds error - * - * // similarly, rend() points to the end of the reversed sequence: - * assert(values.rend() == (iter + 4)); - * \endcode - * - * Finally, the following code snippet demonstrates how to use reverse_iterator to - * perform a reversed prefix sum operation on the contents of a device_vector: - * - * \code - * #include - * #include - * ... - * thrust::device_vector v(5); - * v[0] = 0; - * v[1] = 1; - * v[2] = 2; - * v[3] = 3; - * v[4] = 4; - * - * thrust::device_vector result(5); - * - * // exclusive scan v into result in reverse - * thrust::exclusive_scan(v.rbegin(), v.rend(), result.begin()); - * - * // result is now {0, 4, 7, 9, 10} - * \endcode - * - * \see make_reverse_iterator - */ -template - class reverse_iterator - : public detail::reverse_iterator_base::type -{ - /*! \cond - */ - private: - typedef typename thrust::detail::reverse_iterator_base< - BidirectionalIterator - >::type super_t; - - friend class thrust::iterator_core_access; - /*! \endcond - */ - - public: - /*! Default constructor does nothing. - */ - __host__ __device__ - reverse_iterator(void) {} - - /*! \p Constructor accepts a \c BidirectionalIterator pointing to a range - * for this \p reverse_iterator to reverse. - * - * \param x A \c BidirectionalIterator pointing to a range to reverse. - */ - __host__ __device__ - explicit reverse_iterator(BidirectionalIterator x); - - /*! 
\p Copy constructor allows construction from a related compatible - * \p reverse_iterator. - * - * \param r A \p reverse_iterator to copy from. - */ - template - __host__ __device__ - reverse_iterator(reverse_iterator const &r -// XXX msvc screws this up -// XXX remove these guards when we have static_assert -#ifndef _MSC_VER - , typename thrust::detail::enable_if< - thrust::detail::is_convertible< - OtherBidirectionalIterator, - BidirectionalIterator - >::value - >::type * = 0 -#endif // _MSC_VER - ); - - /*! \cond - */ - private: - __thrust_hd_warning_disable__ - __host__ __device__ - typename super_t::reference dereference(void) const; - - __host__ __device__ - void increment(void); - - __host__ __device__ - void decrement(void); - - __host__ __device__ - void advance(typename super_t::difference_type n); - - template - __host__ __device__ - typename super_t::difference_type - distance_to(reverse_iterator const &y) const; - /*! \endcond - */ -}; // end reverse_iterator - - -/*! \p make_reverse_iterator creates a \p reverse_iterator - * from a \c BidirectionalIterator pointing to a range of elements to reverse. - * - * \param x A \c BidirectionalIterator pointing to a range to reverse. - * \return A new \p reverse_iterator which reverses the range \p x. - */ -template -__host__ __device__ -reverse_iterator make_reverse_iterator(BidirectionalIterator x); - - -/*! \} // end fancyiterators - */ - -/*! \} // end iterators - */ - -} // end thrust - -#include - diff --git a/compat/thrust/iterator/transform_iterator.h b/compat/thrust/iterator/transform_iterator.h deleted file mode 100644 index 985b61b775..0000000000 --- a/compat/thrust/iterator/transform_iterator.h +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file thrust/iterator/transform_iterator.h - * \brief An iterator which adapts another iterator by applying a function to the result of its dereference - */ - -/* - * (C) Copyright David Abrahams 2002. - * (C) Copyright Jeremy Siek 2002. - * (C) Copyright Thomas Witt 2002. - * - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying NOTICE file for the complete license) - * - * For more information, see http://www.boost.org - */ - -#pragma once - -#include - -// #include the details first -#include -#include -#include -#include - -namespace thrust -{ - -/*! \addtogroup iterators - * \{ - */ - -/*! \addtogroup fancyiterator Fancy Iterators - * \ingroup iterators - * \{ - */ - -/*! \p transform_iterator is an iterator which represents a pointer into a range - * of values after transformation by a function. This iterator is useful for - * creating a range filled with the result of applying an operation to another range - * without either explicitly storing it in memory, or explicitly executing the transformation. - * Using \p transform_iterator facilitates kernel fusion by deferring the execution - * of a transformation until the value is needed while saving both memory capacity - * and bandwidth. - * - * The following code snippet demonstrates how to create a \p transform_iterator - * which represents the result of \c sqrtf applied to the contents of a \p device_vector. 
- * - * \code - * #include - * #include - * - * // note: functor inherits from unary_function - * struct square_root : public thrust::unary_function - * { - * __host__ __device__ - * float operator()(float x) const - * { - * return sqrtf(x); - * } - * }; - * - * int main(void) - * { - * thrust::device_vector v(4); - * v[0] = 1.0f; - * v[1] = 4.0f; - * v[2] = 9.0f; - * v[3] = 16.0f; - * - * typedef thrust::device_vector::iterator FloatIterator; - * - * thrust::transform_iterator iter(v.begin(), square_root()); - * - * *iter; // returns 1.0f - * iter[0]; // returns 1.0f; - * iter[1]; // returns 2.0f; - * iter[2]; // returns 3.0f; - * iter[3]; // returns 4.0f; - * - * // iter[4] is an out-of-bounds error - * } - * \endcode - * - * This next example demonstrates how to use a \p transform_iterator with the - * \p thrust::reduce function to compute the sum of squares of a sequence. - * We will create temporary \p transform_iterators with the - * \p make_transform_iterator function in order to avoid explicitly specifying their type: - * - * \code - * #include - * #include - * #include - * #include - * - * // note: functor inherits from unary_function - * struct square : public thrust::unary_function - * { - * __host__ __device__ - * float operator()(float x) const - * { - * return x * x; - * } - * }; - * - * int main(void) - * { - * // initialize a device array - * thrust::device_vector v(4); - * v[0] = 1.0f; - * v[1] = 2.0f; - * v[2] = 3.0f; - * v[3] = 4.0f; - * - * float sum_of_squares = - * thrust::reduce(thrust::make_transform_iterator(v.begin(), square()), - * thrust::make_transform_iterator(v.end(), square())); - * - * std::cout << "sum of squares: " << sum_of_squares << std::endl; - * return 0; - * } - * \endcode - * - * Note that in the previous two examples the transform functor (namely \c square_root - * and \c square) inherits from \c thrust::unary_function. 
Inheriting from - * \c thrust::unary_function ensures that a functor is a valid \c AdaptableUnaryFunction - * and provides all the necessary \c typedef declarations. The \p transform_iterator - * can also be applied to a \c UnaryFunction that does not inherit from - * \c thrust::unary_function using an optional template argument. The following example - * illustrates how to use the third template argument to specify the \c result_type of - * the function. - * - * \code - * #include - * #include - * - * // note: functor *does not* inherit from unary_function - * struct square_root - * { - * __host__ __device__ - * float operator()(float x) const - * { - * return sqrtf(x); - * } - * }; - * - * int main(void) - * { - * thrust::device_vector v(4); - * v[0] = 1.0f; - * v[1] = 4.0f; - * v[2] = 9.0f; - * v[3] = 16.0f; - * - * typedef thrust::device_vector::iterator FloatIterator; - * - * // note: float result_type is specified explicitly - * thrust::transform_iterator iter(v.begin(), square_root()); - * - * *iter; // returns 1.0f - * iter[0]; // returns 1.0f; - * iter[1]; // returns 2.0f; - * iter[2]; // returns 3.0f; - * iter[3]; // returns 4.0f; - * - * // iter[4] is an out-of-bounds error - * } - * \endcode - * - * \see make_transform_iterator - */ -template - class transform_iterator - : public detail::transform_iterator_base::type -{ - /*! \cond - */ - public: - typedef typename - detail::transform_iterator_base::type - super_t; - - friend class thrust::iterator_core_access; - /*! \endcond - */ - - public: - /*! Null constructor does nothing. - */ - __host__ __device__ - transform_iterator() {} - - /*! This constructor takes as arguments an \c Iterator and an \c AdaptableUnaryFunction - * and copies them to a new \p transform_iterator. - * - * \param x An \c Iterator pointing to the input to this \p transform_iterator's \c AdaptableUnaryFunction. - * \param f An \c AdaptableUnaryFunction used to transform the objects pointed to by \p x. 
- */ - __host__ __device__ - transform_iterator(Iterator const& x, AdaptableUnaryFunction f) - : super_t(x), m_f(f) { - } - - /*! This explicit constructor copies the value of a given \c Iterator and creates - * this \p transform_iterator's \c AdaptableUnaryFunction using its null constructor. - * - * \param x An \c Iterator to copy. - */ - __host__ __device__ - explicit transform_iterator(Iterator const& x) - : super_t(x) { } - - /*! This copy constructor creates a new \p transform_iterator from another - * \p transform_iterator. - * - * \param other The \p transform_iterator to copy. - */ - template - __host__ __device__ - transform_iterator(const transform_iterator &other, - typename thrust::detail::enable_if_convertible::type* = 0, - typename thrust::detail::enable_if_convertible::type* = 0) - : super_t(other.base()), m_f(other.functor()) {} - - /*! Copy assignment operator copies from another \p transform_iterator. - * \p other The other \p transform_iterator to copy - * \return *this - * - * \note If the type of this \p transform_iterator's functor is not copy assignable - * (for example, if it is a lambda) it is not an error to call this function. - * In this case, however, the functor will not be modified. - * - * In any case, this \p transform_iterator's underlying iterator will be copy assigned. - */ - __host__ __device__ - transform_iterator &operator=(const transform_iterator &other) - { - return do_assign(other, - // XXX gcc 4.2.1 crashes on is_copy_assignable; just assume the functor is assignable as a WAR -#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC) && (THRUST_GCC_VERSION <= 40201) - thrust::detail::true_type() -#else - typename thrust::detail::is_copy_assignable::type() -#endif // THRUST_HOST_COMPILER - ); - } - - /*! This method returns a copy of this \p transform_iterator's \c AdaptableUnaryFunction. - * \return A copy of this \p transform_iterator's \c AdaptableUnaryFunction. 
- */ - __host__ __device__ - AdaptableUnaryFunction functor() const - { return m_f; } - - /*! \cond - */ - private: - __host__ __device__ - transform_iterator &do_assign(const transform_iterator &other, thrust::detail::true_type) - { - super_t::operator=(other); - - // do assign to m_f - m_f = other.functor(); - - return *this; - } - - __host__ __device__ - transform_iterator &do_assign(const transform_iterator &other, thrust::detail::false_type) - { - super_t::operator=(other); - - // don't assign to m_f - - return *this; - } - - __thrust_hd_warning_disable__ - __host__ __device__ - typename super_t::reference dereference() const - { - // XXX consider making this a member instead of a temporary created inside dereference - thrust::detail::host_device_function wrapped_f(m_f); - - return wrapped_f(*this->base()); - } - - // tag this as mutable per Dave Abrahams in this thread: - // http://lists.boost.org/Archives/boost/2004/05/65332.php - mutable AdaptableUnaryFunction m_f; - - /*! \endcond - */ -}; // end transform_iterator - - -/*! \p make_transform_iterator creates a \p transform_iterator - * from an \c Iterator and \c AdaptableUnaryFunction. - * - * \param it The \c Iterator pointing to the input range of the - * newly created \p transform_iterator. - * \param fun The \c AdaptableUnaryFunction used to transform the range pointed - * to by \p it in the newly created \p transform_iterator. - * \return A new \p transform_iterator which transforms the range at - * \p it by \p fun. - * \see transform_iterator - */ -template -inline __host__ __device__ -transform_iterator -make_transform_iterator(Iterator it, AdaptableUnaryFunction fun) -{ - return transform_iterator(it, fun); -} // end make_transform_iterator - -/*! \} // end fancyiterators - */ - -/*! 
\} // end iterators - */ - -} // end thrust - diff --git a/compat/thrust/iterator/zip_iterator.h b/compat/thrust/iterator/zip_iterator.h deleted file mode 100644 index 8e7299c407..0000000000 --- a/compat/thrust/iterator/zip_iterator.h +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file thrust/iterator/zip_iterator.h - * \brief An iterator which returns a tuple of the result of dereferencing - * a tuple of iterators when dereferenced - */ - -/* - * Copyright David Abrahams and Thomas Becker 2000-2006. - * - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying NOTICE file for the complete license) - * - * For more information, see http://www.boost.org - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -/*! \addtogroup iterators - * \{ - */ - -/*! \addtogroup fancyiterator Fancy Iterators - * \ingroup iterators - * \{ - */ - -/*! \p zip_iterator is an iterator which represents a pointer into a range - * of \p tuples whose elements are themselves taken from a \p tuple of input - * iterators. This iterator is useful for creating a virtual array of structures - * while achieving the same performance and bandwidth as the structure of arrays - * idiom. \p zip_iterator also facilitates kernel fusion by providing a convenient - * means of amortizing the execution of the same operation over multiple ranges. 
- * - * The following code snippet demonstrates how to create a \p zip_iterator - * which represents the result of "zipping" multiple ranges together. - * - * \code - * #include - * #include - * #include - * ... - * thrust::device_vector int_v(3); - * int_v[0] = 0; int_v[1] = 1; int_v[2] = 2; - * - * thrust::device_vector float_v(3); - * float_v[0] = 0.0f; float_v[1] = 1.0;f float_v[2] = 2.0f; - * - * thrust::device_vector char_v(3); - * char_v[0] = 'a'; char_v[1] = 'b'; char_v[2] = 'c'; - * - * // typedef these iterators for shorthand - * typedef thrust::device_vector::iterator IntIterator; - * typedef thrust::device_vector::iterator FloatIterator; - * typedef thrust::device_vector::iterator CharIterator; - * - * // typedef a tuple of these iterators - * typedef thrust::tuple IteratorTuple; - * - * // typedef the zip_iterator of this tuple - * typedef thrust::zip_iterator ZipIterator; - * - * // finally, create the zip_iterator - * ZipIterator iter(thrust::make_tuple(int_v.begin(), float_v.begin(), char_v.begin())); - * - * *iter; // returns (0, 0.0f, 'a') - * iter[0]; // returns (0, 0.0f, 'a') - * iter[1]; // returns (1, 1.0f, 'b') - * iter[2]; // returns (2, 2.0f, 'c') - * - * thrust::get<0>(iter[2]); // returns 2 - * thrust::get<1>(iter[0]); // returns 0.0f - * thrust::get<2>(iter[1]); // returns 'b' - * - * // iter[3] is an out-of-bounds error - * \endcode - * - * Defining the type of a \p zip_iterator can be complex. The next code example demonstrates - * how to use the \p make_zip_iterator function with the \p make_tuple function to avoid - * explicitly specifying the type of the \p zip_iterator. This example shows how to use - * \p zip_iterator to copy multiple ranges with a single call to \p thrust::copy. 
- * - * \code - * #include - * #include - * #include - * - * int main(void) - * { - * thrust::device_vector int_in(3), int_out(3); - * int_in[0] = 0; - * int_in[1] = 1; - * int_in[2] = 2; - * - * thrust::device_vector float_in(3), float_out(3); - * float_in[0] = 0.0f; - * float_in[1] = 10.0f; - * float_in[2] = 20.0f; - * - * thrust::copy(thrust::make_zip_iterator(thrust::make_tuple(int_in.begin(), float_in.begin())), - * thrust::make_zip_iterator(thrust::make_tuple(int_in.end(), float_in.end())), - * thrust::make_zip_iterator(thrust::make_tuple(int_out.begin(),float_out.begin()))); - * - * // int_out is now [0, 1, 2] - * // float_out is now [0.0f, 10.0f, 20.0f] - * - * return 0; - * } - * \endcode - * - * \see make_zip_iterator - * \see make_tuple - * \see tuple - * \see get - */ -template - class zip_iterator - : public detail::zip_iterator_base::type -{ - public: - /*! Null constructor does nothing. - */ - inline __host__ __device__ - zip_iterator(void); - - /*! This constructor creates a new \p zip_iterator from a - * \p tuple of iterators. - * - * \param iterator_tuple The \p tuple of iterators to copy from. - */ - inline __host__ __device__ - zip_iterator(IteratorTuple iterator_tuple); - - /*! This copy constructor creates a new \p zip_iterator from another - * \p zip_iterator. - * - * \param other The \p zip_iterator to copy. - */ - template - inline __host__ __device__ - zip_iterator(const zip_iterator &other, - typename thrust::detail::enable_if_convertible< - OtherIteratorTuple, - IteratorTuple - >::type * = 0); - - /*! This method returns a \c const reference to this \p zip_iterator's - * \p tuple of iterators. - * - * \return A \c const reference to this \p zip_iterator's \p tuple - * of iterators. - */ - inline __host__ __device__ - const IteratorTuple &get_iterator_tuple() const; - - /*! 
\cond - */ - private: - typedef typename - detail::zip_iterator_base::type super_t; - - friend class thrust::iterator_core_access; - - // Dereferencing returns a tuple built from the dereferenced - // iterators in the iterator tuple. - __host__ __device__ - typename super_t::reference dereference() const; - - // Two zip_iterators are equal if the two first iterators of the - // tuple are equal. Note this differs from Boost's implementation, which - // considers the entire tuple. - template - inline __host__ __device__ - bool equal(const zip_iterator &other) const; - - // Advancing a zip_iterator means to advance all iterators in the tuple - inline __host__ __device__ - void advance(typename super_t::difference_type n); - - // Incrementing a zip iterator means to increment all iterators in the tuple - inline __host__ __device__ - void increment(); - - // Decrementing a zip iterator means to decrement all iterators in the tuple - inline __host__ __device__ - void decrement(); - - // Distance is calculated using the first iterator in the tuple. - template - inline __host__ __device__ - typename super_t::difference_type - distance_to(const zip_iterator &other) const; - - // The iterator tuple. - IteratorTuple m_iterator_tuple; - - /*! \endcond - */ -}; // end zip_iterator - -/*! \p make_zip_iterator creates a \p zip_iterator from a \p tuple - * of iterators. - * - * \param t The \p tuple of iterators to copy. - * \return A newly created \p zip_iterator which zips the iterators encapsulated in \p t. - * - * \see zip_iterator - */ -template -inline __host__ __device__ -zip_iterator make_zip_iterator(IteratorTuple t); - -/*! \} // end fancyiterators - */ - -/*! 
\} // end iterators - */ - -} // end thrust - -#include - diff --git a/compat/thrust/logical.h b/compat/thrust/logical.h deleted file mode 100644 index 21510f3f21..0000000000 --- a/compat/thrust/logical.h +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file logical.h - * \brief Logical operations on ranges - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup reductions - * \{ - * \addtogroup logical - * \ingroup reductions - * \{ - */ - - -/*! \p all_of determines whether all elements in a range satify a predicate. - * Specifically, \p all_of returns \c true if pred(*i) is \c true - * for every iterator \c i in the range [first, last) and - * \c false otherwise. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param pred A predicate used to test range elements. - * \return \c true, if all elements satisfy the predicate; \c false, otherwise. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator, - * \tparam Predicate must be a model of Predicate. - * - * \code - * #include - * #include - * #include - * ... 
- * bool A[3] = {true, true, false}; - * - * thrust::all_of(thrust::host, A, A + 2, thrust::identity()); // returns true - * thrust::all_of(thrust::host, A, A + 3, thrust::identity()); // returns false - * - * // empty range - * thrust::all_of(thrust::host, A, A, thrust::identity()); // returns false - * - * \endcode - * - * \see any_of - * \see none_of - * \see transform_reduce - */ -template -bool all_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred); - - -/*! \p all_of determines whether all elements in a range satify a predicate. - * Specifically, \p all_of returns \c true if pred(*i) is \c true - * for every iterator \c i in the range [first, last) and - * \c false otherwise. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param pred A predicate used to test range elements. - * \return \c true, if all elements satisfy the predicate; \c false, otherwise. - * - * \tparam InputIterator is a model of Input Iterator, - * \tparam Predicate must be a model of Predicate. - * - * \code - * #include - * #include - * ... - * bool A[3] = {true, true, false}; - * - * thrust::all_of(A, A + 2, thrust::identity()); // returns true - * thrust::all_of(A, A + 3, thrust::identity()); // returns false - * - * // empty range - * thrust::all_of(A, A, thrust::identity()); // returns false - * - * \endcode - * - * \see any_of - * \see none_of - * \see transform_reduce - */ -template -bool all_of(InputIterator first, InputIterator last, Predicate pred); - - -/*! \p any_of determines whether any element in a range satifies a predicate. - * Specifically, \p any_of returns \c true if pred(*i) is \c true - * for any iterator \c i in the range [first, last) and - * \c false otherwise. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. 
- * \param last The end of the sequence. - * \param pred A predicate used to test range elements. - * \return \c true, if any element satisfies the predicate; \c false, otherwise. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator, - * \tparam Predicate must be a model of Predicate. - * - * \code - * #include - * #include - * #include - * ... - * bool A[3] = {true, true, false}; - * - * thrust::any_of(thrust::host, A, A + 2, thrust::identity()); // returns true - * thrust::any_of(thrust::host, A, A + 3, thrust::identity()); // returns true - * - * thrust::any_of(thrust::host, A + 2, A + 3, thrust::identity()); // returns false - * - * // empty range - * thrust::any_of(thrust::host, A, A, thrust::identity()); // returns false - * \endcode - * - * \see all_of - * \see none_of - * \see transform_reduce - */ -template -bool any_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred); - - -/*! \p any_of determines whether any element in a range satifies a predicate. - * Specifically, \p any_of returns \c true if pred(*i) is \c true - * for any iterator \c i in the range [first, last) and - * \c false otherwise. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param pred A predicate used to test range elements. - * \return \c true, if any element satisfies the predicate; \c false, otherwise. - * - * \tparam InputIterator is a model of Input Iterator, - * \tparam Predicate must be a model of Predicate. - * - * \code - * #include - * #include - * ... 
- * bool A[3] = {true, true, false}; - * - * thrust::any_of(A, A + 2, thrust::identity()); // returns true - * thrust::any_of(A, A + 3, thrust::identity()); // returns true - * - * thrust::any_of(A + 2, A + 3, thrust::identity()); // returns false - * - * // empty range - * thrust::any_of(A, A, thrust::identity()); // returns false - * \endcode - * - * \see all_of - * \see none_of - * \see transform_reduce - */ -template -bool any_of(InputIterator first, InputIterator last, Predicate pred); - - -/*! \p none_of determines whether no element in a range satifies a predicate. - * Specifically, \p none_of returns \c true if there is no iterator \c i in - * the range [first, last) such that pred(*i) is \c true, - * and \c false otherwise. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param pred A predicate used to test range elements. - * \return \c true, if no element satisfies the predicate; \c false, otherwise. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator, - * \tparam Predicate must be a model of Predicate. - * - * \code - * #include - * #include - * #include - * ... - * bool A[3] = {true, true, false}; - * - * thrust::none_of(thrust::host, A, A + 2, thrust::identity()); // returns false - * thrust::none_of(thrust::host, A, A + 3, thrust::identity()); // returns false - * - * thrust::none_of(thrust::host, A + 2, A + 3, thrust::identity()); // returns true - * - * // empty range - * thrust::none_of(thrust::host, A, A, thrust::identity()); // returns true - * \endcode - * - * \see all_of - * \see any_of - * \see transform_reduce - */ -template -bool none_of(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last, Predicate pred); - - -/*! 
\p none_of determines whether no element in a range satifies a predicate. - * Specifically, \p none_of returns \c true if there is no iterator \c i in - * the range [first, last) such that pred(*i) is \c true, - * and \c false otherwise. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param pred A predicate used to test range elements. - * \return \c true, if no element satisfies the predicate; \c false, otherwise. - * - * \tparam InputIterator is a model of Input Iterator, - * \tparam Predicate must be a model of Predicate. - * - * \code - * #include - * #include - * ... - * bool A[3] = {true, true, false}; - * - * thrust::none_of(A, A + 2, thrust::identity()); // returns false - * thrust::none_of(A, A + 3, thrust::identity()); // returns false - * - * thrust::none_of(A + 2, A + 3, thrust::identity()); // returns true - * - * // empty range - * thrust::none_of(A, A, thrust::identity()); // returns true - * \endcode - * - * \see all_of - * \see any_of - * \see transform_reduce - */ -template -bool none_of(InputIterator first, InputIterator last, Predicate pred); - - -/*! \} // end logical - * \} // end reductions - */ - - -} // end namespace thrust - -#include - diff --git a/compat/thrust/memory.h b/compat/thrust/memory.h deleted file mode 100644 index 6362de4064..0000000000 --- a/compat/thrust/memory.h +++ /dev/null @@ -1,538 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file thrust/memory.h - * \brief Abstractions for Thrust's memory model. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - - -/*! \addtogroup memory_management Memory Management - * \addtogroup memory_management_classes Memory Management Classes - * \ingroup memory_management - * \{ - */ - -/*! \p pointer stores a pointer to an object allocated in memory. Like \p device_ptr, this - * type ensures type safety when dispatching standard algorithms on ranges resident in memory. - * - * \p pointer generalizes \p device_ptr by relaxing the backend system associated with the \p pointer. - * Instead of the backend system specified by \p THRUST_DEFAULT_DEVICE_BACKEND, \p pointer's - * system is given by its second template parameter, \p Tag. For the purpose of Thrust dispatch, - * device_ptr and pointer are considered equivalent. - * - * The raw pointer encapsulated by a \p pointer may be obtained through its get member function - * or the \p raw_pointer_cast free function. - * - * \tparam Element specifies the type of the pointed-to object. - * - * \tparam Tag specifies the system with which this \p pointer is associated. This may be any Thrust - * backend system, or a user-defined tag. - * - * \tparam Reference allows the client to specify the reference type returned upon derereference. - * By default, this type is reference. - * - * \tparam Derived allows the client to specify the name of the derived type when \p pointer is used as - * a base class. This is useful to ensure that arithmetic on values of the derived type return - * values of the derived type as a result. By default, this type is pointer. - * - * \note \p pointer is not a smart pointer; it is the client's responsibility to deallocate memory - * pointer to by \p pointer. 
- * - * \see device_ptr - * \see reference - * \see raw_pointer_cast - */ -// define pointer for the purpose of Doxygenating it -// it is actually defined elsewhere -#if 0 -template - class pointer -{ - public: - /*! The type of the raw pointer - */ - typedef typename super_t::base_type raw_pointer; - - /*! \p pointer's default constructor initializes its encapsulated pointer to \c 0 - */ - __host__ __device__ - pointer(); - - /*! This constructor allows construction of a pointer from a T*. - * - * \param ptr A raw pointer to copy from, presumed to point to a location in \p Tag's memory. - * \tparam OtherElement \p OtherElement shall be convertible to \p Element. - */ - template - __host__ __device__ - explicit pointer(OtherElement *ptr); - - /*! This contructor allows initialization from another pointer-like object. - * - * \param other The \p OtherPointer to copy. - * - * \tparam OtherPointer The tag associated with \p OtherPointer shall be convertible to \p Tag, - * and its element type shall be convertible to \p Element. - */ - template - __host__ __device__ - pointer(const OtherPointer &other, - typename thrust::detail::enable_if_pointer_is_convertible< - OtherPointer, - pointer - >::type * = 0); - - /*! Assignment operator allows assigning from another pointer-like object with related type. - * - * \param other The other pointer-like object to assign from. - * \return *this - * - * \tparam OtherPointer The tag associated with \p OtherPointer shall be convertible to \p Tag, - * and its element type shall be convertible to \p Element. - */ - template - __host__ __device__ - typename thrust::detail::enable_if_pointer_is_convertible< - OtherPointer, - pointer, - derived_type & - >::type - operator=(const OtherPointer &other); - - /*! \p get returns this \p pointer's encapsulated raw pointer. - * \return This \p pointer's raw pointer. - */ - __host__ __device__ - Element *get() const; -}; -#endif - -/*! 
\p reference is a wrapped reference to an object stored in memory. \p reference generalizes - * \p device_reference by relaxing the type of pointer associated with the object. \p reference - * is the type of the result of dereferencing a tagged pointer-like object such as \p pointer, and - * intermediates operations on objects existing in a remote memory. - * - * \tparam Element specifies the type of the referent object. - * \tparam Pointer specifies the type of the result of taking the address of \p reference. - * \tparam Derived allows the client to specify the name of the derived type when \p reference is used as - * a base class. This is useful to ensure that assignment to objects of the derived type return - * values of the derived type as a result. By default, this type is reference. - */ -// define pointer for the purpose of Doxygenating it -// it is actually defined elsewhere -#if 0 -template - class reference -{ - public: - /*! The type of this \p reference's wrapped pointers. - */ - typedef Pointer pointer; - - /*! The \p value_type of this \p reference. - */ - typedef typename thrust::detail::remove_const::type value_type; - - /*! This copy constructor initializes this \p reference - * to refer to an object pointed to by the given \p pointer. After - * this \p reference is constructed, it shall refer to the - * object pointed to by \p ptr. - * - * \param ptr A \p pointer to copy from. - */ - __host__ __device__ - explicit reference(const pointer &ptr); - - /*! This copy constructor accepts a const reference to another - * \p reference of related type. After this \p reference is constructed, - * it shall refer to the same object as \p other. - * - * \param other A \p reference to copy from. - * \tparam OtherElement the element type of the other \p reference. - * \tparam OtherPointer the pointer type of the other \p reference. - * \tparam OtherDerived the derived type of the other \p reference. 
- * - * \note This constructor is templated primarily to allow initialization of - * reference from reference. - */ - template - __host__ __device__ - reference(const reference &other, - typename thrust::detail::enable_if_convertible< - typename reference::pointer, - pointer - >::type * = 0); - - /*! Copy assignment operator copy assigns from another \p reference. - * - * \param other The other \p reference to assign from. - * \return static_cast(*this) - */ - __host__ __device__ - derived_type &operator=(const reference &other); - - /*! Assignment operator copy assigns from another \p reference of related type. - * - * \param other The other \p reference to assign from. - * \return static_cast(*this) - * - * \tparam OtherElement the element type of the other \p reference. - * \tparam OtherPointer the pointer type of the other \p reference. - * \tparam OtherDerived the derived type of the other \p reference. - */ - template - __host__ __device__ - derived_type &operator=(const reference &other); - - /*! Assignment operator assigns from a \p value_type. - * - * \param x The \p value_type to assign from. - * \return static_cast(*this). - */ - __host__ __device__ - derived_type &operator=(const value_type &x); - - /*! Address-of operator returns a \p pointer pointing to the object - * referenced by this \p reference. It does not return the address of this - * \p reference. - * - * \return A \p pointer pointing to the referenct object. - */ - __host__ __device__ - pointer operator&() const; - - /*! Conversion operator converts this \p reference to \p value_type by - * returning a copy of the referent object. - * - * \return A copy of the referent object. - */ - __host__ __device__ - operator value_type () const; - - /*! Swaps the value of the referent object with another. - * - * \param other The other \p reference with which to swap. - * \note The argument is of type \p derived_type rather than \p reference. 
- */ - __host__ __device__ - void swap(derived_type &other); - - /*! Prefix increment operator increments the referent object. - * - * \return static_Cast(*this). - * - * \note Documentation for other arithmetic operators omitted for brevity. - */ - derived_type &operator++(); -}; -#endif - -/*! \} - */ - -/*! - * \addtogroup memory_management_functions Memory Management Functions - * \ingroup memory_management - * \{ - */ - - -/*! \addtogroup allocation_functions - * \{ - */ - - -/*! This version of \p malloc allocates untyped uninitialized storage associated with a given system. - * - * \param system The Thrust system with which to associate the storage. - * \param n The number of bytes of storage to allocate. - * \return If allocation succeeds, a pointer to the allocated storage; a null pointer otherwise. - * The pointer must be deallocated with \p thrust::free. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * - * \pre \p DerivedPolicy must be publically derived from thrust::execution_policy. - * - * The following code snippet demonstrates how to use \p malloc to allocate a range of memory - * associated with Thrust's device system. - * - * \code - * #include - * ... - * // allocate some memory with thrust::malloc - * const int N = 100; - * thrust::device_system_tag device_sys; - * thrust::pointer void_ptr = thrust::malloc(device_sys, N); - * - * // manipulate memory - * ... - * - * // deallocate void_ptr with thrust::free - * thrust::free(device_sys, void_ptr); - * \endcode - * - * \see free - * \see device_malloc - */ -template -pointer malloc(const thrust::detail::execution_policy_base &system, std::size_t n); - - -/*! This version of \p malloc allocates typed uninitialized storage associated with a given system. - * - * \param system The Thrust system with which to associate the storage. - * \param n The number of elements of type \c T which the storage should accomodate. 
- * \return If allocation succeeds, a pointer to an allocation large enough to accomodate \c n - * elements of type \c T; a null pointer otherwise. - * The pointer must be deallocated with \p thrust::free. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * - * \pre \p DerivedPolicy must be publically derived from thrust::execution_policy. - * - * The following code snippet demonstrates how to use \p malloc to allocate a range of memory - * to accomodate integers associated with Thrust's device system. - * - * \code - * #include - * ... - * // allocate storage for 100 ints with thrust::malloc - * const int N = 100; - * thrust::device_system_tag device_sys; - * thrust::pointer ptr = thrust::malloc(device_sys, N); - * - * // manipulate memory - * ... - * - * // deallocate ptr with thrust::free - * thrust::free(device_sys, ptr); - * \endcode - * - * \see free - * \see device_malloc - */ -template -pointer malloc(const thrust::detail::execution_policy_base &system, std::size_t n); - - -/*! \p get_temporary_buffer returns a pointer to storage associated with a given Thrust system sufficient to store up to - * \p n objects of type \c T. If not enough storage is available to accomodate \p n objects, an implementation may return - * a smaller buffer. The number of objects the returned buffer can accomodate is also returned. - * - * Thrust uses \p get_temporary_buffer internally when allocating temporary storage required by algorithm implementations. - * - * The storage allocated with \p get_temporary_buffer must be returned to the system with \p return_temporary_buffer. - * - * \param system The Thrust system with which to associate the storage. - * \param n The requested number of objects of type \c T the storage should accomodate. - * \return A pair \c p such that p.first is a pointer to the allocated storage and p.second is the number of - * contiguous objects of type \c T that the storage can accomodate. 
If no storage can be allocated, p.first if - * no storage can be obtained. The storage must be returned to the system using \p return_temporary_buffer. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * - * \pre \p DerivedPolicy must be publically derived from thrust::execution_policy. - * - * The following code snippet demonstrates how to use \p get_temporary_buffer to allocate a range of memory - * to accomodate integers associated with Thrust's device system. - * - * \code - * #include - * ... - * // allocate storage for 100 ints with thrust::get_temporary_buffer - * const int N = 100; - * - * typedef thrust::pair< - * thrust::pointer, - * std::ptrdiff_t - * > ptr_and_size_t; - * - * thrust::device_system_tag device_sys; - * ptr_and_size_t ptr_and_size = thrust::get_temporary_buffer(device_sys, N); - * - * // manipulate up to 100 ints - * for(int i = 0; i < ptr_and_size.second; ++i) - * { - * *ptr_and_size.first = i; - * } - * - * // deallocate storage with thrust::return_temporary_buffer - * thrust::return_temporary_buffer(device_sys, ptr_and_size.first); - * \endcode - * - * \see malloc - * \see return_temporary_buffer - */ -template -thrust::pair, typename thrust::pointer::difference_type> -get_temporary_buffer(const thrust::detail::execution_policy_base &system, typename thrust::pointer::difference_type n); - - -/*! \} allocation_functions - */ - - -/*! \addtogroup deallocation_functions - * \{ - */ - - -/*! \p free deallocates the storage previously allocated by \p thrust::malloc. - * - * \param system The Thrust system with which the storage is associated. - * \param ptr A pointer previously returned by \p thrust::malloc. If \p ptr is null, \p free - * does nothing. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * - * \pre \p ptr shall have been returned by a previous call to thrust::malloc(system, n) or thrust::malloc(system, n) for some type \c T. 
- * - * The following code snippet demonstrates how to use \p free to deallocate a range of memory - * previously allocated with \p thrust::malloc. - * - * \code - * #include - * ... - * // allocate storage for 100 ints with thrust::malloc - * const int N = 100; - * thrust::device_system_tag device_sys; - * thrust::pointer ptr = thrust::malloc(device_sys, N); - * - * // mainpulate memory - * ... - * - * // deallocate ptr with thrust::free - * thrust::free(device_sys, ptr); - * \endcode - */ -template -void free(const thrust::detail::execution_policy_base &system, Pointer ptr); - - -/*! \p return_temporary_buffer deallocates storage associated with a given Thrust system previously allocated by \p get_temporary_buffer. - * - * Thrust uses \p return_temporary_buffer internally when deallocating temporary storage required by algorithm implementations. - * - * \param system The Thrust system with which the storage is associated. - * \param p A pointer previously returned by \p thrust::get_temporary_buffer. If \p ptr is null, \p return_temporary_buffer does nothing. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * - * \pre \p p shall have been previously allocated by \p thrust::get_temporary_buffer. - * - * The following code snippet demonstrates how to use \p return_temporary_buffer to deallocate a range of memory - * previously allocated by \p get_temporary_buffer. - * - * \code - * #include - * ... 
- * // allocate storage for 100 ints with thrust::get_temporary_buffer - * const int N = 100; - * - * typedef thrust::pair< - * thrust::pointer, - * std::ptrdiff_t - * > ptr_and_size_t; - * - * thrust::device_system_tag device_sys; - * ptr_and_size_t ptr_and_size = thrust::get_temporary_buffer(device_sys, N); - * - * // manipulate up to 100 ints - * for(int i = 0; i < ptr_and_size.second; ++i) - * { - * *ptr_and_size.first = i; - * } - * - * // deallocate storage with thrust::return_temporary_buffer - * thrust::return_temporary_buffer(device_sys, ptr_and_size.first); - * \endcode - * - * \see free - * \see get_temporary_buffer - */ -template -void return_temporary_buffer(const thrust::detail::execution_policy_base &system, Pointer p); - - -/*! \} deallocation_functions - */ - - -/*! \p raw_pointer_cast creates a "raw" pointer from a pointer-like type, - * simply returning the wrapped pointer, should it exist. - * - * \param ptr The pointer of interest. - * \return ptr.get(), if the expression is well formed; ptr, otherwise. - * \see raw_reference_cast - */ -template -__host__ __device__ -inline typename thrust::detail::pointer_traits::raw_pointer - raw_pointer_cast(const Pointer &ptr); - - -/*! \p raw_reference_cast creates a "raw" reference from a wrapped reference type, - * simply returning the underlying reference, should it exist. - * - * If the argument is not a reference wrapper, the result is a reference to the argument. - * - * \param ref The reference of interest. - * \return *thrust::raw_pointer_cast(&ref). - * \note There are two versions of \p raw_reference_cast. One for const references, - * and one for non-const. - * \see raw_pointer_cast - */ -template -__host__ __device__ -inline typename detail::raw_reference::type - raw_reference_cast(T &ref); - - -/*! \p raw_reference_cast creates a "raw" reference from a wrapped reference type, - * simply returning the underlying reference, should it exist. 
- * - * If the argument is not a reference wrapper, the result is a reference to the argument. - * - * \param ref The reference of interest. - * \return *thrust::raw_pointer_cast(&ref). - * \note There are two versions of \p raw_reference_cast. One for const references, - * and one for non-const. - * \see raw_pointer_cast - */ -template -__host__ __device__ -inline typename detail::raw_reference::type - raw_reference_cast(const T &ref); - - -/*! \} - */ - -} // end thrust - diff --git a/compat/thrust/merge.h b/compat/thrust/merge.h deleted file mode 100644 index e5fa7b47ac..0000000000 --- a/compat/thrust/merge.h +++ /dev/null @@ -1,676 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file merge.h - * \brief Merging sorted ranges - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ - - -/*! \addtogroup merging Merging - * \ingroup algorithms - * \{ - */ - - -/*! \p merge combines two sorted ranges [first1, last1) and [first2, last2) - * into a single sorted range. That is, it copies from [first1, last1) and - * [first2, last2) into [result, result + (last1 - first1) + (last2 - first2)) - * such that the resulting range is in ascending order. 
\p merge is stable, meaning both that the - * relative order of elements within each input range is preserved, and that for equivalent elements - * in both input ranges the element from the first range precedes the element from the second. The - * return value is result + (last1 - first1) + (last2 - first2). - * - * This version of \p merge compares elements using \c operator<. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. - * \param result The beginning of the merged output. - * \return The end of the output range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. 
- * \pre The resulting range shall not overlap with either input range. - * - * The following code snippet demonstrates how to use - * \p merge to compute the merger of two sorted sets of integers using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * int A1[6] = {1, 3, 5, 7, 9, 11}; - * int A2[7] = {1, 1, 2, 3, 5, 8, 13}; - * - * int result[13]; - * - * int *result_end = - * thrust::merge(thrust::host, - * A1, A1 + 6, - * A2, A2 + 7, - * result); - * // result = {1, 1, 1, 2, 3, 3, 5, 5, 7, 8, 9, 11, 13} - * \endcode - * - * \see http://www.sgi.com/tech/stl/merge.html - * \see \p set_union - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator merge(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result); - - -/*! \p merge combines two sorted ranges [first1, last1) and [first2, last2) - * into a single sorted range. That is, it copies from [first1, last1) and - * [first2, last2) into [result, result + (last1 - first1) + (last2 - first2)) - * such that the resulting range is in ascending order. \p merge is stable, meaning both that the - * relative order of elements within each input range is preserved, and that for equivalent elements - * in both input ranges the element from the first range precedes the element from the second. The - * return value is result + (last1 - first1) + (last2 - first2). - * - * This version of \p merge compares elements using \c operator<. - * - * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. - * \param result The beginning of the merged output. - * \return The end of the output range. 
- * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. - * \pre The resulting range shall not overlap with either input range. - * - * The following code snippet demonstrates how to use - * \p merge to compute the merger of two sorted sets of integers. - * - * \code - * #include - * ... - * int A1[6] = {1, 3, 5, 7, 9, 11}; - * int A2[7] = {1, 1, 2, 3, 5, 8, 13}; - * - * int result[13]; - * - * int *result_end = thrust::merge(A1, A1 + 6, A2, A2 + 7, result); - * // result = {1, 1, 1, 2, 3, 3, 5, 5, 7, 8, 9, 11, 13} - * \endcode - * - * \see http://www.sgi.com/tech/stl/merge.html - * \see \p set_union - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator merge(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result); - - -/*! \p merge combines two sorted ranges [first1, last1) and [first2, last2) - * into a single sorted range. 
That is, it copies from [first1, last1) and - * [first2, last2) into [result, result + (last1 - first1) + (last2 - first2)) - * such that the resulting range is in ascending order. \p merge is stable, meaning both that the - * relative order of elements within each input range is preserved, and that for equivalent elements - * in both input ranges the element from the first range precedes the element from the second. The - * return value is result + (last1 - first1) + (last2 - first2). - * - * This version of \p merge compares elements using a function object \p comp. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. - * \param result The beginning of the merged output. - * \param comp Comparison operator. - * \return The end of the output range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1's \c value_type is convertable to \p StrictWeakCompare's \c first_argument_type. - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2's \c value_type is convertable to \p StrictWeakCompare's \c second_argument_type. - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam StrictWeakCompare is a model of Strict Weak Ordering. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. - * \pre The resulting range shall not overlap with either input range. 
- * - * The following code snippet demonstrates how to use - * \p merge to compute the merger of two sets of integers sorted in - * descending order using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * int A1[6] = {11, 9, 7, 5, 3, 1}; - * int A2[7] = {13, 8, 5, 3, 2, 1, 1}; - * - * int result[13]; - * - * int *result_end = thrust::merge(thrust::host, - * A1, A1 + 6, - * A2, A2 + 7, - * result, - * thrust::greater()); - * // result = {13, 11, 9, 8, 7, 5, 5, 3, 3, 2, 1, 1, 1} - * \endcode - * - * \see http://www.sgi.com/tech/stl/merge.html - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator merge(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakCompare comp); - - -/*! \p merge combines two sorted ranges [first1, last1) and [first2, last2) - * into a single sorted range. That is, it copies from [first1, last1) and - * [first2, last2) into [result, result + (last1 - first1) + (last2 - first2)) - * such that the resulting range is in ascending order. \p merge is stable, meaning both that the - * relative order of elements within each input range is preserved, and that for equivalent elements - * in both input ranges the element from the first range precedes the element from the second. The - * return value is result + (last1 - first1) + (last2 - first2). - * - * This version of \p merge compares elements using a function object \p comp. - * - * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. - * \param result The beginning of the merged output. - * \param comp Comparison operator. - * \return The end of the output range. 
- * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1's \c value_type is convertable to \p StrictWeakCompare's \c first_argument_type. - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2's \c value_type is convertable to \p StrictWeakCompare's \c second_argument_type. - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam StrictWeakCompare is a model of Strict Weak Ordering. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. - * \pre The resulting range shall not overlap with either input range. - * - * The following code snippet demonstrates how to use - * \p merge to compute the merger of two sets of integers sorted in - * descending order. - * - * \code - * #include - * #include - * ... - * int A1[6] = {11, 9, 7, 5, 3, 1}; - * int A2[7] = {13, 8, 5, 3, 2, 1, 1}; - * - * int result[13]; - * - * int *result_end = thrust::merge(A1, A1 + 6, A2, A2 + 7, result, thrust::greater()); - * // result = {13, 11, 9, 8, 7, 5, 5, 3, 3, 2, 1, 1, 1} - * \endcode - * - * \see http://www.sgi.com/tech/stl/merge.html - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator merge(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakCompare comp); - - -/*! \p merge_by_key performs a key-value merge. That is, \p merge_by_key copies elements from - * [keys_first1, keys_last1) and [keys_first2, keys_last2) into a single range, - * [keys_result, keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) such that - * the resulting range is in ascending key order. 
- * - * At the same time, \p merge_by_key copies elements from the two associated ranges [values_first1 + (keys_last1 - keys_first1)) - * and [values_first2 + (keys_last2 - keys_first2)) into a single range, - * [values_result, values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) such that - * the resulting range is in ascending order implied by each input element's associated key. - * - * \p merge_by_key is stable, meaning both that the relative order of elements within each input range is - * preserved, and that for equivalent elements in all input key ranges the element from the first range - * precedes the element from the second. - * - * The return value is is (keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) - * and (values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)). - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param values_first2 The beginning of the first input range of values. - * \param keys_result The beginning of the merged output range of keys. - * \param values_result The beginning of the merged output range of values. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. - * - * \tparam DerivedPolicy The name of the derived execution policy. 
- * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam InputIterator4 is a model of Input Iterator, - * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use - * \p merge_by_key to compute the merger of two sets of integers sorted in - * ascending order using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... 
- * int A_keys[6] = {1, 3, 5, 7, 9, 11}; - * int A_vals[6] = {0, 0, 0, 0, 0, 0}; - * - * int B_keys[7] = {1, 1, 2, 3, 5, 8, 13}; - * int B_vals[7] = {1, 1, 1, 1, 1, 1, 1}; - * - * int keys_result[13]; - * int vals_result[13]; - * - * thrust::pair end = - * thrust::merge_by_key(thrust::host, - * A_keys, A_keys + 6, - * B_keys, B_keys + 7, - * A_vals, B_vals, - * keys_result, vals_result); - * - * // keys_result = {1, 1, 1, 2, 3, 3, 5, 5, 7, 8, 9, 11, 13} - * // vals_result = {0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1} - * \endcode - * - * \see merge - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - merge_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, InputIterator1 keys_last1, - InputIterator2 keys_first2, InputIterator2 keys_last2, - InputIterator3 values_first1, InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -/*! \p merge_by_key performs a key-value merge. That is, \p merge_by_key copies elements from - * [keys_first1, keys_last1) and [keys_first2, keys_last2) into a single range, - * [keys_result, keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) such that - * the resulting range is in ascending key order. - * - * At the same time, \p merge_by_key copies elements from the two associated ranges [values_first1 + (keys_last1 - keys_first1)) - * and [values_first2 + (keys_last2 - keys_first2)) into a single range, - * [values_result, values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) such that - * the resulting range is in ascending order implied by each input element's associated key. - * - * \p merge_by_key is stable, meaning both that the relative order of elements within each input range is - * preserved, and that for equivalent elements in all input key ranges the element from the first range - * precedes the element from the second. 
- * - * The return value is is (keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) - * and (values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)). - * - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param values_first2 The beginning of the first input range of values. - * \param keys_result The beginning of the merged output range of keys. - * \param values_result The beginning of the merged output range of values. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. - * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. 
- * \tparam InputIterator4 is a model of Input Iterator, - * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use - * \p merge_by_key to compute the merger of two sets of integers sorted in - * ascending order. - * - * \code - * #include - * #include - * ... - * int A_keys[6] = {1, 3, 5, 7, 9, 11}; - * int A_vals[6] = {0, 0, 0, 0, 0, 0}; - * - * int B_keys[7] = {1, 1, 2, 3, 5, 8, 13}; - * int B_vals[7] = {1, 1, 1, 1, 1, 1, 1}; - * - * int keys_result[13]; - * int vals_result[13]; - * - * thrust::pair end = thrust::merge_by_key(A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, B_vals, keys_result, vals_result); - * - * // keys_result = {1, 1, 1, 2, 3, 3, 5, 5, 7, 8, 9, 11, 13} - * // vals_result = {0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1} - * \endcode - * - * \see merge - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - merge_by_key(InputIterator1 keys_first1, InputIterator1 keys_last1, - InputIterator2 keys_first2, InputIterator2 keys_last2, - InputIterator3 values_first1, InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -/*! \p merge_by_key performs a key-value merge. That is, \p merge_by_key copies elements from - * [keys_first1, keys_last1) and [keys_first2, keys_last2) into a single range, - * [keys_result, keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) such that - * the resulting range is in ascending key order. 
- * - * At the same time, \p merge_by_key copies elements from the two associated ranges [values_first1 + (keys_last1 - keys_first1)) - * and [values_first2 + (keys_last2 - keys_first2)) into a single range, - * [values_result, values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) such that - * the resulting range is in ascending order implied by each input element's associated key. - * - * \p merge_by_key is stable, meaning both that the relative order of elements within each input range is - * preserved, and that for equivalent elements in all input key ranges the element from the first range - * precedes the element from the second. - * - * The return value is is (keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) - * and (values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)). - * - * This version of \p merge_by_key compares key elements using a function object \p comp. - * - * The algorithm's execution is parallelized using \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param values_first2 The beginning of the first input range of values. - * \param keys_result The beginning of the merged output range of keys. - * \param values_result The beginning of the merged output range of values. - * \param comp Comparison operator. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. - * - * \tparam DerivedPolicy The name of the derived execution policy. 
- * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1's \c value_type is convertable to \p StrictWeakCompare's \c first_argument_type. - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator1's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2's \c value_type is convertable to \p StrictWeakCompare's \c second_argument_type. - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator1's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam InputIterator4 is a model of Input Iterator, - * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam StrictWeakCompare is a model of Strict Weak Ordering. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use - * \p merge_by_key to compute the merger of two sets of integers sorted in - * descending order using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... 
- * int A_keys[6] = {11, 9, 7, 5, 3, 1}; - * int A_vals[6] = { 0, 0, 0, 0, 0, 0}; - * - * int B_keys[7] = {13, 8, 5, 3, 2, 1, 1}; - * int B_vals[7] = { 1, 1, 1, 1, 1, 1, 1}; - * - * int keys_result[13]; - * int vals_result[13]; - * - * thrust::pair end = - * thrust::merge_by_key(thrust::host, - * A_keys, A_keys + 6, - * B_keys, B_keys + 7, - * A_vals, B_vals, - * keys_result, vals_result, - * thrust::greater()); - * - * // keys_result = {13, 11, 9, 8, 7, 5, 5, 3, 3, 2, 1, 1, 1} - * // vals_result = { 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1} - * \endcode - * - * \see merge - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - merge_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, InputIterator1 keys_last1, - InputIterator2 keys_first2, InputIterator2 keys_last2, - InputIterator3 values_first1, InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - Compare comp); - - -/*! \p merge_by_key performs a key-value merge. That is, \p merge_by_key copies elements from - * [keys_first1, keys_last1) and [keys_first2, keys_last2) into a single range, - * [keys_result, keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) such that - * the resulting range is in ascending key order. - * - * At the same time, \p merge_by_key copies elements from the two associated ranges [values_first1 + (keys_last1 - keys_first1)) - * and [values_first2 + (keys_last2 - keys_first2)) into a single range, - * [values_result, values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) such that - * the resulting range is in ascending order implied by each input element's associated key. - * - * \p merge_by_key is stable, meaning both that the relative order of elements within each input range is - * preserved, and that for equivalent elements in all input key ranges the element from the first range - * precedes the element from the second. 
- * - * The return value is is (keys_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)) - * and (values_result + (keys_last1 - keys_first1) + (keys_last2 - keys_first2)). - * - * This version of \p merge_by_key compares key elements using a function object \p comp. - * - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param values_first2 The beginning of the first input range of values. - * \param keys_result The beginning of the merged output range of keys. - * \param values_result The beginning of the merged output range of values. - * \param comp Comparison operator. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. - * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1's \c value_type is convertable to \p StrictWeakCompare's \c first_argument_type. - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator1's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2's \c value_type is convertable to \p StrictWeakCompare's \c second_argument_type. - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator1's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam InputIterator4 is a model of Input Iterator, - * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. 
- * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam StrictWeakCompare is a model of Strict Weak Ordering. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use - * \p merge_by_key to compute the merger of two sets of integers sorted in - * descending order. - * - * \code - * #include - * #include - * ... - * int A_keys[6] = {11, 9, 7, 5, 3, 1}; - * int A_vals[6] = { 0, 0, 0, 0, 0, 0}; - * - * int B_keys[7] = {13, 8, 5, 3, 2, 1, 1}; - * int B_vals[7] = { 1, 1, 1, 1, 1, 1, 1}; - * - * int keys_result[13]; - * int vals_result[13]; - * - * thrust::pair end = thrust::merge_by_key(A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, B_vals, keys_result, vals_result, thrust::greater()); - * - * // keys_result = {13, 11, 9, 8, 7, 5, 5, 3, 3, 2, 1, 1, 1} - * // vals_result = { 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1} - * \endcode - * - * \see merge - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - merge_by_key(InputIterator1 keys_first1, InputIterator1 keys_last1, - InputIterator2 keys_first2, InputIterator2 keys_last2, - InputIterator3 values_first1, InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakCompare comp); - - -/*! \} // merging - */ - -} // end thrust - -#include - diff --git a/compat/thrust/mismatch.h b/compat/thrust/mismatch.h deleted file mode 100644 index 898157ac90..0000000000 --- a/compat/thrust/mismatch.h +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file mismatch.h - * \brief Search for differences between ranges - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ - - -/*! \addtogroup algorithms - */ - -/*! \addtogroup searching - * \ingroup algorithms - * \{ - */ - - -/*! \p mismatch finds the first position where the two ranges [first1, last1) - * and [first2, first2 + (last1 - first1)) differ. The two versions of - * \p mismatch use different tests for whether elements differ. - * - * This version of \p mismatch finds the first iterator \c i in [first1, last1) - * such that *i == *(first2 + (i - first1)) is \c false. The return value is a - * \c pair whose first element is \c i and whose second element is *(first2 + (i - first1)). - * If no such iterator \c i exists, the return value is a \c pair whose first element - * is \c last1 and whose second element is *(first2 + (last1 - first1)). - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the first sequence. - * \param last1 The end of the first sequence. - * \param first2 The beginning of the second sequence. - * \return The first position where the sequences differ. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator - * and \p InputIterator1's \c value_type is equality comparable to \p InputIterator2's \c value_type. - * \tparam InputIterator2 is a model of Input Iterator. 
- * - * \code - * #include - * #include - * #include - * ... - * thrust::device_vector vec1(4); - * thrust::device_vector vec2(4); - * - * vec1[0] = 0; vec2[0] = 0; - * vec1[1] = 5; vec2[1] = 5; - * vec1[2] = 3; vec2[2] = 8; - * vec1[3] = 7; vec2[3] = 7; - * - * typedef thrust::device_vector::iterator Iterator; - * thrust::pair result; - * - * result = thrust::mismatch(thrust::device, vec1.begin(), vec1.end(), vec2.begin()); - * - * // result.first is vec1.begin() + 2 - * // result.second is vec2.begin() + 2 - * \endcode - * - * \see find - * \see find_if - */ -template -thrust::pair mismatch(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2); - - -/*! \p mismatch finds the first position where the two ranges [first1, last1) - * and [first2, first2 + (last1 - first1)) differ. The two versions of - * \p mismatch use different tests for whether elements differ. - * - * This version of \p mismatch finds the first iterator \c i in [first1, last1) - * such that *i == *(first2 + (i - first1)) is \c false. The return value is a - * \c pair whose first element is \c i and whose second element is *(first2 + (i - first1)). - * If no such iterator \c i exists, the return value is a \c pair whose first element - * is \c last1 and whose second element is *(first2 + (last1 - first1)). - * - * \param first1 The beginning of the first sequence. - * \param last1 The end of the first sequence. - * \param first2 The beginning of the second sequence. - * \return The first position where the sequences differ. - * - * \tparam InputIterator1 is a model of Input Iterator - * and \p InputIterator1's \c value_type is equality comparable to \p InputIterator2's \c value_type. - * \tparam InputIterator2 is a model of Input Iterator. - * - * \code - * #include - * #include - * ... 
- * thrust::device_vector vec1(4); - * thrust::device_vector vec2(4); - * - * vec1[0] = 0; vec2[0] = 0; - * vec1[1] = 5; vec2[1] = 5; - * vec1[2] = 3; vec2[2] = 8; - * vec1[3] = 7; vec2[3] = 7; - * - * typedef thrust::device_vector::iterator Iterator; - * thrust::pair result; - * - * result = thrust::mismatch(vec1.begin(), vec1.end(), vec2.begin()); - * - * // result.first is vec1.begin() + 2 - * // result.second is vec2.begin() + 2 - * \endcode - * - * \see find - * \see find_if - */ -template -thrust::pair mismatch(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2); - - -/*! \p mismatch finds the first position where the two ranges [first1, last1) - * and [first2, first2 + (last1 - first1)) differ. The two versions of - * \p mismatch use different tests for whether elements differ. - * - * This version of \p mismatch finds the first iterator \c i in [first1, last1) - * such that pred(\*i, \*(first2 + (i - first1)) is \c false. The return value is a - * \c pair whose first element is \c i and whose second element is *(first2 + (i - first1)). - * If no such iterator \c i exists, the return value is a \c pair whose first element is - * \c last1 and whose second element is *(first2 + (last1 - first1)). - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the first sequence. - * \param last1 The end of the first sequence. - * \param first2 The beginning of the second sequence. - * \param pred The binary predicate to compare elements. - * \return The first position where the sequences differ. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator. - * \tparam InputIterator2 is a model of Input Iterator. - * \tparam Predicate is a model of Input Iterator. - * - * \code - * #include - * #include - * #include - * ... 
- * thrust::device_vector vec1(4); - * thrust::device_vector vec2(4); - * - * vec1[0] = 0; vec2[0] = 0; - * vec1[1] = 5; vec2[1] = 5; - * vec1[2] = 3; vec2[2] = 8; - * vec1[3] = 7; vec2[3] = 7; - * - * typedef thrust::device_vector::iterator Iterator; - * thrust::pair result; - * - * result = thrust::mismatch(thrust::device, vec1.begin(), vec1.end(), vec2.begin(), thrust::equal_to()); - * - * // result.first is vec1.begin() + 2 - * // result.second is vec2.begin() + 2 - * \endcode - * - * \see find - * \see find_if - */ -template -thrust::pair mismatch(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - BinaryPredicate pred); - - -/*! \p mismatch finds the first position where the two ranges [first1, last1) - * and [first2, first2 + (last1 - first1)) differ. The two versions of - * \p mismatch use different tests for whether elements differ. - * - * This version of \p mismatch finds the first iterator \c i in [first1, last1) - * such that pred(\*i, \*(first2 + (i - first1)) is \c false. The return value is a - * \c pair whose first element is \c i and whose second element is *(first2 + (i - first1)). - * If no such iterator \c i exists, the return value is a \c pair whose first element is - * \c last1 and whose second element is *(first2 + (last1 - first1)). - * - * \param first1 The beginning of the first sequence. - * \param last1 The end of the first sequence. - * \param first2 The beginning of the second sequence. - * \param pred The binary predicate to compare elements. - * \return The first position where the sequences differ. - * - * \tparam InputIterator1 is a model of Input Iterator. - * \tparam InputIterator2 is a model of Input Iterator. - * \tparam Predicate is a model of Input Iterator. - * - * \code - * #include - * #include - * ... 
- * thrust::device_vector vec1(4); - * thrust::device_vector vec2(4); - * - * vec1[0] = 0; vec2[0] = 0; - * vec1[1] = 5; vec2[1] = 5; - * vec1[2] = 3; vec2[2] = 8; - * vec1[3] = 7; vec2[3] = 7; - * - * typedef thrust::device_vector::iterator Iterator; - * thrust::pair result; - * - * result = thrust::mismatch(vec1.begin(), vec1.end(), vec2.begin(), thrust::equal_to()); - * - * // result.first is vec1.begin() + 2 - * // result.second is vec2.begin() + 2 - * \endcode - * - * \see find - * \see find_if - */ -template -thrust::pair mismatch(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - BinaryPredicate pred); - -/*! \} // end searching - */ - -} // end namespace thrust - -#include - diff --git a/compat/thrust/pair.h b/compat/thrust/pair.h deleted file mode 100644 index 897cc078e0..0000000000 --- a/compat/thrust/pair.h +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file pair.h - * \brief A type encapsulating a heterogeneous pair of elements - */ - -#pragma once - -#include -#include - -namespace thrust -{ - -/*! \addtogroup utility - * \{ - */ - -/*! \addtogroup pair - * \{ - */ - -/*! \p pair is a generic data structure encapsulating a heterogeneous - * pair of values. - * - * \tparam T1 The type of \p pair's first object type. There are no - * requirements on the type of \p T1. T1's type is - * provided by pair::first_type. 
- * - * \tparam T2 The type of \p pair's second object type. There are no - * requirements on the type of \p T2. T2's type is - * provided by pair::second_type. - */ -template - struct pair -{ - /*! \p first_type is the type of \p pair's first object type. - */ - typedef T1 first_type; - - /*! \p second_type is the type of \p pair's second object type. - */ - typedef T2 second_type; - - /*! The \p pair's first object. - */ - first_type first; - - /*! The \p pair's second object. - */ - second_type second; - - /*! \p pair's default constructor constructs \p first - * and \p second using \c first_type & \c second_type's - * default constructors, respectively. - */ - __host__ __device__ pair(void); - - /*! This constructor accepts two objects to copy into this \p pair. - * - * \param x The object to copy into \p first. - * \param y The object to copy into \p second. - */ - inline __host__ __device__ - pair(const T1 &x, const T2 &y); - - /*! This copy constructor copies from a \p pair whose types are - * convertible to this \p pair's \c first_type and \c second_type, - * respectively. - * - * \param p The \p pair to copy from. - * - * \tparam U1 is convertible to \c first_type. - * \tparam U2 is convertible to \c second_type. - */ - template - inline __host__ __device__ - pair(const pair &p); - - /*! This copy constructor copies from a std::pair whose types are - * convertible to this \p pair's \c first_type and \c second_type, - * respectively. - * - * \param p The std::pair to copy from. - * - * \tparam U1 is convertible to \c first_type. - * \tparam U2 is convertible to \c second_type. - */ - template - inline __host__ __device__ - pair(const std::pair &p); - - /*! \p swap swaps the elements of two pairs. - * - * \param p The other pair with which to swap. - */ - inline __host__ __device__ - void swap(pair &p); -}; // end pair - - -/*! This operator tests two \p pairs for equality. - * - * \param x The first \p pair to compare. 
- * \param y The second \p pair to compare. - * \return \c true if and only if x.first == y.first && x.second == y.second. - * - * \tparam T1 is a model of Equality Comparable. - * \tparam T2 is a model of Equality Comparable. - */ -template - inline __host__ __device__ - bool operator==(const pair &x, const pair &y); - - -/*! This operator tests two pairs for ascending ordering. - * - * \param x The first \p pair to compare. - * \param y The second \p pair to compare. - * \return \c true if and only if x.first < y.first || (!(y.first < x.first) && x.second < y.second). - * - * \tparam T1 is a model of LessThan Comparable. - * \tparam T2 is a model of LessThan Comparable. - */ -template - inline __host__ __device__ - bool operator<(const pair &x, const pair &y); - - -/*! This operator tests two pairs for inequality. - * - * \param x The first \p pair to compare. - * \param y The second \p pair to compare. - * \return \c true if and only if !(x == y). - * - * \tparam T1 is a model of Equality Comparable. - * \tparam T2 is a model of Equality Comparable. - */ -template - inline __host__ __device__ - bool operator!=(const pair &x, const pair &y); - - -/*! This operator tests two pairs for descending ordering. - * - * \param x The first \p pair to compare. - * \param y The second \p pair to compare. - * \return \c true if and only if y < x. - * - * \tparam T1 is a model of LessThan Comparable. - * \tparam T2 is a model of LessThan Comparable. - */ -template - inline __host__ __device__ - bool operator>(const pair &x, const pair &y); - - -/*! This operator tests two pairs for ascending ordering or equivalence. - * - * \param x The first \p pair to compare. - * \param y The second \p pair to compare. - * \return \c true if and only if !(y < x). - * - * \tparam T1 is a model of LessThan Comparable. - * \tparam T2 is a model of LessThan Comparable. - */ -template - inline __host__ __device__ - bool operator<=(const pair &x, const pair &y); - - -/*! 
This operator tests two pairs for descending ordering or equivalence. - * - * \param x The first \p pair to compare. - * \param y The second \p pair to compare. - * \return \c true if and only if !(x < y). - * - * \tparam T1 is a model of LessThan Comparable. - * \tparam T2 is a model of LessThan Comparable. - */ -template - inline __host__ __device__ - bool operator>=(const pair &x, const pair &y); - - -/*! \p swap swaps the contents of two pairs. - * - * \param x The first \p pair to swap. - * \param y The second \p pair to swap. - */ -template - inline __host__ __device__ - void swap(pair &x, pair &y); - - -/*! This convenience function creates a \p pair from two objects. - * - * \param x The first object to copy from. - * \param y The second object to copy from. - * \return A newly-constructed \p pair copied from \p a and \p b. - * - * \tparam T1 There are no requirements on the type of \p T1. - * \tparam T2 There are no requirements on the type of \p T2. - */ -template - inline __host__ __device__ - pair make_pair(T1 x, T2 y); - - -/*! This convenience metafunction is included for compatibility with - * \p tuple. It returns either the type of a \p pair's - * \c first_type or \c second_type in its nested type, \c type. - * - * \tparam N This parameter selects the member of interest. - * \tparam T A \c pair type of interest. - */ -template struct tuple_element; - - -/*! This convenience metafunction is included for compatibility with - * \p tuple. It returns \c 2, the number of elements of a \p pair, - * in its nested data member, \c value. - * - * \tparam Pair A \c pair type of interest. - */ -template struct tuple_size; - - -/*! This convenience function returns a reference to either the first or - * second member of a \p pair. - * - * \param p The \p pair of interest. - * \return \c p.first or \c p.second, depending on the template - * parameter. - * - * \tparam N This parameter selects the member of interest. 
- */ -// XXX comment out these prototypes as a WAR to a problem on MSVC 2005 -//template -// inline __host__ __device__ -// typename tuple_element >::type & -// get(pair &p); - - -/*! This convenience function returns a const reference to either the - * first or second member of a \p pair. - * - * \param p The \p pair of interest. - * \return \c p.first or \c p.second, depending on the template - * parameter. - * - * \tparam i This parameter selects the member of interest. - */ -// XXX comment out these prototypes as a WAR to a problem on MSVC 2005 -//template -// inline __host__ __device__ -// const typename tuple_element >::type & -// get(const pair &p); - -/*! \} // pair - */ - -/*! \} // utility - */ - -} // end thrust - -#include - diff --git a/compat/thrust/partition.h b/compat/thrust/partition.h deleted file mode 100644 index 61a6278a8d..0000000000 --- a/compat/thrust/partition.h +++ /dev/null @@ -1,1429 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file partition.h - * \brief Reorganizes a range based on a predicate - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ - - -/*! \addtogroup reordering - * \ingroup algorithms - * - * \addtogroup partitioning - * \ingroup reordering - * \{ - */ - - -/*! 
\p partition reorders the elements [first, last) based on the function - * object \p pred, such that all of the elements that satisfy \p pred precede the - * elements that fail to satisfy it. The postcondition is that, for some iterator - * \c middle in the range [first, last), pred(*i) is \c true for every - * iterator \c i in the range [first,middle) and \c false for every iterator - * \c i in the range [middle, last). The return value of \p partition is - * \c middle. - * - * Note that the relative order of elements in the two reordered sequences is not - * necessarily the same as it was in the original sequence. A different algorithm, - * \ref stable_partition, does guarantee to preserve the relative order. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence to reorder. - * \param last The end of the sequence to reorder. - * \param pred A function object which decides to which partition each element of the - * sequence [first, last) belongs. - * \return An iterator referring to the first element of the second partition, that is, - * the sequence of the elements which do not satisfy \p pred. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type, - * and \p ForwardIterator is mutable. - * \tparam Predicate is a model of Predicate. - * - * The following code snippet demonstrates how to use \p partition to reorder a - * sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int &x) - * { - * return (x % 2) == 0; - * } - * }; - * ... 
- * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * const int N = sizeof(A)/sizeof(int); - * thrust::partition(thrust::host, - * A, A + N, - * is_even()); - * // A is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} - * \endcode - * - * \see http://www.sgi.com/tech/stl/partition.html - * \see \p stable_partition - * \see \p partition_copy - */ -template - ForwardIterator partition(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred); - - -/*! \p partition reorders the elements [first, last) based on the function - * object \p pred, such that all of the elements that satisfy \p pred precede the - * elements that fail to satisfy it. The postcondition is that, for some iterator - * \c middle in the range [first, last), pred(*i) is \c true for every - * iterator \c i in the range [first,middle) and \c false for every iterator - * \c i in the range [middle, last). The return value of \p partition is - * \c middle. - * - * Note that the relative order of elements in the two reordered sequences is not - * necessarily the same as it was in the original sequence. A different algorithm, - * \ref stable_partition, does guarantee to preserve the relative order. - * - * \param first The beginning of the sequence to reorder. - * \param last The end of the sequence to reorder. - * \param pred A function object which decides to which partition each element of the - * sequence [first, last) belongs. - * \return An iterator referring to the first element of the second partition, that is, - * the sequence of the elements which do not satisfy \p pred. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type, - * and \p ForwardIterator is mutable. - * \tparam Predicate is a model of Predicate. - * - * The following code snippet demonstrates how to use \p partition to reorder a - * sequence so that even numbers precede odd numbers. 
- * - * \code - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int &x) - * { - * return (x % 2) == 0; - * } - * }; - * ... - * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * const int N = sizeof(A)/sizeof(int); - * thrust::partition(A, A + N, - * is_even()); - * // A is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} - * \endcode - * - * \see http://www.sgi.com/tech/stl/partition.html - * \see \p stable_partition - * \see \p partition_copy - */ -template - ForwardIterator partition(ForwardIterator first, - ForwardIterator last, - Predicate pred); - - -/*! \p partition reorders the elements [first, last) based on the function - * object \p pred applied to a stencil range [stencil, stencil + (last - first)), - * such that all of the elements whose corresponding stencil element satisfies \p pred precede all of the elements whose - * corresponding stencil element fails to satisfy it. The postcondition is that, for some iterator - * \c middle in the range [first, last), pred(*stencil_i) is \c true for every iterator - * \c stencil_i in the range [stencil,stencil + (middle - first)) and \c false for every iterator \c stencil_i - * in the range [stencil + (middle - first), stencil + (last - first)). - * The return value of \p stable_partition is \c middle. - * - * Note that the relative order of elements in the two reordered sequences is not - * necessarily the same as it was in the original sequence. A different algorithm, - * \ref stable_partition, does guarantee to preserve the relative order. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence to reorder. - * \param last The end of the sequence to reorder. - * \param stencil The beginning of the stencil sequence. - * \param pred A function object which decides to which partition each element of the - * sequence [first, last) belongs. 
- * \return An iterator referring to the first element of the second partition, that is, - * the sequence of the elements whose stencil elements do not satisfy \p pred. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable. - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam Predicate is a model of Predicate. - * - * \pre The ranges [first,last) and [stencil, stencil + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p partition to reorder a - * sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int &x) - * { - * return (x % 2) == 0; - * } - * }; - * ... - * int A[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; - * int S[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * const int N = sizeof(A)/sizeof(int); - * thrust::partition(thrust::host, A, A + N, S, is_even()); - * // A is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0} - * // S is unmodified - * \endcode - * - * \see http://www.sgi.com/tech/stl/partition.html - * \see \p stable_partition - * \see \p partition_copy - */ -template - ForwardIterator partition(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred); - - -/*! \p partition reorders the elements [first, last) based on the function - * object \p pred applied to a stencil range [stencil, stencil + (last - first)), - * such that all of the elements whose corresponding stencil element satisfies \p pred precede all of the elements whose - * corresponding stencil element fails to satisfy it. 
The postcondition is that, for some iterator - * \c middle in the range [first, last), pred(*stencil_i) is \c true for every iterator - * \c stencil_i in the range [stencil,stencil + (middle - first)) and \c false for every iterator \c stencil_i - * in the range [stencil + (middle - first), stencil + (last - first)). - * The return value of \p stable_partition is \c middle. - * - * Note that the relative order of elements in the two reordered sequences is not - * necessarily the same as it was in the original sequence. A different algorithm, - * \ref stable_partition, does guarantee to preserve the relative order. - * - * \param first The beginning of the sequence to reorder. - * \param last The end of the sequence to reorder. - * \param stencil The beginning of the stencil sequence. - * \param pred A function object which decides to which partition each element of the - * sequence [first, last) belongs. - * \return An iterator referring to the first element of the second partition, that is, - * the sequence of the elements whose stencil elements do not satisfy \p pred. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable. - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam Predicate is a model of Predicate. - * - * \pre The ranges [first,last) and [stencil, stencil + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p partition to reorder a - * sequence so that even numbers precede odd numbers. - * - * \code - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int &x) - * { - * return (x % 2) == 0; - * } - * }; - * ... 
- * int A[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; - * int S[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * const int N = sizeof(A)/sizeof(int); - * thrust::partition(A, A + N, S, is_even()); - * // A is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0} - * // S is unmodified - * \endcode - * - * \see http://www.sgi.com/tech/stl/partition.html - * \see \p stable_partition - * \see \p partition_copy - */ -template - ForwardIterator partition(ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred); - - -/*! \p partition_copy differs from \ref partition only in that the reordered - * sequence is written to difference output sequences, rather than in place. - * - * \p partition_copy copies the elements [first, last) based on the - * function object \p pred. All of the elements that satisfy \p pred are copied - * to the range beginning at \p out_true and all the elements that fail to satisfy it - * are copied to the range beginning at \p out_false. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence to reorder. - * \param last The end of the sequence to reorder. - * \param out_true The destination of the resulting sequence of elements which satisfy \p pred. - * \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred. - * \param pred A function object which decides to which partition each element of the - * sequence [first, last) belongs. - * \return A \p pair p such that p.first is the end of the output range beginning - * at \p out_true and p.second is the end of the output range beginning at - * \p out_false. - * - * \tparam DerivedPolicy The name of the derived execution policy. 
- * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type and \p InputIterator's \c value_type - * is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. - * - * \pre The input range shall not overlap with either output range. - * - * The following code snippet demonstrates how to use \p partition_copy to separate a - * sequence into two output sequences of even and odd numbers using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int &x) - * { - * return (x % 2) == 0; - * } - * }; - * ... - * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * int result[10]; - * const int N = sizeof(A)/sizeof(int); - * int *evens = result; - * int *odds = result + 5; - * thrust::partition_copy(thrust::host, A, A + N, evens, odds, is_even()); - * // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} - * // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} - * // evens points to {2, 4, 6, 8, 10} - * // odds points to {1, 3, 5, 7, 9} - * \endcode - * - * \note The relative order of elements in the two reordered sequences is not - * necessarily the same as it was in the original sequence. A different algorithm, - * \ref stable_partition_copy, does guarantee to preserve the relative order. - * - * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf - * \see \p stable_partition_copy - * \see \p partition - */ -template - thrust::pair - partition_copy(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred); - - -/*! 
\p partition_copy differs from \ref partition only in that the reordered - * sequence is written to difference output sequences, rather than in place. - * - * \p partition_copy copies the elements [first, last) based on the - * function object \p pred. All of the elements that satisfy \p pred are copied - * to the range beginning at \p out_true and all the elements that fail to satisfy it - * are copied to the range beginning at \p out_false. - * - * \param first The beginning of the sequence to reorder. - * \param last The end of the sequence to reorder. - * \param out_true The destination of the resulting sequence of elements which satisfy \p pred. - * \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred. - * \param pred A function object which decides to which partition each element of the - * sequence [first, last) belongs. - * \return A \p pair p such that p.first is the end of the output range beginning - * at \p out_true and p.second is the end of the output range beginning at - * \p out_false. - * - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type and \p InputIterator's \c value_type - * is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. - * - * \pre The input range shall not overlap with either output range. - * - * The following code snippet demonstrates how to use \p partition_copy to separate a - * sequence into two output sequences of even and odd numbers. - * - * \code - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int &x) - * { - * return (x % 2) == 0; - * } - * }; - * ... 
- * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * int result[10]; - * const int N = sizeof(A)/sizeof(int); - * int *evens = result; - * int *odds = result + 5; - * thrust::partition_copy(A, A + N, evens, odds, is_even()); - * // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} - * // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} - * // evens points to {2, 4, 6, 8, 10} - * // odds points to {1, 3, 5, 7, 9} - * \endcode - * - * \note The relative order of elements in the two reordered sequences is not - * necessarily the same as it was in the original sequence. A different algorithm, - * \ref stable_partition_copy, does guarantee to preserve the relative order. - * - * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf - * \see \p stable_partition_copy - * \see \p partition - */ -template - thrust::pair - partition_copy(InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred); - - -/*! \p partition_copy differs from \ref partition only in that the reordered - * sequence is written to different output sequences, rather than in place. - * - * \p partition_copy copies the elements [first, last) based on the - * function object \p pred which is applied to a range of stencil elements. All of the elements - * whose corresponding stencil element satisfies \p pred are copied to the range beginning at \p out_true - * and all the elements whose stencil element fails to satisfy it are copied to the range beginning - * at \p out_false. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence to reorder. - * \param last The end of the sequence to reorder. - * \param stencil The beginning of the stencil sequence. - * \param out_true The destination of the resulting sequence of elements which satisfy \p pred. 
- * \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred. - * \param pred A function object which decides to which partition each element of the - * sequence [first, last) belongs. - * \return A \p pair p such that p.first is the end of the output range beginning - * at \p out_true and p.second is the end of the output range beginning at - * \p out_false. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. - * - * \pre The input ranges shall not overlap with either output range. - * - * The following code snippet demonstrates how to use \p partition_copy to separate a - * sequence into two output sequences of even and odd numbers using the \p thrust::host execution - * policy for parallelization. - * - * \code - * #include - * #include - * #include - * ... 
- * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * int S[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; - * int result[10]; - * const int N = sizeof(A)/sizeof(int); - * int *evens = result; - * int *odds = result + 5; - * thrust::partition_copy(thrust::host, A, A + N, S, evens, odds, thrust::identity()); - * // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} - * // S remains {0, 1, 0, 1, 0, 1, 0, 1, 0, 1} - * // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} - * // evens points to {2, 4, 6, 8, 10} - * // odds points to {1, 3, 5, 7, 9} - * \endcode - * - * \note The relative order of elements in the two reordered sequences is not - * necessarily the same as it was in the original sequence. A different algorithm, - * \ref stable_partition_copy, does guarantee to preserve the relative order. - * - * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf - * \see \p stable_partition_copy - * \see \p partition - */ -template - thrust::pair - partition_copy(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred); - - -/*! \p partition_copy differs from \ref partition only in that the reordered - * sequence is written to different output sequences, rather than in place. - * - * \p partition_copy copies the elements [first, last) based on the - * function object \p pred which is applied to a range of stencil elements. All of the elements - * whose corresponding stencil element satisfies \p pred are copied to the range beginning at \p out_true - * and all the elements whose stencil element fails to satisfy it are copied to the range beginning - * at \p out_false. - * - * \param first The beginning of the sequence to reorder. - * \param last The end of the sequence to reorder. - * \param stencil The beginning of the stencil sequence. - * \param out_true The destination of the resulting sequence of elements which satisfy \p pred. 
- * \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred. - * \param pred A function object which decides to which partition each element of the - * sequence [first, last) belongs. - * \return A \p pair p such that p.first is the end of the output range beginning - * at \p out_true and p.second is the end of the output range beginning at - * \p out_false. - * - * \tparam InputIterator1 is a model of Input Iterator, - * and \p InputIterator1's \c value_type is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. - * - * \pre The input ranges shall not overlap with either output range. - * - * The following code snippet demonstrates how to use \p partition_copy to separate a - * sequence into two output sequences of even and odd numbers. - * - * \code - * #include - * #include - * ... - * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * int S[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; - * int result[10]; - * const int N = sizeof(A)/sizeof(int); - * int *evens = result; - * int *odds = result + 5; - * thrust::partition_copy(A, A + N, S, evens, odds, thrust::identity()); - * // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} - * // S remains {0, 1, 0, 1, 0, 1, 0, 1, 0, 1} - * // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} - * // evens points to {2, 4, 6, 8, 10} - * // odds points to {1, 3, 5, 7, 9} - * \endcode - * - * \note The relative order of elements in the two reordered sequences is not - * necessarily the same as it was in the original sequence. A different algorithm, - * \ref stable_partition_copy, does guarantee to preserve the relative order. 
- * - * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf - * \see \p stable_partition_copy - * \see \p partition - */ -template - thrust::pair - partition_copy(InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred); - - -/*! \p stable_partition is much like \ref partition : it reorders the elements in the - * range [first, last) based on the function object \p pred, such that all of - * the elements that satisfy \p pred precede all of the elements that fail to satisfy - * it. The postcondition is that, for some iterator \p middle in the range - * [first, last), pred(*i) is \c true for every iterator \c i in the - * range [first,middle) and \c false for every iterator \c i in the range - * [middle, last). The return value of \p stable_partition is \c middle. - * - * \p stable_partition differs from \ref partition in that \p stable_partition is - * guaranteed to preserve relative order. That is, if \c x and \c y are elements in - * [first, last), and \c stencil_x and \c stencil_y are the stencil elements - * in corresponding positions within [stencil, stencil + (last - first)), - * and pred(stencil_x) == pred(stencil_y), and if \c x precedes - * \c y, then it will still be true after \p stable_partition that \c x precedes \c y. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The first element of the sequence to reorder. - * \param last One position past the last element of the sequence to reorder. - * \param pred A function object which decides to which partition each element of the - * sequence [first, last) belongs. - * \return An iterator referring to the first element of the second partition, that is, - * the sequence of the elements which do not satisfy pred. - * - * \tparam DerivedPolicy The name of the derived execution policy. 
- * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type, - * and \p ForwardIterator is mutable. - * \tparam Predicate is a model of Predicate. - * - * The following code snippet demonstrates how to use \p stable_partition to reorder a - * sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int &x) - * { - * return (x % 2) == 0; - * } - * }; - * ... - * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * const int N = sizeof(A)/sizeof(int); - * thrust::stable_partition(thrust::host, - * A, A + N, - * is_even()); - * // A is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} - * \endcode - * - * \see http://www.sgi.com/tech/stl/stable_partition.html - * \see \p partition - * \see \p stable_partition_copy - */ -template - ForwardIterator stable_partition(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred); - - -/*! \p stable_partition is much like \ref partition : it reorders the elements in the - * range [first, last) based on the function object \p pred, such that all of - * the elements that satisfy \p pred precede all of the elements that fail to satisfy - * it. The postcondition is that, for some iterator \p middle in the range - * [first, last), pred(*i) is \c true for every iterator \c i in the - * range [first,middle) and \c false for every iterator \c i in the range - * [middle, last). The return value of \p stable_partition is \c middle. - * - * \p stable_partition differs from \ref partition in that \p stable_partition is - * guaranteed to preserve relative order. 
That is, if \c x and \c y are elements in - * [first, last), and \c stencil_x and \c stencil_y are the stencil elements - * in corresponding positions within [stencil, stencil + (last - first)), - * and pred(stencil_x) == pred(stencil_y), and if \c x precedes - * \c y, then it will still be true after \p stable_partition that \c x precedes \c y. - * - * \param first The first element of the sequence to reorder. - * \param last One position past the last element of the sequence to reorder. - * \param pred A function object which decides to which partition each element of the - * sequence [first, last) belongs. - * \return An iterator referring to the first element of the second partition, that is, - * the sequence of the elements which do not satisfy pred. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type, - * and \p ForwardIterator is mutable. - * \tparam Predicate is a model of Predicate. - * - * The following code snippet demonstrates how to use \p stable_partition to reorder a - * sequence so that even numbers precede odd numbers. - * - * \code - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int &x) - * { - * return (x % 2) == 0; - * } - * }; - * ... - * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * const int N = sizeof(A)/sizeof(int); - * thrust::stable_partition(A, A + N, - * is_even()); - * // A is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} - * \endcode - * - * \see http://www.sgi.com/tech/stl/stable_partition.html - * \see \p partition - * \see \p stable_partition_copy - */ -template - ForwardIterator stable_partition(ForwardIterator first, - ForwardIterator last, - Predicate pred); - - -/*! 
\p stable_partition is much like \p partition: it reorders the elements in the - * range [first, last) based on the function object \p pred applied to a stencil - * range [stencil, stencil + (last - first)), such that all of - * the elements whose corresponding stencil element satisfies \p pred precede all of the elements whose - * corresponding stencil element fails to satisfy it. The postcondition is that, for some iterator - * \c middle in the range [first, last), pred(*stencil_i) is \c true for every iterator - * \c stencil_i in the range [stencil,stencil + (middle - first)) and \c false for every iterator \c stencil_i - * in the range [stencil + (middle - first), stencil + (last - first)). - * The return value of \p stable_partition is \c middle. - * - * \p stable_partition differs from \ref partition in that \p stable_partition is - * guaranteed to preserve relative order. That is, if \c x and \c y are elements in - * [first, last), such that pred(x) == pred(y), and if \c x precedes - * \c y, then it will still be true after \p stable_partition that \c x precedes \c y. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The first element of the sequence to reorder. - * \param last One position past the last element of the sequence to reorder. - * \param stencil The beginning of the stencil sequence. - * \param pred A function object which decides to which partition each element of the - * sequence [first, last) belongs. - * \return An iterator referring to the first element of the second partition, that is, - * the sequence of the elements whose stencil elements do not satisfy \p pred. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable. 
- * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam Predicate is a model of Predicate. - * - * \pre The range [first, last) shall not overlap with the range [stencil, stencil + (last - first)). - * - * The following code snippet demonstrates how to use \p stable_partition to reorder a - * sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int &x) - * { - * return (x % 2) == 0; - * } - * }; - * ... - * int A[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; - * int S[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * const int N = sizeof(A)/sizeof(int); - * thrust::stable_partition(thrust::host, A, A + N, S, is_even()); - * // A is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0} - * // S is unmodified - * \endcode - * - * \see http://www.sgi.com/tech/stl/stable_partition.html - * \see \p partition - * \see \p stable_partition_copy - */ -template - ForwardIterator stable_partition(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred); - - -/*! \p stable_partition is much like \p partition: it reorders the elements in the - * range [first, last) based on the function object \p pred applied to a stencil - * range [stencil, stencil + (last - first)), such that all of - * the elements whose corresponding stencil element satisfies \p pred precede all of the elements whose - * corresponding stencil element fails to satisfy it. 
The postcondition is that, for some iterator - * \c middle in the range [first, last), pred(*stencil_i) is \c true for every iterator - * \c stencil_i in the range [stencil,stencil + (middle - first)) and \c false for every iterator \c stencil_i - * in the range [stencil + (middle - first), stencil + (last - first)). - * The return value of \p stable_partition is \c middle. - * - * \p stable_partition differs from \ref partition in that \p stable_partition is - * guaranteed to preserve relative order. That is, if \c x and \c y are elements in - * [first, last), such that pred(x) == pred(y), and if \c x precedes - * \c y, then it will still be true after \p stable_partition that \c x precedes \c y. - * - * \param first The first element of the sequence to reorder. - * \param last One position past the last element of the sequence to reorder. - * \param stencil The beginning of the stencil sequence. - * \param pred A function object which decides to which partition each element of the - * sequence [first, last) belongs. - * \return An iterator referring to the first element of the second partition, that is, - * the sequence of the elements whose stencil elements do not satisfy \p pred. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable. - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam Predicate is a model of Predicate. - * - * \pre The range [first, last) shall not overlap with the range [stencil, stencil + (last - first)). - * - * The following code snippet demonstrates how to use \p stable_partition to reorder a - * sequence so that even numbers precede odd numbers. - * - * \code - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int &x) - * { - * return (x % 2) == 0; - * } - * }; - * ... 
- * int A[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; - * int S[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * const int N = sizeof(A)/sizeof(int); - * thrust::stable_partition(A, A + N, S, is_even()); - * // A is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0} - * // S is unmodified - * \endcode - * - * \see http://www.sgi.com/tech/stl/stable_partition.html - * \see \p partition - * \see \p stable_partition_copy - */ -template - ForwardIterator stable_partition(ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred); - - -/*! \p stable_partition_copy differs from \ref stable_partition only in that the reordered - * sequence is written to different output sequences, rather than in place. - * - * \p stable_partition_copy copies the elements [first, last) based on the - * function object \p pred. All of the elements that satisfy \p pred are copied - * to the range beginning at \p out_true and all the elements that fail to satisfy it - * are copied to the range beginning at \p out_false. - * - * \p stable_partition_copy differs from \ref partition_copy in that - * \p stable_partition_copy is guaranteed to preserve relative order. That is, if - * \c x and \c y are elements in [first, last), such that - * pred(x) == pred(y), and if \c x precedes \c y, then it will still be true - * after \p stable_partition_copy that \c x precedes \c y in the output. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The first element of the sequence to reorder. - * \param last One position past the last element of the sequence to reorder. - * \param out_true The destination of the resulting sequence of elements which satisfy \p pred. - * \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred. - * \param pred A function object which decides to which partition each element of the - * sequence [first, last) belongs. 
- * \return A \p pair p such that p.first is the end of the output range beginning - * at \p out_true and p.second is the end of the output range beginning at - * \p out_false. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type and \p InputIterator's \c value_type - * is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. - * - * \pre The input ranges shall not overlap with either output range. - * - * The following code snippet demonstrates how to use \p stable_partition_copy to - * reorder a sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int &x) - * { - * return (x % 2) == 0; - * } - * }; - * ... - * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * int result[10]; - * const int N = sizeof(A)/sizeof(int); - * int *evens = result; - * int *odds = result + 5; - * thrust::stable_partition_copy(thrust::host, A, A + N, evens, odds, is_even()); - * // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} - * // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} - * // evens points to {2, 4, 6, 8, 10} - * // odds points to {1, 3, 5, 7, 9} - * \endcode - * - * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf - * \see \p partition_copy - * \see \p stable_partition - */ -template - thrust::pair - stable_partition_copy(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred); - - -/*! 
\p stable_partition_copy differs from \ref stable_partition only in that the reordered - * sequence is written to different output sequences, rather than in place. - * - * \p stable_partition_copy copies the elements [first, last) based on the - * function object \p pred. All of the elements that satisfy \p pred are copied - * to the range beginning at \p out_true and all the elements that fail to satisfy it - * are copied to the range beginning at \p out_false. - * - * \p stable_partition_copy differs from \ref partition_copy in that - * \p stable_partition_copy is guaranteed to preserve relative order. That is, if - * \c x and \c y are elements in [first, last), such that - * pred(x) == pred(y), and if \c x precedes \c y, then it will still be true - * after \p stable_partition_copy that \c x precedes \c y in the output. - * - * \param first The first element of the sequence to reorder. - * \param last One position past the last element of the sequence to reorder. - * \param out_true The destination of the resulting sequence of elements which satisfy \p pred. - * \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred. - * \param pred A function object which decides to which partition each element of the - * sequence [first, last) belongs. - * \return A \p pair p such that p.first is the end of the output range beginning - * at \p out_true and p.second is the end of the output range beginning at - * \p out_false. - * - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type and \p InputIterator's \c value_type - * is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. - * - * \pre The input ranges shall not overlap with either output range. 
- * - * The following code snippet demonstrates how to use \p stable_partition_copy to - * reorder a sequence so that even numbers precede odd numbers. - * - * \code - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int &x) - * { - * return (x % 2) == 0; - * } - * }; - * ... - * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * int result[10]; - * const int N = sizeof(A)/sizeof(int); - * int *evens = result; - * int *odds = result + 5; - * thrust::stable_partition_copy(A, A + N, evens, odds, is_even()); - * // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} - * // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} - * // evens points to {2, 4, 6, 8, 10} - * // odds points to {1, 3, 5, 7, 9} - * \endcode - * - * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf - * \see \p partition_copy - * \see \p stable_partition - */ -template - thrust::pair - stable_partition_copy(InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred); - - -/*! \p stable_partition_copy differs from \ref stable_partition only in that the reordered - * sequence is written to different output sequences, rather than in place. - * - * \p stable_partition_copy copies the elements [first, last) based on the - * function object \p pred which is applied to a range of stencil elements. All of the elements - * whose corresponding stencil element satisfies \p pred are copied to the range beginning at \p out_true - * and all the elements whose stencil element fails to satisfy it are copied to the range beginning - * at \p out_false. - * - * \p stable_partition_copy differs from \ref partition_copy in that - * \p stable_partition_copy is guaranteed to preserve relative order. That is, if - * \c x and \c y are elements in [first, last), such that - * pred(x) == pred(y), and if \c x precedes \c y, then it will still be true - * after \p stable_partition_copy that \c x precedes \c y in the output. 
- * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The first element of the sequence to reorder. - * \param last One position past the last element of the sequence to reorder. - * \param stencil The beginning of the stencil sequence. - * \param out_true The destination of the resulting sequence of elements which satisfy \p pred. - * \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred. - * \param pred A function object which decides to which partition each element of the - * sequence [first, last) belongs. - * \return A \p pair p such that p.first is the end of the output range beginning - * at \p out_true and p.second is the end of the output range beginning at - * \p out_false. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. - * - * \pre The input ranges shall not overlap with either output range. - * - * The following code snippet demonstrates how to use \p stable_partition_copy to - * reorder a sequence so that even numbers precede odd numbers using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... 
- * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * int S[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; - * int result[10]; - * const int N = sizeof(A)/sizeof(int); - * int *evens = result; - * int *odds = result + 5; - * thrust::stable_partition_copy(thrust::host, A, A + N, S, evens, odds, thrust::identity()); - * // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} - * // S remains {0, 1, 0, 1, 0, 1, 0, 1, 0, 1} - * // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} - * // evens points to {2, 4, 6, 8, 10} - * // odds points to {1, 3, 5, 7, 9} - * \endcode - * - * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf - * \see \p partition_copy - * \see \p stable_partition - */ -template - thrust::pair - stable_partition_copy(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred); - - -/*! \p stable_partition_copy differs from \ref stable_partition only in that the reordered - * sequence is written to different output sequences, rather than in place. - * - * \p stable_partition_copy copies the elements [first, last) based on the - * function object \p pred which is applied to a range of stencil elements. All of the elements - * whose corresponding stencil element satisfies \p pred are copied to the range beginning at \p out_true - * and all the elements whose stencil element fails to satisfy it are copied to the range beginning - * at \p out_false. - * - * \p stable_partition_copy differs from \ref partition_copy in that - * \p stable_partition_copy is guaranteed to preserve relative order. That is, if - * \c x and \c y are elements in [first, last), such that - * pred(x) == pred(y), and if \c x precedes \c y, then it will still be true - * after \p stable_partition_copy that \c x precedes \c y in the output. - * - * \param first The first element of the sequence to reorder. 
- * \param last One position past the last element of the sequence to reorder. - * \param stencil The beginning of the stencil sequence. - * \param out_true The destination of the resulting sequence of elements which satisfy \p pred. - * \param out_false The destination of the resulting sequence of elements which fail to satisfy \p pred. - * \param pred A function object which decides to which partition each element of the - * sequence [first, last) belongs. - * \return A \p pair p such that p.first is the end of the output range beginning - * at \p out_true and p.second is the end of the output range beginning at - * \p out_false. - * - * \tparam InputIterator1 is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p OutputIterator1 and \p OutputIterator2's \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. - * - * \pre The input ranges shall not overlap with either output range. - * - * The following code snippet demonstrates how to use \p stable_partition_copy to - * reorder a sequence so that even numbers precede odd numbers. - * - * \code - * #include - * #include - * ... 
- * int A[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * int S[] = {0, 1, 0, 1, 0, 1, 0, 1, 0, 1}; - * int result[10]; - * const int N = sizeof(A)/sizeof(int); - * int *evens = result; - * int *odds = result + 5; - * thrust::stable_partition_copy(A, A + N, S, evens, odds, thrust::identity()); - * // A remains {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} - * // S remains {0, 1, 0, 1, 0, 1, 0, 1, 0, 1} - * // result is now {2, 4, 6, 8, 10, 1, 3, 5, 7, 9} - * // evens points to {2, 4, 6, 8, 10} - * // odds points to {1, 3, 5, 7, 9} - * \endcode - * - * \see http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2569.pdf - * \see \p partition_copy - * \see \p stable_partition - */ -template - thrust::pair - stable_partition_copy(InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred); - - -/*! \} // end stream_compaction - */ - -/*! \} // end reordering - */ - -/*! \addtogroup searching - * \{ - */ - - -/*! \p partition_point returns an iterator pointing to the end of the true - * partition of a partitioned range. \p partition_point requires the input range - * [first,last) to be a partition; that is, all elements which satisfy - * pred shall appear before those that do not. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the range to consider. - * \param last The end of the range to consider. - * \param pred A function object which decides to which partition each element of the - * range [first, last) belongs. - * \return An iterator \c mid such that all_of(first, mid, pred) - * and none_of(mid, last, pred) are both true. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type. 
- * \tparam Predicate is a model of Predicate. - * - * \pre The range [first, last) shall be partitioned by \p pred. - * - * \note Though similar, \p partition_point is not redundant with \p find_if_not. - * \p partition_point's precondition provides an opportunity for a - * faster implemention. - * - * \code - * #include - * #include - * - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int &x) - * { - * return (x % 2) == 0; - * } - * }; - * - * ... - * - * int A[] = {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}; - * int * B = thrust::partition_point(thrust::host, A, A + 10, is_even()); - * // B - A is 5 - * // [A, B) contains only even values - * \endcode - * - * \see \p partition - * \see \p find_if_not - */ -template - ForwardIterator partition_point(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred); - - -/*! \p partition_point returns an iterator pointing to the end of the true - * partition of a partitioned range. \p partition_point requires the input range - * [first,last) to be a partition; that is, all elements which satisfy - * pred shall appear before those that do not. - * \param first The beginning of the range to consider. - * \param last The end of the range to consider. - * \param pred A function object which decides to which partition each element of the - * range [first, last) belongs. - * \return An iterator \c mid such that all_of(first, mid, pred) - * and none_of(mid, last, pred) are both true. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam Predicate is a model of Predicate. - * - * \pre The range [first, last) shall be partitioned by \p pred. - * - * \note Though similar, \p partition_point is not redundant with \p find_if_not. - * \p partition_point's precondition provides an opportunity for a - * faster implemention. 
- * - * \code - * #include - * - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int &x) - * { - * return (x % 2) == 0; - * } - * }; - * - * ... - * - * int A[] = {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}; - * int * B = thrust::partition_point(A, A + 10, is_even()); - * // B - A is 5 - * // [A, B) contains only even values - * \endcode - * - * \see \p partition - * \see \p find_if_not - */ -template - ForwardIterator partition_point(ForwardIterator first, - ForwardIterator last, - Predicate pred); - -/*! \} // searching - */ - -/*! \addtogroup reductions - * \{ - * \addtogroup predicates - * \{ - */ - - -/*! \p is_partitioned returns \c true if the given range - * is partitioned with respect to a predicate, and \c false otherwise. - * - * Specifically, \p is_partitioned returns \c true if [first, last) - * is empty of if [first, last) is partitioned by \p pred, i.e. if - * all elements that satisfy \p pred appear before those that do not. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the range to consider. - * \param last The end of the range to consider. - * \param pred A function object which decides to which partition each element of the - * range [first, last) belongs. - * \return \c true if the range [first, last) is partitioned with respect - * to \p pred, or if [first, last) is empty. \c false, otherwise. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam Predicate is a model of Predicate. - * - * \code - * #include - * #include - * - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int &x) - * { - * return (x % 2) == 0; - * } - * }; - * - * ... 
- * - * int A[] = {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}; - * int B[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * - * thrust::is_partitioned(thrust::host, A, A + 10); // returns true - * thrust::is_partitioned(thrust::host, B, B + 10); // returns false - * \endcode - * - * \see \p partition - */ -template - bool is_partitioned(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - Predicate pred); - - -/*! \p is_partitioned returns \c true if the given range - * is partitioned with respect to a predicate, and \c false otherwise. - * - * Specifically, \p is_partitioned returns \c true if [first, last) - * is empty of if [first, last) is partitioned by \p pred, i.e. if - * all elements that satisfy \p pred appear before those that do not. - * - * \param first The beginning of the range to consider. - * \param last The end of the range to consider. - * \param pred A function object which decides to which partition each element of the - * range [first, last) belongs. - * \return \c true if the range [first, last) is partitioned with respect - * to \p pred, or if [first, last) is empty. \c false, otherwise. - * - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam Predicate is a model of Predicate. - * - * \code - * #include - * - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int &x) - * { - * return (x % 2) == 0; - * } - * }; - * - * ... - * - * int A[] = {2, 4, 6, 8, 10, 1, 3, 5, 7, 9}; - * int B[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - * - * thrust::is_partitioned(A, A + 10); // returns true - * thrust::is_partitioned(B, B + 10); // returns false - * \endcode - * - * \see \p partition - */ -template - bool is_partitioned(InputIterator first, - InputIterator last, - Predicate pred); - - -/*! 
\} // end predicates - * \} // end reductions - */ - - -} // end thrust - -#include - diff --git a/compat/thrust/random.h b/compat/thrust/random.h deleted file mode 100644 index 5a2c00da86..0000000000 --- a/compat/thrust/random.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file random.h - * \brief Pseudo-random number generators. - */ - -#pragma once - -#include -#include - -// RNGs -#include -#include -#include -#include -#include - -// distributions -#include -#include -#include - -namespace thrust -{ - - -/*! \addtogroup random Random Number Generation - * \{ - */ - - -/*! \namespace thrust::random - * \brief \p thrust::random is the namespace which contains random number engine class templates, - * random number engine adaptor class templates, engines with predefined parameters, - * and random number distribution class templates. They are provided in a separate namespace - * for import convenience but are also aliased in the top-level \p thrust namespace for - * easy access. - */ -namespace random -{ - -/*! \addtogroup predefined_random Random Number Engines with Predefined Parameters - * \ingroup random - * \{ - */ - -/*! \typedef ranlux24 - * \brief A random number engine with predefined parameters which implements the - * RANLUX level-3 random number generation algorithm. 
- * \note The 10000th consecutive invocation of a default-constructed object of type \p ranlux24 - * shall produce the value \c 9901578 . - */ -typedef discard_block_engine ranlux24; - - -/*! \typedef ranlux48 - * \brief A random number engine with predefined parameters which implements the - * RANLUX level-4 random number generation algorithm. - * \note The 10000th consecutive invocation of a default-constructed object of type \p ranlux48 - * shall produce the value \c 88229545517833 . - */ -typedef discard_block_engine ranlux48; - - -/*! \typedef taus88 - * \brief A random number engine with predefined parameters which implements - * L'Ecuyer's 1996 three-component Tausworthe random number generator. - * - * \note The 10000th consecutive invocation of a default-constructed object of type \p taus88 - * shall produce the value \c 3535848941 . - */ -typedef xor_combine_engine< - linear_feedback_shift_engine, - 0, - xor_combine_engine< - linear_feedback_shift_engine, 0, - linear_feedback_shift_engine, 0 - >, - 0 -> taus88; - -/*! \typedef default_random_engine - * \brief An implementation-defined "default" random number engine. - * \note \p default_random_engine is currently an alias for \p minstd_rand, and may change - * in a future version. - */ -typedef minstd_rand default_random_engine; - -/*! \} // end predefined_random - */ - -} // end random - - -/*! 
\} // end random - */ - -// import names into thrust:: -using random::ranlux24; -using random::ranlux48; -using random::taus88; -using random::default_random_engine; - -} // end thrust - diff --git a/compat/thrust/random/detail/discard_block_engine.inl b/compat/thrust/random/detail/discard_block_engine.inl deleted file mode 100644 index 5f01bd1816..0000000000 --- a/compat/thrust/random/detail/discard_block_engine.inl +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -namespace thrust -{ - -namespace random -{ - - -template - discard_block_engine - ::discard_block_engine() - : m_e(), m_n(0) -{} - - -template - discard_block_engine - ::discard_block_engine(result_type s) - : m_e(s), m_n(0) -{} - - -template - discard_block_engine - ::discard_block_engine(const base_type &urng) - : m_e(urng), m_n(0) -{} - - -template - void discard_block_engine - ::seed(void) -{ - m_e.seed(); - m_n = 0; -} - - -template - void discard_block_engine - ::seed(result_type s) -{ - m_e.seed(s); - m_n = 0; -} - - -template - typename discard_block_engine::result_type - discard_block_engine - ::operator()(void) -{ - if(m_n >= used_block) - { - m_e.discard(block_size - m_n); -// for(; m_n < block_size; ++m_n) -// m_e(); - m_n = 0; - } - - ++m_n; - - return m_e(); -} - - -template - void discard_block_engine - ::discard(unsigned long long z) -{ - // XXX this should be accelerated - for(; z > 0; --z) - { - this->operator()(); - } // end for -} - - -template - const typename discard_block_engine::base_type & - discard_block_engine - ::base(void) const -{ - return m_e; -} - - -template - template - std::basic_ostream& discard_block_engine - ::stream_out(std::basic_ostream &os) const -{ - typedef std::basic_ostream ostream_type; - typedef typename ostream_type::ios_base ios_base; - - // save old flags & fill character - const typename ios_base::fmtflags flags = os.flags(); - const CharT fill = os.fill(); - - const CharT space = os.widen(' '); - os.flags(ios_base::dec | ios_base::fixed | ios_base::left); - os.fill(space); - - // output the base engine followed by n - os << m_e << space << m_n; - - // restore flags & fill character - os.flags(flags); - os.fill(fill); - - return os; -} - - -template - template - std::basic_istream& discard_block_engine - ::stream_in(std::basic_istream &is) -{ - typedef std::basic_istream istream_type; - typedef typename istream_type::ios_base ios_base; - - // save old flags - const typename ios_base::fmtflags 
flags = is.flags(); - - is.flags(ios_base::skipws); - - // input the base engine and then n - is >> m_e >> m_n; - - // restore old flags - is.flags(flags); - return is; -} - - -template - bool discard_block_engine - ::equal(const discard_block_engine &rhs) const -{ - return (m_e == rhs.m_e) && (m_n == rhs.m_n); -} - - -template -std::basic_ostream& -operator<<(std::basic_ostream &os, - const discard_block_engine &e) -{ - return thrust::random::detail::random_core_access::stream_out(os,e); -} - - -template -std::basic_istream& -operator>>(std::basic_istream &is, - discard_block_engine &e) -{ - return thrust::random::detail::random_core_access::stream_in(is,e); -} - - -template -bool operator==(const discard_block_engine &lhs, - const discard_block_engine &rhs) -{ - return thrust::random::detail::random_core_access::equal(lhs,rhs); -} - - -template -bool operator!=(const discard_block_engine &lhs, - const discard_block_engine &rhs) -{ - return !(lhs == rhs); -} - - -} // end random - -} // end thrust - diff --git a/compat/thrust/random/detail/linear_congruential_engine.inl b/compat/thrust/random/detail/linear_congruential_engine.inl deleted file mode 100644 index f040563cc2..0000000000 --- a/compat/thrust/random/detail/linear_congruential_engine.inl +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -namespace thrust -{ - -namespace random -{ - - -template - linear_congruential_engine - ::linear_congruential_engine(result_type s) -{ - seed(s); -} // end linear_congruential_engine::linear_congruential_engine() - - -template - void linear_congruential_engine - ::seed(result_type s) -{ - if((detail::mod(c) == 0) && - (detail::mod(s) == 0)) - m_x = detail::mod(1); - else - m_x = detail::mod(s); -} // end linear_congruential_engine::seed() - - -template - typename linear_congruential_engine::result_type - linear_congruential_engine - ::operator()(void) -{ - m_x = detail::mod(m_x); - return m_x; -} // end linear_congruential_engine::operator()() - - -template - void linear_congruential_engine - ::discard(unsigned long long z) -{ - thrust::random::detail::linear_congruential_engine_discard::discard(*this,z); -} // end linear_congruential_engine::discard() - - -template - template - std::basic_ostream& linear_congruential_engine - ::stream_out(std::basic_ostream &os) const -{ - typedef std::basic_ostream ostream_type; - typedef typename ostream_type::ios_base ios_base; - - // save old flags & fill character - const typename ios_base::fmtflags flags = os.flags(); - const CharT fill = os.fill(); - - os.flags(ios_base::dec | ios_base::fixed | ios_base::left); - os.fill(os.widen(' ')); - - // output one word of state - os << m_x; - - // restore flags & fill character - os.flags(flags); - os.fill(fill); - - return os; -} - - -template - template - std::basic_istream& linear_congruential_engine - ::stream_in(std::basic_istream &is) -{ - typedef std::basic_istream istream_type; - typedef typename istream_type::ios_base ios_base; - - // save old flags - const typename ios_base::fmtflags flags = is.flags(); - - is.flags(ios_base::dec); - - // input one word of state - is >> m_x; - - // restore flags - is.flags(flags); - - return is; -} - - -template -bool linear_congruential_engine - ::equal(const linear_congruential_engine &rhs) const -{ 
- return m_x == rhs.m_x; -} - - -template -__host__ __device__ -bool operator==(const linear_congruential_engine &lhs, - const linear_congruential_engine &rhs) -{ - return detail::random_core_access::equal(lhs,rhs); -} - - -template -bool operator!=(const linear_congruential_engine &lhs, - const linear_congruential_engine &rhs) -{ - return !(lhs == rhs); -} - - -template -std::basic_ostream& -operator<<(std::basic_ostream &os, - const linear_congruential_engine &e) -{ - return detail::random_core_access::stream_out(os,e); -} - - -template -std::basic_istream& -operator>>(std::basic_istream &is, - linear_congruential_engine &e) -{ - return detail::random_core_access::stream_in(is,e); -} - - -} // end random - -} // end thrust - diff --git a/compat/thrust/random/detail/linear_congruential_engine_discard.h b/compat/thrust/random/detail/linear_congruential_engine_discard.h deleted file mode 100644 index f4ec23364e..0000000000 --- a/compat/thrust/random/detail/linear_congruential_engine_discard.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ - -namespace random -{ - -namespace detail -{ - - -template - struct linear_congruential_engine_discard_implementation -{ - __host__ __device__ - static void discard(UIntType &state, unsigned long long z) - { - for(; z > 0; --z) - { - state = detail::mod(state); - } - } -}; // end linear_congruential_engine_discard - - -// specialize for small integers and c == 0 -// XXX figure out a robust implemenation of this for any unsigned integer type later -template - struct linear_congruential_engine_discard_implementation -{ - __host__ __device__ - static void discard(thrust::detail::uint32_t &state, unsigned long long z) - { - const thrust::detail::uint32_t modulus = m; - - // XXX we need to use unsigned long long here or we will encounter overflow in the - // multiplies below - // figure out a robust implementation of this later - unsigned long long multiplier = a; - unsigned long long multiplier_to_z = 1; - - // see http://en.wikipedia.org/wiki/Modular_exponentiation - while(z > 0) - { - if(z & 1) - { - // multiply in this bit's contribution while using modulus to keep result small - multiplier_to_z = (multiplier_to_z * multiplier) % modulus; - } - - // move to the next bit of the exponent, square (and mod) the base accordingly - z >>= 1; - multiplier = (multiplier * multiplier) % modulus; - } - - state = static_cast((multiplier_to_z * state) % modulus); - } -}; // end linear_congruential_engine_discard - - -struct linear_congruential_engine_discard -{ - template - __host__ __device__ - static void discard(LinearCongruentialEngine &lcg, unsigned long long z) - { - typedef typename LinearCongruentialEngine::result_type result_type; - const result_type c = LinearCongruentialEngine::increment; - const result_type a = LinearCongruentialEngine::multiplier; - const result_type m = LinearCongruentialEngine::modulus; - - // XXX WAR unused variable warnings - (void) c; - (void) a; - (void) m; - - 
linear_congruential_engine_discard_implementation::discard(lcg.m_x, z); - } -}; // end linear_congruential_engine_discard - - -} // end detail - -} // end random - -} // end thrust - diff --git a/compat/thrust/random/detail/linear_feedback_shift_engine.inl b/compat/thrust/random/detail/linear_feedback_shift_engine.inl deleted file mode 100644 index 4e8dad5140..0000000000 --- a/compat/thrust/random/detail/linear_feedback_shift_engine.inl +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -namespace thrust -{ - -namespace random -{ - -template - linear_feedback_shift_engine - ::linear_feedback_shift_engine(result_type value) -{ - seed(value); -} // end linear_feedback_shift_engine::linear_feedback_shift_engine() - -template - void linear_feedback_shift_engine - ::seed(result_type value) -{ - m_value = value; -} // end linear_feedback_shift_engine::seed() - -template - typename linear_feedback_shift_engine::result_type - linear_feedback_shift_engine - ::operator()(void) -{ - const UIntType b = (((m_value << q) ^ m_value) & wordmask) >> (k-s); - const UIntType mask = ( (~static_cast(0)) << (w-k) ) & wordmask; - m_value = ((m_value & mask) << s) ^ b; - return m_value; -} // end linear_feedback_shift_engine::operator()() - - -template - void linear_feedback_shift_engine - ::discard(unsigned long long z) -{ - for(; z > 0; --z) - { - this->operator()(); - } // end for -} // end linear_feedback_shift_engine::discard() - - -template - template - std::basic_ostream& linear_feedback_shift_engine - ::stream_out(std::basic_ostream &os) const -{ - typedef std::basic_ostream ostream_type; - typedef typename ostream_type::ios_base ios_base; - - // save old flags & fill character - const typename ios_base::fmtflags flags = os.flags(); - const CharT fill = os.fill(); - - os.flags(ios_base::dec | ios_base::fixed | ios_base::left); - os.fill(os.widen(' ')); - - // output one word of state - os << m_value; - - // restore flags & fill character - os.flags(flags); - os.fill(fill); - - return os; -} - - -template - template - std::basic_istream& linear_feedback_shift_engine - ::stream_in(std::basic_istream &is) -{ - typedef std::basic_istream istream_type; - typedef typename istream_type::ios_base ios_base; - - // save old flags - const typename ios_base::fmtflags flags = is.flags(); - - is.flags(ios_base::skipws); - - // input one word of state - is >> m_value; - - // restore flags - is.flags(flags); - - return is; -} - - -template - bool 
linear_feedback_shift_engine - ::equal(const linear_feedback_shift_engine &rhs) const -{ - return m_value == rhs.m_value; -} - - -template -bool operator==(const linear_feedback_shift_engine &lhs, - const linear_feedback_shift_engine &rhs) -{ - return thrust::random::detail::random_core_access::equal(lhs,rhs); -} - - -template -bool operator!=(const linear_feedback_shift_engine &lhs, - const linear_feedback_shift_engine &rhs) -{ - return !(lhs == rhs); -} - - -template -std::basic_ostream& -operator<<(std::basic_ostream &os, - const linear_feedback_shift_engine &e) -{ - return thrust::random::detail::random_core_access::stream_out(os,e); -} - - -template -std::basic_istream& -operator>>(std::basic_istream &is, - linear_feedback_shift_engine &e) -{ - return thrust::random::detail::random_core_access::stream_in(is,e); -} - - -} // end random - -} // end thrust - diff --git a/compat/thrust/random/detail/linear_feedback_shift_engine_wordmask.h b/compat/thrust/random/detail/linear_feedback_shift_engine_wordmask.h deleted file mode 100644 index ed9e51e925..0000000000 --- a/compat/thrust/random/detail/linear_feedback_shift_engine_wordmask.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -namespace thrust -{ - -namespace random -{ - -namespace detail -{ - -template - struct linear_feedback_shift_engine_wordmask -{ - static const T value = - (T(1u) << i) | - linear_feedback_shift_engine_wordmask::value; -}; // end linear_feedback_shift_engine_wordmask - -template - struct linear_feedback_shift_engine_wordmask -{ - static const T value = 0; -}; // end linear_feedback_shift_engine_wordmask - -} // end detail - -} // end random - -} // end thrust - diff --git a/compat/thrust/random/detail/mod.h b/compat/thrust/random/detail/mod.h deleted file mode 100644 index ceb2191552..0000000000 --- a/compat/thrust/random/detail/mod.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -namespace thrust -{ - -namespace random -{ - -namespace detail -{ - -template - struct static_mod -{ - static const T q = m / a; - static const T r = m % a; - - __host__ __device__ - T operator()(T x) const - { - if(a == 1) - { - x %= m; - } - else - { - T t1 = a * (x % q); - T t2 = r * (x / q); - if(t1 >= t2) - { - x = t1 - t2; - } - else - { - x = m - t2 + t1; - } - } - - if(c != 0) - { - const T d = m - x; - if(d > c) - { - x += c; - } - else - { - x = c - d; - } - } - - return x; - } -}; // end static_mod - - -// Rely on machine overflow handling -template - struct static_mod -{ - __host__ __device__ - T operator()(T x) const - { - return a * x + c; - } -}; // end static_mod - -template -__host__ __device__ - T mod(T x) -{ - static_mod f; - return f(x); -} // end static_mod - -} // end detail - -} // end random - -} // end thrust - diff --git a/compat/thrust/random/detail/normal_distribution.inl b/compat/thrust/random/detail/normal_distribution.inl deleted file mode 100644 index 1bb55d75b2..0000000000 --- a/compat/thrust/random/detail/normal_distribution.inl +++ /dev/null @@ -1,241 +0,0 @@ -/* - * - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include - -// for floating point infinity -#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC -#include -#else -#include -#endif - -namespace thrust -{ - -namespace random -{ - - -template - normal_distribution - ::normal_distribution(RealType a, RealType b) - :super_t(),m_param(a,b) -{ -} // end normal_distribution::normal_distribution() - - -template - normal_distribution - ::normal_distribution(const param_type &parm) - :super_t(),m_param(parm) -{ -} // end normal_distribution::normal_distribution() - - -template - void normal_distribution - ::reset(void) -{ - super_t::reset(); -} // end normal_distribution::reset() - - -template - template - typename normal_distribution::result_type - normal_distribution - ::operator()(UniformRandomNumberGenerator &urng) -{ - return operator()(urng, m_param); -} // end normal_distribution::operator()() - - -template - template - typename normal_distribution::result_type - normal_distribution - ::operator()(UniformRandomNumberGenerator &urng, - const param_type &parm) -{ - return super_t::sample(urng, parm.first, parm.second); -} // end normal_distribution::operator()() - - -template - typename normal_distribution::param_type - normal_distribution - ::param(void) const -{ - return m_param; -} // end normal_distribution::param() - - -template - void normal_distribution - ::param(const param_type &parm) -{ - m_param = parm; -} // end normal_distribution::param() - - -template - typename normal_distribution::result_type - normal_distribution - ::min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const -{ - return -this->max(); -} // end normal_distribution::min() - - -template - typename normal_distribution::result_type - normal_distribution - ::max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const -{ - // XXX this solution is pretty terrible - // we can't use numeric_traits::max because nvcc will - // complain that it is a __host__ function - union - { - thrust::detail::uint32_t inf_as_int; - 
float result; - } hack; - - hack.inf_as_int = 0x7f800000u; - - return hack.result; -} // end normal_distribution::max() - - -template - typename normal_distribution::result_type - normal_distribution - ::mean(void) const -{ - return m_param.first; -} // end normal_distribution::mean() - - -template - typename normal_distribution::result_type - normal_distribution - ::stddev(void) const -{ - return m_param.second; -} // end normal_distribution::stddev() - - -template - bool normal_distribution - ::equal(const normal_distribution &rhs) const -{ - return m_param == rhs.param(); -} - - -template - template - std::basic_ostream& - normal_distribution - ::stream_out(std::basic_ostream &os) const -{ - typedef std::basic_ostream ostream_type; - typedef typename ostream_type::ios_base ios_base; - - // save old flags and fill character - const typename ios_base::fmtflags flags = os.flags(); - const CharT fill = os.fill(); - - const CharT space = os.widen(' '); - os.flags(ios_base::dec | ios_base::fixed | ios_base::left); - os.fill(space); - - os << mean() << space << stddev(); - - // restore old flags and fill character - os.flags(flags); - os.fill(fill); - return os; -} - - -template - template - std::basic_istream& - normal_distribution - ::stream_in(std::basic_istream &is) -{ - typedef std::basic_istream istream_type; - typedef typename istream_type::ios_base ios_base; - - // save old flags - const typename ios_base::fmtflags flags = is.flags(); - - is.flags(ios_base::skipws); - - is >> m_param.first >> m_param.second; - - // restore old flags - is.flags(flags); - return is; -} - - -template -bool operator==(const normal_distribution &lhs, - const normal_distribution &rhs) -{ - return thrust::random::detail::random_core_access::equal(lhs,rhs); -} - - -template -bool operator!=(const normal_distribution &lhs, - const normal_distribution &rhs) -{ - return !(lhs == rhs); -} - - -template -std::basic_ostream& -operator<<(std::basic_ostream &os, - const normal_distribution &d) 
-{ - return thrust::random::detail::random_core_access::stream_out(os,d); -} - - -template -std::basic_istream& -operator>>(std::basic_istream &is, - normal_distribution &d) -{ - return thrust::random::detail::random_core_access::stream_in(is,d); -} - - -} // end random - -} // end thrust - diff --git a/compat/thrust/random/detail/normal_distribution_base.h b/compat/thrust/random/detail/normal_distribution_base.h deleted file mode 100644 index d9166112ad..0000000000 --- a/compat/thrust/random/detail/normal_distribution_base.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * Copyright Jens Maurer 2000-2001 - * Distributed under the Boost Software License, Version 1.0. 
(See - * accompanying file LICENSE_1_0.txt or copy at - * http://www.boost.org/LICENSE_1_0.txt) - */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace random -{ -namespace detail -{ - -// this version samples the normal distribution directly -// and uses the non-standard math function erfcinv -template - class normal_distribution_nvcc -{ - protected: - template - __host__ __device__ - RealType sample(UniformRandomNumberGenerator &urng, const RealType mean, const RealType stddev) - { - typedef typename UniformRandomNumberGenerator::result_type uint_type; - const uint_type urng_range = UniformRandomNumberGenerator::max - UniformRandomNumberGenerator::min; - - // Constants for conversion - const RealType S1 = static_cast(1) / urng_range; - const RealType S2 = S1 / 2; - - RealType S3 = static_cast(-1.4142135623730950488016887242097); // -sqrt(2) - - // Get the integer value - uint_type u = urng() - UniformRandomNumberGenerator::min; - - // Ensure the conversion to float will give a value in the range [0,0.5) - if(u > (urng_range / 2)) - { - u = urng_range - u; - S3 = -S3; - } - - // Convert to floating point in [0,0.5) - RealType p = u*S1 + S2; - - // Apply inverse error function - return mean + stddev * S3 * erfcinv(2 * p); - } - - // no-op - __host__ __device__ - void reset() {} -}; - -// this version samples the normal distribution using -// Marsaglia's "polar method" -template - class normal_distribution_portable -{ - protected: - normal_distribution_portable() - : m_valid(false) - {} - - normal_distribution_portable(const normal_distribution_portable &other) - : m_valid(other.m_valid) - {} - - void reset() - { - m_valid = false; - } - - // note that we promise to call this member function with the same mean and stddev - template - __host__ __device__ - RealType sample(UniformRandomNumberGenerator &urng, const RealType mean, const RealType stddev) - { - // implementation from Boost - // allow for Koenig lookup - 
using std::sqrt; using std::log; using std::sin; using std::cos; - - if(!m_valid) - { - uniform_real_distribution u01; - m_r1 = u01(urng); - m_r2 = u01(urng); - m_cached_rho = sqrt(-RealType(2) * log(RealType(1)-m_r2)); - - m_valid = true; - } - else - { - m_valid = false; - } - - const RealType pi = RealType(3.14159265358979323846); - - RealType result = m_cached_rho * (m_valid ? - cos(RealType(2)*pi*m_r1) : - sin(RealType(2)*pi*m_r1)); - - return result; - } - - private: - RealType m_r1, m_r2, m_cached_rho; - bool m_valid; -}; - -template - struct normal_distribution_base -{ -#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC - typedef normal_distribution_nvcc type; -#else - typedef normal_distribution_portable type; -#endif -}; - -} // end detail -} // end random -} // end thrust - diff --git a/compat/thrust/random/detail/random_core_access.h b/compat/thrust/random/detail/random_core_access.h deleted file mode 100644 index 81f58e2207..0000000000 --- a/compat/thrust/random/detail/random_core_access.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -namespace thrust -{ - -namespace random -{ - -namespace detail -{ - -struct random_core_access -{ - -template -static OStream &stream_out(OStream &os, const EngineOrDistribution &x) -{ - return x.stream_out(os); -} - -template -static IStream &stream_in(IStream &is, EngineOrDistribution &x) -{ - return x.stream_in(is); -} - -template -__host__ __device__ -static bool equal(const EngineOrDistribution &lhs, const EngineOrDistribution &rhs) -{ - return lhs.equal(rhs); -} - -}; // end random_core_access - -} // end detail - -} // end random - -} // end thrust - diff --git a/compat/thrust/random/detail/subtract_with_carry_engine.inl b/compat/thrust/random/detail/subtract_with_carry_engine.inl deleted file mode 100644 index a58b2665b2..0000000000 --- a/compat/thrust/random/detail/subtract_with_carry_engine.inl +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include - -namespace thrust -{ - -namespace random -{ - - -template - subtract_with_carry_engine - ::subtract_with_carry_engine(result_type value) -{ - seed(value); -} // end subtract_with_carry_engine::subtract_with_carry_engine() - - -template - void subtract_with_carry_engine - ::seed(result_type value) -{ - thrust::random::linear_congruential_engine e(value == 0u ? 
default_seed : value); - - // initialize state - for(size_t i = 0; i < long_lag; ++i) - { - m_x[i] = detail::mod(e()); - } // end for i - - m_carry = (m_x[long_lag-1] == 0); - m_k = 0; -} // end subtract_with_carry_engine::seed() - - -template - typename subtract_with_carry_engine::result_type - subtract_with_carry_engine - ::operator()(void) -{ - // XXX we probably need to cache these m_x[m_k] in a register - // maybe we need to cache the use of all member variables - int short_index = m_k - short_lag; - if(short_index < 0) - short_index += long_lag; - result_type xi; - if (m_x[short_index] >= m_x[m_k] + m_carry) - { - // x(n) >= 0 - xi = m_x[short_index] - m_x[m_k] - m_carry; - m_carry = 0; - } - else - { - // x(n) < 0 - xi = modulus - m_x[m_k] - m_carry + m_x[short_index]; - m_carry = 1; - } - m_x[m_k] = xi; - ++m_k; - if(m_k >= long_lag) - m_k = 0; - return xi; -} // end subtract_with_carry_engine::operator()() - - -template - void subtract_with_carry_engine - ::discard(unsigned long long z) -{ - for(; z > 0; --z) - { - this->operator()(); - } // end for -} // end subtract_with_carry_engine::discard() - - -template - template - std::basic_ostream& subtract_with_carry_engine - ::stream_out(std::basic_ostream &os) const -{ - typedef std::basic_ostream ostream_type; - typedef typename ostream_type::ios_base ios_base; - - const typename ios_base::fmtflags flags = os.flags(); - const CharT fill = os.fill(); - const CharT space = os.widen(' '); - os.flags(ios_base::dec | ios_base::fixed | ios_base::left); - os.fill(space); - - const UIntType long_lag = r; - - for(size_t i = 0; i < r; ++i) - os << m_x[(i + m_k) % long_lag] << space; - os << m_carry; - - os.flags(flags); - os.fill(fill); - return os; -} - - -template - template - std::basic_istream& subtract_with_carry_engine - ::stream_in(std::basic_istream &is) -{ - typedef std::basic_istream istream_type; - typedef typename istream_type::ios_base ios_base; - - const typename ios_base::fmtflags flags = is.flags(); - 
is.flags(ios_base::dec | ios_base::skipws); - - for(size_t i = 0; i < r; ++i) - is >> m_x[i]; - is >> m_carry; - - m_k = 0; - - is.flags(flags); - return is; -} - - -template - bool subtract_with_carry_engine - ::equal(const subtract_with_carry_engine &rhs) const -{ - const UIntType long_lag = r; - - bool result = true; - for(size_t i = 0; i < r; ++i) - { - result &= (m_x[(i + m_k) % long_lag] == rhs.m_x[(i + rhs.m_k) % long_lag]); - } - - // XXX not sure if this last check is necessary - result &= (m_carry == rhs.m_carry); - - return result; -} - - -template - std::basic_ostream& - operator<<(std::basic_ostream &os, - const subtract_with_carry_engine &e) -{ - return thrust::random::detail::random_core_access::stream_out(os,e); -} - - -template - std::basic_istream& - operator>>(std::basic_istream &is, - subtract_with_carry_engine &e) -{ - return thrust::random::detail::random_core_access::stream_in(is,e); -} - - -template - bool operator==(const subtract_with_carry_engine &lhs, - const subtract_with_carry_engine &rhs) -{ - return thrust::random::detail::random_core_access::equal(lhs,rhs); -} - - -template - bool operator!=(const subtract_with_carry_engine &lhs, - const subtract_with_carry_engine &rhs) -{ - return !(lhs == rhs); -} - - -} // end random - -} // end thrust - diff --git a/compat/thrust/random/detail/uniform_int_distribution.inl b/compat/thrust/random/detail/uniform_int_distribution.inl deleted file mode 100644 index e92754c5e7..0000000000 --- a/compat/thrust/random/detail/uniform_int_distribution.inl +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include - -namespace thrust -{ - -namespace random -{ - - -template - uniform_int_distribution - ::uniform_int_distribution(IntType a, IntType b) - :m_param(a,b) -{ -} // end uniform_int_distribution::uniform_int_distribution() - - -template - uniform_int_distribution - ::uniform_int_distribution(const param_type &parm) - :m_param(parm) -{ -} // end uniform_int_distribution::uniform_int_distribution() - - -template - void uniform_int_distribution - ::reset(void) -{ -} // end uniform_int_distribution::reset() - - -template - template - typename uniform_int_distribution::result_type - uniform_int_distribution - ::operator()(UniformRandomNumberGenerator &urng) -{ - return operator()(urng, m_param); -} // end uniform_int_distribution::operator()() - - -template - template - typename uniform_int_distribution::result_type - uniform_int_distribution - ::operator()(UniformRandomNumberGenerator &urng, const param_type &parm) -{ - // XXX this implementation is somewhat hacky and will skip - // values if the range of the RNG is smaller than the range of the distribution - // we should improve this implementation in a later version - - typedef typename thrust::detail::largest_available_float::type float_type; - - const float_type real_min(parm.first); - const float_type real_max(parm.second); - - // add one to the right end of the interval because it is half-open - // XXX adding 1.0 to a potentially large floating point number seems like a bad idea - uniform_real_distribution real_dist(real_min, real_max + float_type(1)); - - 
return static_cast(real_dist(urng)); -} // end uniform_int_distribution::operator()() - - -template - typename uniform_int_distribution::result_type - uniform_int_distribution - ::a(void) const -{ - return m_param.first; -} // end uniform_int_distribution::a() - - -template - typename uniform_int_distribution::result_type - uniform_int_distribution - ::b(void) const -{ - return m_param.second; -} // end uniform_int_distribution::b() - - -template - typename uniform_int_distribution::param_type - uniform_int_distribution - ::param(void) const -{ - return m_param; -} // end uniform_int_distribution::param() - - -template - void uniform_int_distribution - ::param(const param_type &parm) -{ - m_param = parm; -} // end uniform_int_distribution::param() - - -template - typename uniform_int_distribution::result_type - uniform_int_distribution - ::min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const -{ - return a(); -} // end uniform_int_distribution::min() - - -template - typename uniform_int_distribution::result_type - uniform_int_distribution - ::max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const -{ - return b(); -} // end uniform_int_distribution::max() - - -template - bool uniform_int_distribution - ::equal(const uniform_int_distribution &rhs) const -{ - return param() == rhs.param(); -} - - -template - template - std::basic_ostream& - uniform_int_distribution - ::stream_out(std::basic_ostream &os) const -{ - typedef std::basic_ostream ostream_type; - typedef typename ostream_type::ios_base ios_base; - - // save old flags and fill character - const typename ios_base::fmtflags flags = os.flags(); - const CharT fill = os.fill(); - - const CharT space = os.widen(' '); - os.flags(ios_base::dec | ios_base::fixed | ios_base::left); - os.fill(space); - - os << a() << space << b(); - - // restore old flags and fill character - os.flags(flags); - os.fill(fill); - return os; -} - - -template - template - std::basic_istream& - uniform_int_distribution - 
::stream_in(std::basic_istream &is) -{ - typedef std::basic_istream istream_type; - typedef typename istream_type::ios_base ios_base; - - // save old flags - const typename ios_base::fmtflags flags = is.flags(); - - is.flags(ios_base::skipws); - - is >> m_param.first >> m_param.second; - - // restore old flags - is.flags(flags); - return is; -} - - -template -bool operator==(const uniform_int_distribution &lhs, - const uniform_int_distribution &rhs) -{ - return thrust::random::detail::random_core_access::equal(lhs,rhs); -} - - -template -bool operator!=(const uniform_int_distribution &lhs, - const uniform_int_distribution &rhs) -{ - return !(lhs == rhs); -} - - -template -std::basic_ostream& -operator<<(std::basic_ostream &os, - const uniform_int_distribution &d) -{ - return thrust::random::detail::random_core_access::stream_out(os,d); -} - - -template -std::basic_istream& -operator>>(std::basic_istream &is, - uniform_int_distribution &d) -{ - return thrust::random::detail::random_core_access::stream_in(is,d); -} - - -} // end random - -} // end thrust - diff --git a/compat/thrust/random/detail/uniform_real_distribution.inl b/compat/thrust/random/detail/uniform_real_distribution.inl deleted file mode 100644 index 6f6d6b57b5..0000000000 --- a/compat/thrust/random/detail/uniform_real_distribution.inl +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -namespace thrust -{ - -namespace random -{ - - -template - uniform_real_distribution - ::uniform_real_distribution(RealType a, RealType b) - :m_param(a,b) -{ -} // end uniform_real_distribution::uniform_real_distribution() - -template - uniform_real_distribution - ::uniform_real_distribution(const param_type &parm) - :m_param(parm) -{ -} // end uniform_real_distribution::uniform_real_distribution() - -template - void uniform_real_distribution - ::reset(void) -{ -} // end uniform_real_distribution::reset() - -template - template - typename uniform_real_distribution::result_type - uniform_real_distribution - ::operator()(UniformRandomNumberGenerator &urng) -{ - return operator()(urng, m_param); -} // end uniform_real::operator()() - -template - template - typename uniform_real_distribution::result_type - uniform_real_distribution - ::operator()(UniformRandomNumberGenerator &urng, - const param_type &parm) -{ - // call the urng & map its result to [0,1) - result_type result = static_cast(urng() - UniformRandomNumberGenerator::min); - - // adding one to the denominator ensures that the interval is half-open at 1.0 - // XXX adding 1.0 to a potentially large floating point number seems like a bad idea - // XXX OTOH adding 1 to what is potentially UINT_MAX also seems like a bad idea - // XXX we could statically check if 1u + (max - min) is representable and do that, otherwise use the current implementation - result /= (result_type(1) + static_cast(UniformRandomNumberGenerator::max - UniformRandomNumberGenerator::min)); - - return (result * (parm.second - parm.first)) + parm.first; -} // end uniform_real::operator()() - -template - typename uniform_real_distribution::result_type - uniform_real_distribution - ::a(void) const -{ - return m_param.first; -} // end uniform_real::a() - -template - typename uniform_real_distribution::result_type - uniform_real_distribution - ::b(void) const -{ - return m_param.second; -} // end uniform_real_distribution::b() - 
-template - typename uniform_real_distribution::param_type - uniform_real_distribution - ::param(void) const -{ - return m_param;; -} // end uniform_real_distribution::param() - -template - void uniform_real_distribution - ::param(const param_type &parm) -{ - m_param = parm; -} // end uniform_real_distribution::param() - -template - typename uniform_real_distribution::result_type - uniform_real_distribution - ::min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const -{ - return a(); -} // end uniform_real_distribution::min() - -template - typename uniform_real_distribution::result_type - uniform_real_distribution - ::max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const -{ - return b(); -} // end uniform_real_distribution::max() - - -template - bool uniform_real_distribution - ::equal(const uniform_real_distribution &rhs) const -{ - return m_param == rhs.param(); -} - - -template - template - std::basic_ostream& - uniform_real_distribution - ::stream_out(std::basic_ostream &os) const -{ - typedef std::basic_ostream ostream_type; - typedef typename ostream_type::ios_base ios_base; - - // save old flags and fill character - const typename ios_base::fmtflags flags = os.flags(); - const CharT fill = os.fill(); - - const CharT space = os.widen(' '); - os.flags(ios_base::dec | ios_base::fixed | ios_base::left); - os.fill(space); - - os << a() << space << b(); - - // restore old flags and fill character - os.flags(flags); - os.fill(fill); - return os; -} - - -template - template - std::basic_istream& - uniform_real_distribution - ::stream_in(std::basic_istream &is) -{ - typedef std::basic_istream istream_type; - typedef typename istream_type::ios_base ios_base; - - // save old flags - const typename ios_base::fmtflags flags = is.flags(); - - is.flags(ios_base::skipws); - - is >> m_param.first >> m_param.second; - - // restore old flags - is.flags(flags); - return is; -} - - -template -bool operator==(const uniform_real_distribution &lhs, - const uniform_real_distribution &rhs) -{ 
- return thrust::random::detail::random_core_access::equal(lhs,rhs); -} - - -template -bool operator!=(const uniform_real_distribution &lhs, - const uniform_real_distribution &rhs) -{ - return !(lhs == rhs); -} - - -template -std::basic_ostream& -operator<<(std::basic_ostream &os, - const uniform_real_distribution &d) -{ - return thrust::random::detail::random_core_access::stream_out(os,d); -} - - -template -std::basic_istream& -operator>>(std::basic_istream &is, - uniform_real_distribution &d) -{ - return thrust::random::detail::random_core_access::stream_in(is,d); -} - - -} // end random - -} // end thrust - diff --git a/compat/thrust/random/detail/xor_combine_engine.inl b/compat/thrust/random/detail/xor_combine_engine.inl deleted file mode 100644 index b138722f8b..0000000000 --- a/compat/thrust/random/detail/xor_combine_engine.inl +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -namespace thrust -{ - -namespace random -{ - -template - xor_combine_engine - ::xor_combine_engine(void) - :m_b1(),m_b2() -{ -} // end xor_combine_engine::xor_combine_engine() - -template - xor_combine_engine - ::xor_combine_engine(const base1_type &urng1, const base2_type &urng2) - :m_b1(urng1),m_b2(urng2) -{ -} // end xor_combine_engine::xor_combine_engine() - -template - xor_combine_engine - ::xor_combine_engine(result_type s) - :m_b1(s),m_b2(s) -{ -} // end xor_combine_engine::xor_combine_engine() - -template - void xor_combine_engine - ::seed(void) -{ - m_b1.seed(); - m_b2.seed(); -} // end xor_combine_engine::seed() - -template - void xor_combine_engine - ::seed(result_type s) -{ - m_b1.seed(s); - m_b2.seed(s); -} // end xor_combine_engine::seed() - -template - const typename xor_combine_engine::base1_type & - xor_combine_engine - ::base1(void) const -{ - return m_b1; -} // end xor_combine_engine::base1() - -template - const typename xor_combine_engine::base2_type & - xor_combine_engine - ::base2(void) const -{ - return m_b2; -} // end xor_combine_engine::base2() - -template - typename xor_combine_engine::result_type - xor_combine_engine - ::operator()(void) -{ - return (result_type(m_b1() - base1_type::min) << shift1) ^ - (result_type(m_b2() - base2_type::min) << shift2); -} // end xor_combine_engine::operator()() - -template - void xor_combine_engine - ::discard(unsigned long long z) -{ - for(; z > 0; --z) - { - this->operator()(); - } // end for -} // end xor_combine_engine::discard() - - -template - template - std::basic_ostream& xor_combine_engine - ::stream_out(std::basic_ostream &os) const -{ - typedef std::basic_ostream ostream_type; - typedef typename ostream_type::ios_base ios_base; - - // save old flags and fill character - const typename ios_base::fmtflags flags = os.flags(); - const CharT fill = os.fill(); - - const CharT space = os.widen(' '); - os.flags(ios_base::dec | ios_base::fixed | ios_base::left); - 
os.fill(space); - - // output each base engine in turn - os << base1() << space << base2(); - - // restore old flags and fill character - os.flags(flags); - os.fill(fill); - return os; -} - - -template - template - std::basic_istream& xor_combine_engine - ::stream_in(std::basic_istream &is) -{ - typedef std::basic_istream istream_type; - typedef typename istream_type::ios_base ios_base; - - // save old flags - const typename ios_base::fmtflags flags = is.flags(); - - is.flags(ios_base::skipws); - - // input each base engine in turn - is >> m_b1 >> m_b2; - - // restore old flags - is.flags(flags); - return is; -} - - -template - bool xor_combine_engine - ::equal(const xor_combine_engine &rhs) const -{ - return (m_b1 == rhs.m_b1) && (m_b2 == rhs.m_b2); -} - - -template -std::basic_ostream& -operator<<(std::basic_ostream &os, - const xor_combine_engine &e) -{ - return thrust::random::detail::random_core_access::stream_out(os,e); -} - - -template -std::basic_istream& -operator>>(std::basic_istream &is, - xor_combine_engine &e) -{ - return thrust::random::detail::random_core_access::stream_in(is,e); -} - - -template -bool operator==(const xor_combine_engine &lhs, - const xor_combine_engine &rhs) -{ - return thrust::random::detail::random_core_access::equal(lhs,rhs); -} - - -template -bool operator!=(const xor_combine_engine &lhs, - const xor_combine_engine &rhs) -{ - return !(lhs == rhs); -} - - -} // end random - -} // end thrust - diff --git a/compat/thrust/random/detail/xor_combine_engine_max.h b/compat/thrust/random/detail/xor_combine_engine_max.h deleted file mode 100644 index 8bad9a462c..0000000000 --- a/compat/thrust/random/detail/xor_combine_engine_max.h +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -namespace random -{ - -namespace detail -{ - - -namespace math = thrust::detail::mpl::math; - - -namespace detail -{ - -// two cases for this function avoids compile-time warnings of overflow -template - struct lshift_w -{ - static const UIntType value = 0; -}; - - -template - struct lshift_w -{ - static const UIntType value = lhs << rhs; -}; - -} // end detail - - -template - struct lshift_w -{ - static const bool shift_will_overflow = rhs >= w; - - static const UIntType value = detail::lshift_w::value; -}; - - -template - struct lshift - : lshift_w::digits, lhs, rhs> -{}; - - -template - struct two_to_the_power - : lshift -{}; - - -template - class xor_combine_engine_max_aux_constants -{ - public: - static const result_type two_to_the_d = two_to_the_power::value; - static const result_type c = lshift::value; - - static const result_type t = - math::max< - result_type, - c, - b - >::value; - - static const result_type u = - math::min< - result_type, - c, - b - >::value; - - static const result_type p = math::log2::value; - static const result_type two_to_the_p = two_to_the_power::value; - - static const result_type k = math::div::value; -}; - - -template struct xor_combine_engine_max_aux; - - -template - struct xor_combine_engine_max_aux_case4 -{ - typedef xor_combine_engine_max_aux_constants constants; - - static const result_type k_plus_1_times_two_to_the_p = - lshift< - result_type, - math::plus::value, - constants::p - >::value; - - static const 
result_type M = - xor_combine_engine_max_aux< - result_type, - math::div< - result_type, - math::mod< - result_type, - constants::u, - constants::two_to_the_p - >::value, - constants::two_to_the_p - >::value, - math::mod< - result_type, - constants::t, - constants::two_to_the_p - >::value, - d - >::value; - - static const result_type value = math::plus::value; -}; - - -template - struct xor_combine_engine_max_aux_case3 -{ - typedef xor_combine_engine_max_aux_constants constants; - - static const result_type k_plus_1_times_two_to_the_p = - lshift< - result_type, - math::plus::value, - constants::p - >::value; - - static const result_type M = - xor_combine_engine_max_aux< - result_type, - math::div< - result_type, - math::mod< - result_type, - constants::t, - constants::two_to_the_p - >::value, - constants::two_to_the_p - >::value, - math::mod< - result_type, - constants::u, - constants::two_to_the_p - >::value, - d - >::value; - - static const result_type value = math::plus::value; -}; - - - -template - struct xor_combine_engine_max_aux_case2 -{ - typedef xor_combine_engine_max_aux_constants constants; - - static const result_type k_plus_1_times_two_to_the_p = - lshift< - result_type, - math::plus::value, - constants::p - >::value; - - static const result_type value = - math::minus< - result_type, - k_plus_1_times_two_to_the_p, - 1 - >::value; -}; - - -template - struct xor_combine_engine_max_aux_case1 -{ - static const result_type c = lshift::value; - - static const result_type value = math::plus::value; -}; - - -template - struct xor_combine_engine_max_aux_2 -{ - typedef xor_combine_engine_max_aux_constants constants; - - static const result_type value = - thrust::detail::eval_if< - // if k is odd... 
- math::is_odd::value, - thrust::detail::identity_< - thrust::detail::integral_constant< - result_type, - xor_combine_engine_max_aux_case2::value - > - >, - thrust::detail::eval_if< - // otherwise if a * 2^3 >= b, then case 3 - a * constants::two_to_the_d >= b, - thrust::detail::identity_< - thrust::detail::integral_constant< - result_type, - xor_combine_engine_max_aux_case3::value - > - >, - // otherwise, case 4 - thrust::detail::identity_< - thrust::detail::integral_constant< - result_type, - xor_combine_engine_max_aux_case4::value - > - > - > - >::type::value; -}; - - -template::value)> - struct xor_combine_engine_max_aux_1 - : xor_combine_engine_max_aux_case1 -{}; - - -template - struct xor_combine_engine_max_aux_1 - : xor_combine_engine_max_aux_2 -{}; - - -template - struct xor_combine_engine_max_aux - : xor_combine_engine_max_aux_1 -{}; - - -template - struct xor_combine_engine_max -{ - static const size_t w = std::numeric_limits::digits; - - static const result_type m1 = - math::min< - result_type, - result_type(Engine1::max - Engine1::min), - two_to_the_power::value - 1 - >::value; - - static const result_type m2 = - math::min< - result_type, - result_type(Engine2::max - Engine2::min), - two_to_the_power::value - 1 - >::value; - - static const result_type s = s1 - s2; - - static const result_type M = - xor_combine_engine_max_aux< - result_type, - m1, - m2, - s - >::value; - - // the value is M(m1,m2,s) lshift_w s2 - static const result_type value = - lshift_w< - result_type, - w, - M, - s2 - >::value; -}; // end xor_combine_engine_max - -} // end detail - -} // end random - -} // end thrust - diff --git a/compat/thrust/random/discard_block_engine.h b/compat/thrust/random/discard_block_engine.h deleted file mode 100644 index c902c5863b..0000000000 --- a/compat/thrust/random/discard_block_engine.h +++ /dev/null @@ -1,252 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may 
not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file discard_block_engine.h - * \brief A random number engine which adapts a base engine and produces - * numbers by discarding all but a contiguous blocks of its values. - */ - -#pragma once - -#include - -#include -#include -#include -#include - -namespace thrust -{ - -namespace random -{ - -/*! \addtogroup random_number_engine_adaptors Random Number Engine Adaptor Class Templates - * \ingroup random - * \{ - */ - -/*! \class discard_block_engine - * \brief A \p discard_block_engine adapts an existing base random number engine and produces - * random values by discarding some of the values returned by its base engine. - * Each cycle of the compound engine begins by returning \c r values successively produced - * by the base engine and ends by discarding p-r such values. The engine's state - * is the state of its base engine followed by the number of calls to operator() - * that have occurred since the beginning of the current cycle. - * - * \tparam Engine The type of the base random number engine to adapt. - * \tparam p The discard cycle length. - * \tparam r The number of values to return of the base engine. Because p-r will be - * discarded, r <= p. 
- * - * The following code snippet shows an example of using a \p discard_block_engine instance: - * - * \code - * #include - * #include - * #include - * - * int main(void) - * { - * // create a discard_block_engine from minstd_rand, with a cycle length of 13 - * // keep every first 10 values, and discard the next 3 - * thrust::discard_block_engine rng; - * - * // print a random number to standard output - * std::cout << rng() << std::endl; - * - * return 0; - * } - * \endcode - */ -template - class discard_block_engine -{ - public: - // types - - /*! \typedef base_type - * \brief The type of the adapted base random number engine. - */ - typedef Engine base_type; - - /*! \typedef result_type - * \brief The type of the unsigned integer produced by this \p linear_congruential_engine. - */ - typedef typename base_type::result_type result_type; - - // engine characteristics - - /*! The length of the production cycle. - */ - static const size_t block_size = p; - - /*! The number of used numbers per production cycle. - */ - static const size_t used_block = r; - - /*! The smallest value this \p discard_block_engine may potentially produce. - */ - static const result_type min = base_type::min; - - /*! The largest value this \p discard_block_engine may potentially produce. - */ - static const result_type max = base_type::max; - - // constructors and seeding functions - - /*! This constructor constructs a new \p discard_block_engine and constructs - * its \p base_type engine using its null constructor. - */ - __host__ __device__ - discard_block_engine(); - - /*! This constructor constructs a new \p discard_block_engine using - * a given \p base_type engine to initialize its adapted base engine. - * - * \param urng A \p base_type to use to initialize this \p discard_block_engine's - * adapted base engine. - */ - __host__ __device__ - explicit discard_block_engine(const base_type &urng); - - /*! This constructor initializes a new \p discard_block_engine with a given seed. 
- * - * \param s The seed used to intialize this \p discard_block_engine's adapted base engine. - */ - __host__ __device__ - explicit discard_block_engine(result_type s); - - /*! This method initializes the state of this \p discard_block_engine's adapted base engine - * by using its \p default_seed value. - */ - __host__ __device__ - void seed(void); - - /*! This method initializes the state of this \p discard_block_engine's adapted base engine - * by using the given seed. - * - * \param s The seed with which to intialize this \p discard_block_engine's adapted base engine. - */ - __host__ __device__ - void seed(result_type s); - - // generating functions - - /*! This member function produces a new random value and updates this \p discard_block_engine's state. - * \return A new random number. - */ - __host__ __device__ - result_type operator()(void); - - /*! This member function advances this \p discard_block_engine's state a given number of times - * and discards the results. - * - * \param z The number of random values to discard. - * \note This function is provided because an implementation may be able to accelerate it. - */ - __host__ __device__ - void discard(unsigned long long z); - - // property functions - - /*! This member function returns a const reference to this \p discard_block_engine's - * adapted base engine. - * - * \return A const reference to the base engine this \p discard_block_engine adapts. - */ - __host__ __device__ - const base_type &base(void) const; - - /*! \cond - */ - private: - base_type m_e; - unsigned int m_n; - - friend struct thrust::random::detail::random_core_access; - - __host__ __device__ - bool equal(const discard_block_engine &rhs) const; - - template - std::basic_ostream& stream_out(std::basic_ostream &os) const; - - template - std::basic_istream& stream_in(std::basic_istream &is); - /*! \endcond - */ -}; // end discard_block_engine - - -/*! This function checks two \p discard_block_engines for equality. 
- * \param lhs The first \p discard_block_engine to test. - * \param rhs The second \p discard_block_engine to test. - * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. - */ -template -__host__ __device__ -bool operator==(const discard_block_engine &lhs, - const discard_block_engine &rhs); - - -/*! This function checks two \p discard_block_engines for inequality. - * \param lhs The first \p discard_block_engine to test. - * \param rhs The second \p discard_block_engine to test. - * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. - */ -template -__host__ __device__ -bool operator!=(const discard_block_engine &lhs, - const discard_block_engine &rhs); - - -/*! This function streams a discard_block_engine to a \p std::basic_ostream. - * \param os The \p basic_ostream to stream out to. - * \param e The \p discard_block_engine to stream out. - * \return \p os - */ -template -std::basic_ostream& -operator<<(std::basic_ostream &os, - const discard_block_engine &e); - - -/*! This function streams a discard_block_engine in from a std::basic_istream. - * \param is The \p basic_istream to stream from. - * \param e The \p discard_block_engine to stream in. - * \return \p is - */ -template -std::basic_istream& -operator>>(std::basic_istream &is, - discard_block_engine &e); - -/*! \} // end random_number_engine_adaptors - */ - -} // end random - -// import names into thrust:: -using random::discard_block_engine; - -} // end thrust - -#include - diff --git a/compat/thrust/random/linear_congruential_engine.h b/compat/thrust/random/linear_congruential_engine.h deleted file mode 100644 index 0added0069..0000000000 --- a/compat/thrust/random/linear_congruential_engine.h +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file linear_congruential_engine.h - * \brief A linear congruential pseudorandom number engine. - */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ - -namespace random -{ - -/*! \addtogroup random_number_engine_templates Random Number Engine Class Templates - * \ingroup random - * \{ - */ - -/*! \class linear_congruential_engine - * \brief A \p linear_congruential_engine random number engine produces unsigned integer - * random numbers using a linear congruential random number generation algorithm. - * - * The generation algorithm has the form x_i = (a * x_{i-1} + c) mod m. - * - * \tparam UIntType The type of unsigned integer to produce. - * \tparam a The multiplier used in the generation algorithm. - * \tparam c The increment used in the generation algorithm. - * \tparam m The modulus used in the generation algorithm. - * - * \note Inexperienced users should not use this class template directly. Instead, use - * \p minstd_rand or \p minstd_rand0. 
- * - * The following code snippet shows examples of use of a \p linear_congruential_engine instance: - * - * \code - * #include - * #include - * - * int main(void) - * { - * // create a minstd_rand object, which is an instance of linear_congruential_engine - * thrust::minstd_rand rng1; - * - * // output some random values to cout - * std::cout << rng1() << std::endl; - * - * // a random value is printed - * - * // create a new minstd_rand from a seed - * thrust::minstd_rand rng2(13); - * - * // discard some random values - * rng2.discard(13); - * - * // stream the object to an iostream - * std::cout << rng2 << std::endl; - * - * // rng2's current state is printed - * - * // print the minimum and maximum values that minstd_rand can produce - * std::cout << thrust::minstd_rand::min << std::endl; - * std::cout << thrust::minstd_rand::max << std::endl; - * - * // the range of minstd_rand is printed - * - * // save the state of rng2 to a different object - * thrust::minstd_rand rng3 = rng2; - * - * // compare rng2 and rng3 - * std::cout << (rng2 == rng3) << std::endl; - * - * // 1 is printed - * - * // re-seed rng2 with a different seed - * rng2.seed(7); - * - * // compare rng2 and rng3 - * std::cout << (rng2 == rng3) << std::endl; - * - * // 0 is printed - * - * return 0; - * } - * - * \endcode - * - * \see thrust::random::minstd_rand - * \see thrust::random::minstd_rand0 - */ -template - class linear_congruential_engine -{ - public: - // types - - /*! \typedef result_type - * \brief The type of the unsigned integer produced by this \p linear_congruential_engine. - */ - typedef UIntType result_type; - - // engine characteristics - - /*! The multiplier used in the generation algorithm. - */ - static const result_type multiplier = a; - - /*! The increment used in the generation algorithm. - */ - static const result_type increment = c; - - /*! The modulus used in the generation algorithm. - */ - static const result_type modulus = m; - - /*! 
The smallest value this \p linear_congruential_engine may potentially produce. - */ - static const result_type min = c == 0u ? 1u : 0u; - - /*! The largest value this \p linear_congruential_engine may potentially produce. - */ - static const result_type max = m - 1u; - - /*! The default seed of this \p linear_congruential_engine. - */ - static const result_type default_seed = 1u; - - // constructors and seeding functions - - /*! This constructor, which optionally accepts a seed, initializes a new - * \p linear_congruential_engine. - * - * \param s The seed used to intialize this \p linear_congruential_engine's state. - */ - __host__ __device__ - explicit linear_congruential_engine(result_type s = default_seed); - - /*! This method initializes this \p linear_congruential_engine's state, and optionally accepts - * a seed value. - * - * \param s The seed used to initializes this \p linear_congruential_engine's state. - */ - __host__ __device__ - void seed(result_type s = default_seed); - - // generating functions - - /*! This member function produces a new random value and updates this \p linear_congruential_engine's state. - * \return A new random number. - */ - __host__ __device__ - result_type operator()(void); - - /*! This member function advances this \p linear_congruential_engine's state a given number of times - * and discards the results. - * - * \param z The number of random values to discard. - * \note This function is provided because an implementation may be able to accelerate it. - */ - __host__ __device__ - void discard(unsigned long long z); - - /*! 
\cond - */ - private: - result_type m_x; - - static void transition(result_type &state); - - friend struct thrust::random::detail::random_core_access; - - friend struct thrust::random::detail::linear_congruential_engine_discard; - - __host__ __device__ - bool equal(const linear_congruential_engine &rhs) const; - - template - std::basic_ostream& stream_out(std::basic_ostream &os) const; - - template - std::basic_istream& stream_in(std::basic_istream &is); - - /*! \endcond - */ -}; // end linear_congruential_engine - - -/*! This function checks two \p linear_congruential_engines for equality. - * \param lhs The first \p linear_congruential_engine to test. - * \param rhs The second \p linear_congruential_engine to test. - * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. - */ -template -__host__ __device__ -bool operator==(const linear_congruential_engine &lhs, - const linear_congruential_engine &rhs); - - -/*! This function checks two \p linear_congruential_engines for inequality. - * \param lhs The first \p linear_congruential_engine to test. - * \param rhs The second \p linear_congruential_engine to test. - * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. - */ -template -__host__ __device__ -bool operator!=(const linear_congruential_engine &lhs, - const linear_congruential_engine &rhs); - - -/*! This function streams a linear_congruential_engine to a \p std::basic_ostream. - * \param os The \p basic_ostream to stream out to. - * \param e The \p linear_congruential_engine to stream out. - * \return \p os - */ -template -std::basic_ostream& -operator<<(std::basic_ostream &os, - const linear_congruential_engine &e); - - -/*! This function streams a linear_congruential_engine in from a std::basic_istream. - * \param is The \p basic_istream to stream from. - * \param e The \p linear_congruential_engine to stream in. 
- * \return \p is - */ -template -std::basic_istream& -operator>>(std::basic_istream &is, - linear_congruential_engine &e); - - -/*! \} // random_number_engine_templates - */ - - -/*! \addtogroup predefined_random - * \{ - */ - -// XXX the type N2111 used here was uint_fast32_t - -/*! \typedef minstd_rand0 - * \brief A random number engine with predefined parameters which implements a version of - * the Minimal Standard random number generation algorithm. - * \note The 10000th consecutive invocation of a default-constructed object of type \p minstd_rand0 - * shall produce the value \c 1043618065 . - */ -typedef linear_congruential_engine minstd_rand0; - - -/*! \typedef minstd_rand - * \brief A random number engine with predefined parameters which implements a version of - * the Minimal Standard random number generation algorithm. - * \note The 10000th consecutive invocation of a default-constructed object of type \p minstd_rand - * shall produce the value \c 399268537 . - */ -typedef linear_congruential_engine minstd_rand; - -/*! \} // predefined_random - */ - -} // end random - -// import names into thrust:: -using random::linear_congruential_engine; -using random::minstd_rand; -using random::minstd_rand0; - -} // end thrust - -#include - diff --git a/compat/thrust/random/linear_feedback_shift_engine.h b/compat/thrust/random/linear_feedback_shift_engine.h deleted file mode 100644 index f5646c9483..0000000000 --- a/compat/thrust/random/linear_feedback_shift_engine.h +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file linear_feedback_shift_engine.h - * \brief A linear feedback shift pseudorandom number generator. - */ - -/* - * Copyright Jens Maurer 2002 - * - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying NOTICE file for the complete license) - * - * For more information, see http://www.boost.org - */ - -#pragma once - -#include -#include -#include -#include // for size_t -#include - -namespace thrust -{ - - -namespace random -{ - -/*! \addtogroup random_number_engine_templates - * \{ - */ - -/*! \class linear_feedback_shift_engine - * \brief A \p linear_feedback_shift_engine random number engine produces - * unsigned integer random values using a linear feedback shift random number - * generation algorithm. - * - * \tparam UIntType The type of unsigned integer to produce. - * \tparam w The word size of the produced values (w <= sizeof(UIntType)). - * \tparam k The k parameter of Tausworthe's 1965 algorithm. - * \tparam q The q exponent of Tausworthe's 1965 algorithm. - * \tparam s The step size of Tausworthe's 1965 algorithm. - * - * \note linear_feedback_shift_engine is based on the Boost Template Library's linear_feedback_shift. - */ -template - class linear_feedback_shift_engine -{ - public: - // types - - /*! \typedef result_type - * \brief The type of the unsigned integer produced by this \p linear_feedback_shift_engine. - */ - typedef UIntType result_type; - - // engine characteristics - - /*! The word size of the produced values. - */ - static const size_t word_size = w; - - /*! 
A constant used in the generation algorithm. - */ - static const size_t exponent1 = k; - - /*! A constant used in the generation algorithm. - */ - static const size_t exponent2 = q; - - /*! The step size used in the generation algorithm. - */ - static const size_t step_size = s; - - /*! \cond - */ - private: - static const result_type wordmask = - detail::linear_feedback_shift_engine_wordmask< - result_type, - w - >::value; - /*! \endcond - */ - - public: - - /*! The smallest value this \p linear_feedback_shift_engine may potentially produce. - */ - static const result_type min = 0; - - /*! The largest value this \p linear_feedback_shift_engine may potentially produce. - */ - static const result_type max = wordmask; - - /*! The default seed of this \p linear_feedback_shift_engine. - */ - static const result_type default_seed = 341u; - - // constructors and seeding functions - - /*! This constructor, which optionally accepts a seed, initializes a new - * \p linear_feedback_shift_engine. - * - * \param value The seed used to intialize this \p linear_feedback_shift_engine's state. - */ - __host__ __device__ - explicit linear_feedback_shift_engine(result_type value = default_seed); - - /*! This method initializes this \p linear_feedback_shift_engine's state, and optionally accepts - * a seed value. - * - * \param value The seed used to initializes this \p linear_feedback_shift_engine's state. - */ - __host__ __device__ - void seed(result_type value = default_seed); - - // generating functions - - /*! This member function produces a new random value and updates this \p linear_feedback_shift_engine's state. - * \return A new random number. - */ - __host__ __device__ - result_type operator()(void); - - /*! This member function advances this \p linear_feedback_shift_engine's state a given number of times - * and discards the results. - * - * \param z The number of random values to discard. 
- * \note This function is provided because an implementation may be able to accelerate it. - */ - __host__ __device__ - void discard(unsigned long long z); - - /*! \cond - */ - private: - result_type m_value; - - friend struct thrust::random::detail::random_core_access; - - __host__ __device__ - bool equal(const linear_feedback_shift_engine &rhs) const; - - template - std::basic_ostream& stream_out(std::basic_ostream &os) const; - - template - std::basic_istream& stream_in(std::basic_istream &is); - - /*! \endcond - */ -}; // end linear_feedback_shift_engine - - -/*! This function checks two \p linear_feedback_shift_engines for equality. - * \param lhs The first \p linear_feedback_shift_engine to test. - * \param rhs The second \p linear_feedback_shift_engine to test. - * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. - */ -template -__host__ __device__ -bool operator==(const linear_feedback_shift_engine &lhs, - const linear_feedback_shift_engine &rhs); - - -/*! This function checks two \p linear_feedback_shift_engines for inequality. - * \param lhs The first \p linear_feedback_shift_engine to test. - * \param rhs The second \p linear_feedback_shift_engine to test. - * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. - */ -template -__host__ __device__ -bool operator!=(const linear_feedback_shift_engine &lhs, - const linear_feedback_shift_engine &rhs); - - -/*! This function streams a linear_feedback_shift_engine to a \p std::basic_ostream. - * \param os The \p basic_ostream to stream out to. - * \param e The \p linear_feedback_shift_engine to stream out. - * \return \p os - */ -template -std::basic_ostream& -operator<<(std::basic_ostream &os, - const linear_feedback_shift_engine &e); - - -/*! This function streams a linear_feedback_shift_engine in from a std::basic_istream. - * \param is The \p basic_istream to stream from. - * \param e The \p linear_feedback_shift_engine to stream in. 
- * \return \p is - */ -template -std::basic_istream& -operator>>(std::basic_istream &is, - linear_feedback_shift_engine &e); - - -/*! \} // end random_number_engine_templates - */ - - -} // end random - -// import names into thrust:: -using random::linear_feedback_shift_engine; - -} // end thrust - -#include - diff --git a/compat/thrust/random/normal_distribution.h b/compat/thrust/random/normal_distribution.h deleted file mode 100644 index 5543f30a5f..0000000000 --- a/compat/thrust/random/normal_distribution.h +++ /dev/null @@ -1,275 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file normal_distribution.h - * \brief A normal (Gaussian) distribution of real-valued numbers. - */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ - -namespace random -{ - - -/*! \addtogroup random_number_distributions - * \{ - */ - -/*! \class normal_distribution - * \brief A \p normal_distribution random number distribution produces floating point - * Normally distributed random numbers. - * - * \tparam RealType The type of floating point number to produce. 
- * - * The following code snippet demonstrates examples of using a \p normal_distribution with a - * random number engine to produce random values drawn from the Normal distribution with a given - * mean and variance: - * - * \code - * #include - * #include - * - * int main(void) - * { - * // create a minstd_rand object to act as our source of randomness - * thrust::minstd_rand rng; - * - * // create a normal_distribution to produce floats from the Normal distribution - * // with mean 2.0 and standard deviation 3.5 - * thrust::random::normal_distribution dist(2.0f, 3.5f); - * - * // write a random number to standard output - * std::cout << dist(rng) << std::endl; - * - * // write the mean of the distribution, just in case we forgot - * std::cout << dist.mean() << std::endl; - * - * // 2.0 is printed - * - * // and the standard deviation - * std::cout << dist.stddev() << std::endl; - * - * // 3.5 is printed - * - * return 0; - * } - * \endcode - */ -template - class normal_distribution - : public detail::normal_distribution_base::type -{ - private: - typedef typename detail::normal_distribution_base::type super_t; - - public: - // types - - /*! \typedef result_type - * \brief The type of the floating point number produced by this \p normal_distribution. - */ - typedef RealType result_type; - - /*! \typedef param_type - * \brief The type of the object encapsulating this \p normal_distribution's parameters. - */ - typedef thrust::pair param_type; - - // constructors and reset functions - - /*! This constructor creates a new \p normal_distribution from two values defining the - * half-open interval of the distribution. - * - * \param mean The mean (expected value) of the distribution. Defaults to \c 0.0. - * \param stddev The standard deviation of the distribution. Defaults to \c 1.0. - */ - __host__ __device__ - explicit normal_distribution(RealType mean = 0.0, RealType stddev = 1.0); - - /*! 
This constructor creates a new \p normal_distribution from a \p param_type object - * encapsulating the range of the distribution. - * - * \param parm A \p param_type object encapsulating the parameters (i.e., the mean and standard deviation) of the distribution. - */ - __host__ __device__ - explicit normal_distribution(const param_type &parm); - - /*! Calling this member function guarantees that subsequent uses of this - * \p normal_distribution do not depend on values produced by any random - * number generator prior to invoking this function. - */ - __host__ __device__ - void reset(void); - - // generating functions - - /*! This method produces a new Normal random integer drawn from this \p normal_distribution's - * range using a \p UniformRandomNumberGenerator as a source of randomness. - * - * \param urng The \p UniformRandomNumberGenerator to use as a source of randomness. - */ - template - __host__ __device__ - result_type operator()(UniformRandomNumberGenerator &urng); - - /*! This method produces a new Normal random integer as if by creating a new \p normal_distribution - * from the given \p param_type object, and calling its operator() method with the given - * \p UniformRandomNumberGenerator as a source of randomness. - * - * \param urng The \p UniformRandomNumberGenerator to use as a source of randomness. - * \param parm A \p param_type object encapsulating the parameters of the \p normal_distribution - * to draw from. - */ - template - __host__ __device__ - result_type operator()(UniformRandomNumberGenerator &urng, const param_type &parm); - - // property functions - - /*! This method returns the value of the parameter with which this \p normal_distribution - * was constructed. - * - * \return The mean (expected value) of this \p normal_distribution's output. - */ - __host__ __device__ - result_type mean(void) const; - - /*! This method returns the value of the parameter with which this \p normal_distribution - * was constructed. 
- * - * \return The standard deviation of this \p uniform_real_distribution's output. - */ - __host__ __device__ - result_type stddev(void) const; - - /*! This method returns a \p param_type object encapsulating the parameters with which this - * \p normal_distribution was constructed. - * - * \return A \p param_type object encapsulating the parameters (i.e., the mean and standard deviation) of this \p normal_distribution. - */ - __host__ __device__ - param_type param(void) const; - - /*! This method changes the parameters of this \p normal_distribution using the values encapsulated - * in a given \p param_type object. - * - * \param parm A \p param_type object encapsulating the new parameters (i.e., the mean and variance) of this \p normal_distribution. - */ - __host__ __device__ - void param(const param_type &parm); - - /*! This method returns the smallest floating point number this \p normal_distribution can potentially produce. - * - * \return The lower bound of this \p normal_distribution's half-open interval. - */ - __host__ __device__ - result_type min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; - - /*! This method returns the smallest number larger than largest floating point number this \p uniform_real_distribution can potentially produce. - * - * \return The upper bound of this \p normal_distribution's half-open interval. - */ - __host__ __device__ - result_type max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; - - /*! \cond - */ - private: - param_type m_param; - - friend struct thrust::random::detail::random_core_access; - - __host__ __device__ - bool equal(const normal_distribution &rhs) const; - - template - std::basic_ostream& stream_out(std::basic_ostream &os) const; - - template - std::basic_istream& stream_in(std::basic_istream &is); - /*! \endcond - */ -}; // end normal_distribution - - -/*! This function checks two \p normal_distributions for equality. - * \param lhs The first \p normal_distribution to test. 
- * \param rhs The second \p normal_distribution to test. - * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. - */ -template -__host__ __device__ -bool operator==(const normal_distribution &lhs, - const normal_distribution &rhs); - - -/*! This function checks two \p normal_distributions for inequality. - * \param lhs The first \p normal_distribution to test. - * \param rhs The second \p normal_distribution to test. - * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. - */ -template -__host__ __device__ -bool operator!=(const normal_distribution &lhs, - const normal_distribution &rhs); - - -/*! This function streams a normal_distribution to a \p std::basic_ostream. - * \param os The \p basic_ostream to stream out to. - * \param d The \p normal_distribution to stream out. - * \return \p os - */ -template -std::basic_ostream& -operator<<(std::basic_ostream &os, - const normal_distribution &d); - - -/*! This function streams a normal_distribution in from a std::basic_istream. - * \param is The \p basic_istream to stream from. - * \param d The \p normal_distribution to stream in. - * \return \p is - */ -template -std::basic_istream& -operator>>(std::basic_istream &is, - normal_distribution &d); - - -/*! \} // end random_number_distributions - */ - - -} // end random - -using random::normal_distribution; - -} // end thrust - -#include - diff --git a/compat/thrust/random/subtract_with_carry_engine.h b/compat/thrust/random/subtract_with_carry_engine.h deleted file mode 100644 index b88810097b..0000000000 --- a/compat/thrust/random/subtract_with_carry_engine.h +++ /dev/null @@ -1,256 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file subtract_with_carry_engine.h - * \brief A subtract-with-carry pseudorandom number generator - * based on Marsaglia & Zaman. - */ - -#pragma once - -#include -#include - -#include -#include // for size_t -#include - -namespace thrust -{ - -namespace random -{ - - -/*! \addtogroup random_number_engine_templates - * \{ - */ - -/*! \class subtract_with_carry_engine - * \brief A \p subtract_with_carry_engine random number engine produces unsigned - * integer random numbers using the subtract with carry algorithm of Marsaglia & Zaman. - * - * The generation algorithm is performed as follows: - * -# Let Y = X_{i-s}- X_{i-r} - c. - * -# Set X_i to y = T mod m. Set \c c to \c 1 if Y < 0, otherwise set \c c to \c 0. - * - * This algorithm corresponds to a modular linear function of the form - * - * TA(x_i) = (a * x_i) mod b, where \c b is of the form m^r - m^s + 1 and - * a = b - (b-1)/m. - * - * \tparam UIntType The type of unsigned integer to produce. - * \tparam w The word size of the produced values ( w <= sizeof(UIntType)). - * \tparam s The short lag of the generation algorithm. - * \tparam r The long lag of the generation algorithm. - * - * \note Inexperienced users should not use this class template directly. Instead, use - * \p ranlux24_base or \p ranlux48_base, which are instances of \p subtract_with_carry_engine. - * - * \see thrust::random::ranlux24_base - * \see thrust::random::ranlux48_base - */ -template - class subtract_with_carry_engine -{ - /*! \cond - */ - private: - static const UIntType modulus = UIntType(1) << w; - /*! 
\endcond - */ - - public: - // types - - /*! \typedef result_type - * \brief The type of the unsigned integer produced by this \p subtract_with_carry_engine. - */ - typedef UIntType result_type; - - // engine characteristics - - /*! The word size of the produced values. - */ - static const size_t word_size = w; - - /*! The size of the short lag used in the generation algorithm. - */ - static const size_t short_lag = s; - - /*! The size of the long lag used in the generation algorithm. - */ - static const size_t long_lag = r; - - /*! The smallest value this \p subtract_with_carry_engine may potentially produce. - */ - static const result_type min = 0; - - /*! The largest value this \p subtract_with_carry_engine may potentially produce. - */ - static const result_type max = modulus - 1; - - /*! The default seed of this \p subtract_with_carry_engine. - */ - static const result_type default_seed = 19780503u; - - // constructors and seeding functions - - /*! This constructor, which optionally accepts a seed, initializes a new - * \p subtract_with_carry_engine. - * - * \param value The seed used to intialize this \p subtract_with_carry_engine's state. - */ - __host__ __device__ - explicit subtract_with_carry_engine(result_type value = default_seed); - - /*! This method initializes this \p subtract_with_carry_engine's state, and optionally accepts - * a seed value. - * - * \param value The seed used to initializes this \p subtract_with_carry_engine's state. - */ - __host__ __device__ - void seed(result_type value = default_seed); - - // generating functions - - /*! This member function produces a new random value and updates this \p subtract_with_carry_engine's state. - * \return A new random number. - */ - __host__ __device__ - result_type operator()(void); - - /*! This member function advances this \p subtract_with_carry_engine's state a given number of times - * and discards the results. - * - * \param z The number of random values to discard. 
- * \note This function is provided because an implementation may be able to accelerate it. - */ - __host__ __device__ - void discard(unsigned long long z); - - /*! \cond - */ - private: - result_type m_x[long_lag]; - unsigned int m_k; - int m_carry; - - friend struct thrust::random::detail::random_core_access; - - __host__ __device__ - bool equal(const subtract_with_carry_engine &rhs) const; - - template - std::basic_ostream& stream_out(std::basic_ostream &os) const; - - template - std::basic_istream& stream_in(std::basic_istream &is); - - /*! \endcond - */ -}; // end subtract_with_carry_engine - - -/*! This function checks two \p subtract_with_carry_engines for equality. - * \param lhs The first \p subtract_with_carry_engine to test. - * \param rhs The second \p subtract_with_carry_engine to test. - * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. - */ -template -__host__ __device__ -bool operator==(const subtract_with_carry_engine &lhs, - const subtract_with_carry_engine &rhs); - - -/*! This function checks two \p subtract_with_carry_engines for inequality. - * \param lhs The first \p subtract_with_carry_engine to test. - * \param rhs The second \p subtract_with_carry_engine to test. - * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. - */ -template -__host__ __device__ -bool operator!=(const subtract_with_carry_engine&lhs, - const subtract_with_carry_engine&rhs); - - -/*! This function streams a subtract_with_carry_engine to a \p std::basic_ostream. - * \param os The \p basic_ostream to stream out to. - * \param e The \p subtract_with_carry_engine to stream out. - * \return \p os - */ -template -std::basic_ostream& -operator<<(std::basic_ostream &os, - const subtract_with_carry_engine &e); - - -/*! This function streams a subtract_with_carry_engine in from a std::basic_istream. - * \param is The \p basic_istream to stream from. - * \param e The \p subtract_with_carry_engine to stream in. 
- * \return \p is - */ -template -std::basic_istream& -operator>>(std::basic_istream &is, - subtract_with_carry_engine &e); - - -/*! \} // end random_number_engine_templates - */ - - -/*! \addtogroup predefined_random - * \{ - */ - -// XXX N2111 uses uint_fast32_t here - -/*! \typedef ranlux24_base - * \brief A random number engine with predefined parameters which implements the - * base engine of the \p ranlux24 random number engine. - * \note The 10000th consecutive invocation of a default-constructed object of type \p ranlux24_base - * shall produce the value \c 7937952 . - */ -typedef subtract_with_carry_engine ranlux24_base; - - -// XXX N2111 uses uint_fast64_t here - -/*! \typedef ranlux48_base - * \brief A random number engine with predefined parameters which implements the - * base engine of the \p ranlux48 random number engine. - * \note The 10000th consecutive invocation of a default-constructed object of type \p ranlux48_base - * shall produce the value \c 192113843633948 . - */ -typedef subtract_with_carry_engine ranlux48_base; - -/*! \} // end predefined_random - */ - -} // end random - -// import names into thrust:: -using random::subtract_with_carry_engine; -using random::ranlux24_base; -using random::ranlux48_base; - -} // end thrust - -#include - diff --git a/compat/thrust/random/uniform_int_distribution.h b/compat/thrust/random/uniform_int_distribution.h deleted file mode 100644 index d05f7faaf3..0000000000 --- a/compat/thrust/random/uniform_int_distribution.h +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file uniform_int_distribution.h - * \brief A uniform distribution of integer-valued numbers - */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ - -namespace random -{ - -/*! \addtogroup random_number_distributions Random Number Distributions Class Templates - * \ingroup random - * \{ - */ - -/*! \class uniform_int_distribution - * \brief A \p uniform_int_distribution random number distribution produces signed or unsigned integer - * uniform random numbers from a given range. - * - * \tparam IntType The type of integer to produce. 
- * - * The following code snippet demonstrates examples of using a \p uniform_int_distribution with a - * random number engine to produce random integers drawn from a given range: - * - * \code - * #include - * #include - * - * int main(void) - * { - * // create a minstd_rand object to act as our source of randomness - * thrust::minstd_rand rng; - * - * // create a uniform_int_distribution to produce ints from [-7,13] - * thrust::uniform_int_distribution dist(-7,13); - * - * // write a random number from the range [-7,13] to standard output - * std::cout << dist(rng) << std::endl; - * - * // write the range of the distribution, just in case we forgot - * std::cout << dist.min() << std::endl; - * - * // -7 is printed - * - * std::cout << dist.max() << std::endl; - * - * // 13 is printed - * - * // write the parameters of the distribution (which happen to be the bounds) to standard output - * std::cout << dist.a() << std::endl; - * - * // -7 is printed - * - * std::cout << dist.b() << std::endl; - * - * // 13 is printed - * - * return 0; - * } - * \endcode - */ -template - class uniform_int_distribution -{ - public: - // types - - /*! \typedef result_type - * \brief The type of the integer produced by this \p uniform_int_distribution. - */ - typedef IntType result_type; - - /*! \typedef param_type - * \brief The type of the object encapsulating this \p uniform_int_distribution's parameters. - */ - typedef thrust::pair param_type; - - // constructors and reset functions - - /*! This constructor creates a new \p uniform_int_distribution from two values defining the - * range of the distribution. - * - * \param a The smallest integer to potentially produce. Defaults to \c 0. - * \param b The largest integer to potentially produce. Defaults to the largest representable integer in - * the platform. - */ - __host__ __device__ - explicit uniform_int_distribution(IntType a = 0, IntType b = thrust::detail::integer_traits::const_max); - - /*! 
This constructor creates a new \p uniform_int_distribution from a \p param_type object - * encapsulating the range of the distribution. - * - * \param parm A \p param_type object encapsulating the parameters (i.e., the range) of the distribution. - */ - __host__ __device__ - explicit uniform_int_distribution(const param_type &parm); - - /*! This does nothing. It is included to conform to the requirements of the RandomDistribution concept. - */ - __host__ __device__ - void reset(void); - - // generating functions - - /*! This method produces a new uniform random integer drawn from this \p uniform_int_distribution's - * range using a \p UniformRandomNumberGenerator as a source of randomness. - * - * \param urng The \p UniformRandomNumberGenerator to use as a source of randomness. - */ - template - __host__ __device__ - result_type operator()(UniformRandomNumberGenerator &urng); - - /*! This method produces a new uniform random integer as if by creating a new \p uniform_int_distribution - * from the given \p param_type object, and calling its operator() method with the given - * \p UniformRandomNumberGenerator as a source of randomness. - * - * \param urng The \p UniformRandomNumberGenerator to use as a source of randomness. - * \param parm A \p param_type object encapsulating the parameters of the \p uniform_int_distribution - * to draw from. - */ - template - __host__ __device__ - result_type operator()(UniformRandomNumberGenerator &urng, const param_type &parm); - - // property functions - - /*! This method returns the value of the parameter with which this \p uniform_int_distribution - * was constructed. - * - * \return The lower bound of this \p uniform_int_distribution's range. - */ - __host__ __device__ - result_type a(void) const; - - /*! This method returns the value of the parameter with which this \p uniform_int_distribution - * was constructed. - * - * \return The upper bound of this \p uniform_int_distribution's range. 
- */ - __host__ __device__ - result_type b(void) const; - - /*! This method returns a \p param_type object encapsulating the parameters with which this - * \p uniform_int_distribution was constructed. - * - * \return A \p param_type object enapsulating the range of this \p uniform_int_distribution. - */ - __host__ __device__ - param_type param(void) const; - - /*! This method changes the parameters of this \p uniform_int_distribution using the values encapsulated - * in a given \p param_type object. - * - * \param parm A \p param_type object encapsulating the new range of this \p uniform_int_distribution. - */ - __host__ __device__ - void param(const param_type &parm); - - /*! This method returns the smallest integer this \p uniform_int_distribution can potentially produce. - * - * \return The lower bound of this \p uniform_int_distribution's range. - */ - __host__ __device__ - result_type min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; - - /*! This method returns the largest integer this \p uniform_int_distribution can potentially produce. - * - * \return The upper bound of this \p uniform_int_distribution's range. - */ - __host__ __device__ - result_type max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; - - /*! \cond - */ - private: - param_type m_param; - - friend struct thrust::random::detail::random_core_access; - - __host__ __device__ - bool equal(const uniform_int_distribution &rhs) const; - - template - std::basic_ostream& stream_out(std::basic_ostream &os) const; - - template - std::basic_istream& stream_in(std::basic_istream &is); - /*! \endcond - */ -}; // end uniform_int_distribution - - -/*! This function checks two \p uniform_int_distributions for equality. - * \param lhs The first \p uniform_int_distribution to test. - * \param rhs The second \p uniform_int_distribution to test. - * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. 
- */ -template -__host__ __device__ -bool operator==(const uniform_int_distribution &lhs, - const uniform_int_distribution &rhs); - - -/*! This function checks two \p uniform_int_distributions for inequality. - * \param lhs The first \p uniform_int_distribution to test. - * \param rhs The second \p uniform_int_distribution to test. - * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. - */ -template -__host__ __device__ -bool operator!=(const uniform_int_distribution &lhs, - const uniform_int_distribution &rhs); - - -/*! This function streams a uniform_int_distribution to a \p std::basic_ostream. - * \param os The \p basic_ostream to stream out to. - * \param d The \p uniform_int_distribution to stream out. - * \return \p os - */ -template -std::basic_ostream& -operator<<(std::basic_ostream &os, - const uniform_int_distribution &d); - - -/*! This function streams a uniform_int_distribution in from a std::basic_istream. - * \param is The \p basic_istream to stream from. - * \param d The \p uniform_int_distribution to stream in. - * \return \p is - */ -template -std::basic_istream& -operator>>(std::basic_istream &is, - uniform_int_distribution &d); - - -/*! \} // end random_number_distributions - */ - - -} // end random - -using random::uniform_int_distribution; - -} // end thrust - -#include - diff --git a/compat/thrust/random/uniform_real_distribution.h b/compat/thrust/random/uniform_real_distribution.h deleted file mode 100644 index ab85ab33dc..0000000000 --- a/compat/thrust/random/uniform_real_distribution.h +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file uniform_real_distribution.h - * \brief A uniform distribution of real-valued numbers - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -namespace random -{ - - -/*! \addtogroup random_number_distributions - * \{ - */ - -/*! \class uniform_real_distribution - * \brief A \p uniform_real_distribution random number distribution produces floating point - * uniform random numbers from a half-open interval. - * - * \tparam RealType The type of floating point number to produce. - * - * The following code snippet demonstrates examples of using a \p uniform_real_distribution with a - * random number engine to produce random integers drawn from a given range: - * - * \code - * #include - * #include - * - * int main(void) - * { - * // create a minstd_rand object to act as our source of randomness - * thrust::minstd_rand rng; - * - * // create a uniform_real_distribution to produce floats from [-7,13) - * thrust::uniform_real_distribution dist(-7,13); - * - * // write a random number from the range [-7,13) to standard output - * std::cout << dist(rng) << std::endl; - * - * // write the range of the distribution, just in case we forgot - * std::cout << dist.min() << std::endl; - * - * // -7.0 is printed - * - * std::cout << dist.max() << std::endl; - * - * // 13.0 is printed - * - * // write the parameters of the distribution (which happen to be the bounds) to standard output - * std::cout << dist.a() << std::endl; - * - * // -7.0 is printed - * - * std::cout << dist.b() << std::endl; - * - * // 13.0 is 
printed - * - * return 0; - * } - * \endcode - */ -template - class uniform_real_distribution -{ - public: - // types - - /*! \typedef result_type - * \brief The type of the floating point number produced by this \p uniform_real_distribution. - */ - typedef RealType result_type; - - /*! \typedef param_type - * \brief The type of the object encapsulating this \p uniform_real_distribution's parameters. - */ - typedef thrust::pair param_type; - - // constructors and reset functions - - /*! This constructor creates a new \p uniform_real_distribution from two values defining the - * half-open interval of the distribution. - * - * \param a The smallest floating point number to potentially produce. Defaults to \c 0.0. - * \param b The smallest number larger than the largest floating point number to potentially produce. Defaults to \c 1.0. - */ - __host__ __device__ - explicit uniform_real_distribution(RealType a = 0.0, RealType b = 1.0); - - /*! This constructor creates a new \p uniform_real_distribution from a \p param_type object - * encapsulating the range of the distribution. - * - * \param parm A \p param_type object encapsulating the parameters (i.e., the range) of the distribution. - */ - __host__ __device__ - explicit uniform_real_distribution(const param_type &parm); - - /*! This does nothing. It is included to conform to the requirements of the RandomDistribution concept. - */ - __host__ __device__ - void reset(void); - - // generating functions - - /*! This method produces a new uniform random integer drawn from this \p uniform_real_distribution's - * range using a \p UniformRandomNumberGenerator as a source of randomness. - * - * \param urng The \p UniformRandomNumberGenerator to use as a source of randomness. - */ - template - __host__ __device__ - result_type operator()(UniformRandomNumberGenerator &urng); - - /*! 
This method produces a new uniform random integer as if by creating a new \p uniform_real_distribution - * from the given \p param_type object, and calling its operator() method with the given - * \p UniformRandomNumberGenerator as a source of randomness. - * - * \param urng The \p UniformRandomNumberGenerator to use as a source of randomness. - * \param parm A \p param_type object encapsulating the parameters of the \p uniform_real_distribution - * to draw from. - */ - template - __host__ __device__ - result_type operator()(UniformRandomNumberGenerator &urng, const param_type &parm); - - // property functions - - /*! This method returns the value of the parameter with which this \p uniform_real_distribution - * was constructed. - * - * \return The lower bound of this \p uniform_real_distribution's half-open interval. - */ - __host__ __device__ - result_type a(void) const; - - /*! This method returns the value of the parameter with which this \p uniform_real_distribution - * was constructed. - * - * \return The upper bound of this \p uniform_real_distribution's half-open interval. - */ - __host__ __device__ - result_type b(void) const; - - /*! This method returns a \p param_type object encapsulating the parameters with which this - * \p uniform_real_distribution was constructed. - * - * \return A \p param_type object enapsulating the half-open interval of this \p uniform_real_distribution. - */ - __host__ __device__ - param_type param(void) const; - - /*! This method changes the parameters of this \p uniform_real_distribution using the values encapsulated - * in a given \p param_type object. - * - * \param parm A \p param_type object encapsulating the new half-open interval of this \p uniform_real_distribution. - */ - __host__ __device__ - void param(const param_type &parm); - - /*! This method returns the smallest floating point number this \p uniform_real_distribution can potentially produce. 
- * - * \return The lower bound of this \p uniform_real_distribution's half-open interval. - */ - __host__ __device__ - result_type min THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; - - /*! This method returns the smallest number larger than largest floating point number this \p uniform_real_distribution can potentially produce. - * - * \return The upper bound of this \p uniform_real_distribution's half-open interval. - */ - __host__ __device__ - result_type max THRUST_PREVENT_MACRO_SUBSTITUTION (void) const; - - /*! \cond - */ - private: - param_type m_param; - - friend struct thrust::random::detail::random_core_access; - - __host__ __device__ - bool equal(const uniform_real_distribution &rhs) const; - - template - std::basic_ostream& stream_out(std::basic_ostream &os) const; - - template - std::basic_istream& stream_in(std::basic_istream &is); - /*! \endcond - */ -}; // end uniform_real_distribution - - -/*! This function checks two \p uniform_real_distributions for equality. - * \param lhs The first \p uniform_real_distribution to test. - * \param rhs The second \p uniform_real_distribution to test. - * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. - */ -template -__host__ __device__ -bool operator==(const uniform_real_distribution &lhs, - const uniform_real_distribution &rhs); - - -/*! This function checks two \p uniform_real_distributions for inequality. - * \param lhs The first \p uniform_real_distribution to test. - * \param rhs The second \p uniform_real_distribution to test. - * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. - */ -template -__host__ __device__ -bool operator!=(const uniform_real_distribution &lhs, - const uniform_real_distribution &rhs); - - -/*! This function streams a uniform_real_distribution to a \p std::basic_ostream. - * \param os The \p basic_ostream to stream out to. - * \param d The \p uniform_real_distribution to stream out. 
- * \return \p os - */ -template -std::basic_ostream& -operator<<(std::basic_ostream &os, - const uniform_real_distribution &d); - - -/*! This function streams a uniform_real_distribution in from a std::basic_istream. - * \param is The \p basic_istream to stream from. - * \param d The \p uniform_real_distribution to stream in. - * \return \p is - */ -template -std::basic_istream& -operator>>(std::basic_istream &is, - uniform_real_distribution &d); - - -/*! \} // end random_number_distributions - */ - - -} // end random - -using random::uniform_real_distribution; - -} // end thrust - -#include - diff --git a/compat/thrust/random/xor_combine_engine.h b/compat/thrust/random/xor_combine_engine.h deleted file mode 100644 index 61eb5a50c2..0000000000 --- a/compat/thrust/random/xor_combine_engine.h +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file xor_combine_engine.h - * \brief A pseudorandom number generator which produces pseudorandom - * numbers from two integer base engines by merging their - * pseudorandom numbers with bitwise exclusive-or. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include // for size_t - -namespace thrust -{ - -namespace random -{ - -/*! \addtogroup random_number_engine_adaptors - * \{ - */ - -/*! 
\class xor_combine_engine - * \brief An \p xor_combine_engine adapts two existing base random number engines and - * produces random values by combining the values produced by each. - * - * \tparam Engine1 The type of the first base random number engine to adapt. - * \tparam s1 The size of the first shift to use in the generation algorithm. - * \tparam Engine2 The type of the second base random number engine to adapt. - * \tparam s2 The second of the second shift to use in the generation algorithm. Defaults to \c 0. - * - * The following code snippet shows an example of using an \p xor_combine_engine instance: - * - * \code - * #include - * #include - * #include - * - * int main(void) - * { - * // create an xor_combine_engine from minstd_rand and minstd_rand0 - * // use a shift of 0 for each - * thrust::xor_combine_engine rng; - * - * // print a random number to standard output - * std::cout << rng() << std::endl; - * - * return 0; - * } - * \endcode - */ -template - class xor_combine_engine -{ - public: - // types - - /*! \typedef base1_type - * \brief The type of the first adapted base random number engine. - */ - typedef Engine1 base1_type; - - /*! \typedef base2_type - * \brief The type of the second adapted base random number engine. - */ - typedef Engine2 base2_type; - - /*! \typedef result_type - * \brief The type of the unsigned integer produced by this \p xor_combine_engine. - */ - typedef typename thrust::detail::eval_if< - (sizeof(typename base2_type::result_type) > sizeof(typename base1_type::result_type)), - thrust::detail::identity_, - thrust::detail::identity_ - >::type result_type; - - /*! The size of the first shift used in the generation algorithm. - */ - static const size_t shift1 = s1; - - /*! The size of the second shift used in the generation algorithm. - */ - static const size_t shift2 = s2; - - /*! The smallest value this \p xor_combine_engine may potentially produce. - */ - static const result_type min = 0; - - /*! 
The largest value this \p xor_combine_engine may potentially produce. - */ - static const result_type max = - detail::xor_combine_engine_max< - Engine1, s1, Engine2, s2, result_type - >::value; - - // constructors and seeding functions - - /*! This constructor constructs a new \p xor_combine_engine and constructs - * its adapted engines using their null constructors. - */ - __host__ __device__ - xor_combine_engine(void); - - /*! This constructor constructs a new \p xor_combine_engine using - * given \p base1_type and \p base2_type engines to initialize its adapted base engines. - * - * \param urng1 A \p base1_type to use to initialize this \p xor_combine_engine's - * first adapted base engine. - * \param urng2 A \p base2_type to use to initialize this \p xor_combine_engine's - * first adapted base engine. - */ - __host__ __device__ - xor_combine_engine(const base1_type &urng1, const base2_type &urng2); - - /*! This constructor initializes a new \p xor_combine_engine with a given seed. - * - * \param s The seed used to intialize this \p xor_combine_engine's adapted base engines. - */ - __host__ __device__ - xor_combine_engine(result_type s); - - /*! This method initializes the state of this \p xor_combine_engine's adapted base engines - * by using their \p default_seed values. - */ - __host__ __device__ - void seed(void); - - /*! This method initializes the state of this \p xor_combine_engine's adapted base engines - * by using the given seed. - * - * \param s The seed with which to intialize this \p xor_combine_engine's adapted base engines. - */ - __host__ __device__ - void seed(result_type s); - - // generating functions - - /*! This member function produces a new random value and updates this \p xor_combine_engine's state. - * \return A new random number. - */ - __host__ __device__ - result_type operator()(void); - - /*! This member function advances this \p xor_combine_engine's state a given number of times - * and discards the results. 
- * - * \param z The number of random values to discard. - * \note This function is provided because an implementation may be able to accelerate it. - */ - __host__ __device__ - void discard(unsigned long long z); - - // property functions - - /*! This member function returns a const reference to this \p xor_combine_engine's - * first adapted base engine. - * - * \return A const reference to the first base engine this \p xor_combine_engine adapts. - */ - __host__ __device__ - const base1_type &base1(void) const; - - /*! This member function returns a const reference to this \p xor_combine_engine's - * second adapted base engine. - * - * \return A const reference to the second base engine this \p xor_combine_engine adapts. - */ - __host__ __device__ - const base2_type &base2(void) const; - - /*! \cond - */ - private: - base1_type m_b1; - base2_type m_b2; - - friend struct thrust::random::detail::random_core_access; - - __host__ __device__ - bool equal(const xor_combine_engine &rhs) const; - - template - std::basic_istream& stream_in(std::basic_istream &is); - - template - std::basic_ostream& stream_out(std::basic_ostream &os) const; - - /*! \endcond - */ -}; // end xor_combine_engine - - -/*! This function checks two \p xor_combine_engines for equality. - * \param lhs The first \p xor_combine_engine to test. - * \param rhs The second \p xor_combine_engine to test. - * \return \c true if \p lhs is equal to \p rhs; \c false, otherwise. - */ -template -__host__ __device__ -bool operator==(const xor_combine_engine &lhs, - const xor_combine_engine &rhs); - - -/*! This function checks two \p xor_combine_engines for inequality. - * \param lhs The first \p xor_combine_engine to test. - * \param rhs The second \p xor_combine_engine to test. - * \return \c true if \p lhs is not equal to \p rhs; \c false, otherwise. - */ -template -__host__ __device__ -bool operator!=(const xor_combine_engine &lhs, - const xor_combine_engine &rhs); - - -/*! 
This function streams a xor_combine_engine to a \p std::basic_ostream. - * \param os The \p basic_ostream to stream out to. - * \param e The \p xor_combine_engine to stream out. - * \return \p os - */ -template -std::basic_ostream& -operator<<(std::basic_ostream &os, - const xor_combine_engine &e); - - -/*! This function streams a xor_combine_engine in from a std::basic_istream. - * \param is The \p basic_istream to stream from. - * \param e The \p xor_combine_engine to stream in. - * \return \p is - */ -template -std::basic_istream& -operator>>(std::basic_istream &is, - xor_combine_engine &e); - - -/*! \} // end random_number_engine_adaptors - */ - - -} // end random - -// import names into thrust:: -using random::xor_combine_engine; - -} // end thrust - -#include - diff --git a/compat/thrust/reduce.h b/compat/thrust/reduce.h deleted file mode 100644 index 1dc931f9a1..0000000000 --- a/compat/thrust/reduce.h +++ /dev/null @@ -1,779 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file reduce.h - * \brief Functions for reducing a range to a single value - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - - -/*! \addtogroup reductions - * \{ - */ - - -/*! \p reduce is a generalization of summation: it computes the sum (or some - * other binary operation) of all the elements in the range [first, - * last). 
This version of \p reduce uses \c 0 as the initial value of the - * reduction. \p reduce is similar to the C++ Standard Template Library's - * std::accumulate. The primary difference between the two functions - * is that std::accumulate guarantees the order of summation, while - * \p reduce requires associativity of the binary operation to parallelize - * the reduction. - * - * Note that \p reduce also assumes that the binary reduction operator (in this - * case operator+) is commutative. If the reduction operator is not commutative - * then \p thrust::reduce should not be used. Instead, one could use - * \p inclusive_scan (which does not require commutativity) and select the - * last element of the output array. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \return The result of the reduction. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator - * and if \c x and \c y are objects of \p InputIterator's \c value_type, - * then x + y is defined and is convertible to \p InputIterator's - * \c value_type. If \c T is \c InputIterator's \c value_type, then - * T(0) is defined. - * - * The following code snippet demonstrates how to use \p reduce to compute - * the sum of a sequence of integers using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * int data[6] = {1, 0, 2, 2, 1, 3}; - * int result = thrust::reduce(thrust::host, data, data + 6); - * - * // result == 9 - * \endcode - * - * \see http://www.sgi.com/tech/stl/accumulate.html - */ -template - typename thrust::iterator_traits::value_type - reduce(const thrust::detail::execution_policy_base &exec, InputIterator first, InputIterator last); - - -/*! 
\p reduce is a generalization of summation: it computes the sum (or some - * other binary operation) of all the elements in the range [first, - * last). This version of \p reduce uses \c 0 as the initial value of the - * reduction. \p reduce is similar to the C++ Standard Template Library's - * std::accumulate. The primary difference between the two functions - * is that std::accumulate guarantees the order of summation, while - * \p reduce requires associativity of the binary operation to parallelize - * the reduction. - * - * Note that \p reduce also assumes that the binary reduction operator (in this - * case operator+) is commutative. If the reduction operator is not commutative - * then \p thrust::reduce should not be used. Instead, one could use - * \p inclusive_scan (which does not require commutativity) and select the - * last element of the output array. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \return The result of the reduction. - * - * \tparam InputIterator is a model of Input Iterator - * and if \c x and \c y are objects of \p InputIterator's \c value_type, - * then x + y is defined and is convertible to \p InputIterator's - * \c value_type. If \c T is \c InputIterator's \c value_type, then - * T(0) is defined. - * - * The following code snippet demonstrates how to use \p reduce to compute - * the sum of a sequence of integers. - * - * \code - * #include - * ... - * int data[6] = {1, 0, 2, 2, 1, 3}; - * int result = thrust::reduce(data, data + 6); - * - * // result == 9 - * \endcode - * - * \see http://www.sgi.com/tech/stl/accumulate.html - */ -template typename - thrust::iterator_traits::value_type reduce(InputIterator first, InputIterator last); - - -/*! \p reduce is a generalization of summation: it computes the sum (or some - * other binary operation) of all the elements in the range [first, - * last). This version of \p reduce uses \p init as the initial value of the - * reduction. 
\p reduce is similar to the C++ Standard Template Library's - * std::accumulate. The primary difference between the two functions - * is that std::accumulate guarantees the order of summation, while - * \p reduce requires associativity of the binary operation to parallelize - * the reduction. - * - * Note that \p reduce also assumes that the binary reduction operator (in this - * case operator+) is commutative. If the reduction operator is not commutative - * then \p thrust::reduce should not be used. Instead, one could use - * \p inclusive_scan (which does not require commutativity) and select the - * last element of the output array. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param init The initial value. - * \return The result of the reduction. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator - * and if \c x and \c y are objects of \p InputIterator's \c value_type, - * then x + y is defined and is convertible to \p T. - * \tparam T is convertible to \p InputIterator's \c value_type. - * - * The following code snippet demonstrates how to use \p reduce to compute - * the sum of a sequence of integers including an intialization value using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * int data[6] = {1, 0, 2, 2, 1, 3}; - * int result = thrust::reduce(thrust::host, data, data + 6, 1); - * - * // result == 10 - * \endcode - * - * \see http://www.sgi.com/tech/stl/accumulate.html - */ -template - T reduce(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - T init); - - -/*! 
\p reduce is a generalization of summation: it computes the sum (or some - * other binary operation) of all the elements in the range [first, - * last). This version of \p reduce uses \p init as the initial value of the - * reduction. \p reduce is similar to the C++ Standard Template Library's - * std::accumulate. The primary difference between the two functions - * is that std::accumulate guarantees the order of summation, while - * \p reduce requires associativity of the binary operation to parallelize - * the reduction. - * - * Note that \p reduce also assumes that the binary reduction operator (in this - * case operator+) is commutative. If the reduction operator is not commutative - * then \p thrust::reduce should not be used. Instead, one could use - * \p inclusive_scan (which does not require commutativity) and select the - * last element of the output array. - * - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param init The initial value. - * \return The result of the reduction. - * - * \tparam InputIterator is a model of Input Iterator - * and if \c x and \c y are objects of \p InputIterator's \c value_type, - * then x + y is defined and is convertible to \p T. - * \tparam T is convertible to \p InputIterator's \c value_type. - * - * The following code snippet demonstrates how to use \p reduce to compute - * the sum of a sequence of integers including an initialization value. - * - * \code - * #include - * ... - * int data[6] = {1, 0, 2, 2, 1, 3}; - * int result = thrust::reduce(data, data + 6, 1); - * - * // result == 10 - * \endcode - * - * \see http://www.sgi.com/tech/stl/accumulate.html - */ -template - T reduce(InputIterator first, - InputIterator last, - T init); - - -/*! \p reduce is a generalization of summation: it computes the sum (or some - * other binary operation) of all the elements in the range [first, - * last). 
This version of \p reduce uses \p init as the initial value of the - * reduction and \p binary_op as the binary function used for summation. \p reduce - * is similar to the C++ Standard Template Library's std::accumulate. - * The primary difference between the two functions is that std::accumulate - * guarantees the order of summation, while \p reduce requires associativity of - * \p binary_op to parallelize the reduction. - * - * Note that \p reduce also assumes that the binary reduction operator (in this - * case \p binary_op) is commutative. If the reduction operator is not commutative - * then \p thrust::reduce should not be used. Instead, one could use - * \p inclusive_scan (which does not require commutativity) and select the - * last element of the output array. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param init The initial value. - * \param binary_op The binary function used to 'sum' values. - * \return The result of the reduction. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to \c T. - * \tparam T is a model of Assignable, - * and is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type. - * \tparam BinaryFunction is a model of Binary Function, - * and \p BinaryFunction's \c result_type is convertible to \p OutputType. - * - * The following code snippet demonstrates how to use \p reduce to - * compute the maximum value of a sequence of integers using the \p thrust::host execution policy - * for parallelization: - * - * \code - * #include - * #include - * #include - * ... 
- * int data[6] = {1, 0, 2, 2, 1, 3}; - * int result = thrust::reduce(thrust::host, - * data, data + 6, - * -1, - * thrust::maximum()); - * // result == 3 - * \endcode - * - * \see http://www.sgi.com/tech/stl/accumulate.html - * \see transform_reduce - */ -template - T reduce(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - T init, - BinaryFunction binary_op); - - -/*! \p reduce is a generalization of summation: it computes the sum (or some - * other binary operation) of all the elements in the range [first, - * last). This version of \p reduce uses \p init as the initial value of the - * reduction and \p binary_op as the binary function used for summation. \p reduce - * is similar to the C++ Standard Template Library's std::accumulate. - * The primary difference between the two functions is that std::accumulate - * guarantees the order of summation, while \p reduce requires associativity of - * \p binary_op to parallelize the reduction. - * - * Note that \p reduce also assumes that the binary reduction operator (in this - * case \p binary_op) is commutative. If the reduction operator is not commutative - * then \p thrust::reduce should not be used. Instead, one could use - * \p inclusive_scan (which does not require commutativity) and select the - * last element of the output array. - * - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param init The initial value. - * \param binary_op The binary function used to 'sum' values. - * \return The result of the reduction. - * - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to \c T. - * \tparam T is a model of Assignable, - * and is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type. - * \tparam BinaryFunction is a model of Binary Function, - * and \p BinaryFunction's \c result_type is convertible to \p OutputType. 
- * - * The following code snippet demonstrates how to use \p reduce to - * compute the maximum value of a sequence of integers. - * - * \code - * #include - * #include - * ... - * int data[6] = {1, 0, 2, 2, 1, 3}; - * int result = thrust::reduce(data, data + 6, - * -1, - * thrust::maximum()); - * // result == 3 - * \endcode - * - * \see http://www.sgi.com/tech/stl/accumulate.html - * \see transform_reduce - */ -template - T reduce(InputIterator first, - InputIterator last, - T init, - BinaryFunction binary_op); - - -/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs. - * For each group of consecutive keys in the range [keys_first, keys_last) - * that are equal, \p reduce_by_key copies the first element of the group to the - * \c keys_output. The corresponding values in the range are reduced using the - * \c plus and the result copied to \c values_output. - * - * This version of \p reduce_by_key uses the function object \c equal_to - * to test for equality and \c plus to reduce values with equal keys. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first The beginning of the input key range. - * \param keys_last The end of the input key range. - * \param values_first The beginning of the input value range. - * \param keys_output The beginning of the output key range. - * \param values_output The beginning of the output value range. - * \return A pair of iterators at end of the ranges [keys_output, keys_output_last) and [values_output, values_output_last). - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \tparam InputIterator2 is a model of Input Iterator, - * \tparam OutputIterator1 is a model of Output Iterator and - * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. 
- * \tparam OutputIterator2 is a model of Output Iterator and - * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. - * - * \pre The input ranges shall not overlap either output range. - * - * The following code snippet demonstrates how to use \p reduce_by_key to - * compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys - * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values - * int C[N]; // output keys - * int D[N]; // output values - * - * thrust::pair new_end; - * new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D); - * - * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. - * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4. - * \endcode - * - * \see reduce - * \see unique_copy - * \see unique_by_key - * \see unique_by_key_copy - */ -template - thrust::pair - reduce_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output); - - -/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs. - * For each group of consecutive keys in the range [keys_first, keys_last) - * that are equal, \p reduce_by_key copies the first element of the group to the - * \c keys_output. The corresponding values in the range are reduced using the - * \c plus and the result copied to \c values_output. - * - * This version of \p reduce_by_key uses the function object \c equal_to - * to test for equality and \c plus to reduce values with equal keys. - * - * \param keys_first The beginning of the input key range. - * \param keys_last The end of the input key range. 
- * \param values_first The beginning of the input value range. - * \param keys_output The beginning of the output key range. - * \param values_output The beginning of the output value range. - * \return A pair of iterators at end of the ranges [keys_output, keys_output_last) and [values_output, values_output_last). - * - * \tparam InputIterator1 is a model of Input Iterator, - * \tparam InputIterator2 is a model of Input Iterator, - * \tparam OutputIterator1 is a model of Output Iterator and - * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. - * \tparam OutputIterator2 is a model of Output Iterator and - * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. - * - * \pre The input ranges shall not overlap either output range. - * - * The following code snippet demonstrates how to use \p reduce_by_key to - * compact a sequence of key/value pairs and sum values with equal keys. - * - * \code - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys - * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values - * int C[N]; // output keys - * int D[N]; // output values - * - * thrust::pair new_end; - * new_end = thrust::reduce_by_key(A, A + N, B, C, D); - * - * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. - * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4. - * \endcode - * - * \see reduce - * \see unique_copy - * \see unique_by_key - * \see unique_by_key_copy - */ -template - thrust::pair - reduce_by_key(InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output); - - -/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs. 
- * For each group of consecutive keys in the range [keys_first, keys_last) - * that are equal, \p reduce_by_key copies the first element of the group to the - * \c keys_output. The corresponding values in the range are reduced using the - * \c plus and the result copied to \c values_output. - * - * This version of \p reduce_by_key uses the function object \c binary_pred - * to test for equality and \c plus to reduce values with equal keys. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first The beginning of the input key range. - * \param keys_last The end of the input key range. - * \param values_first The beginning of the input value range. - * \param keys_output The beginning of the output key range. - * \param values_output The beginning of the output value range. - * \param binary_pred The binary predicate used to determine equality. - * \return A pair of iterators at end of the ranges [keys_output, keys_output_last) and [values_output, values_output_last). - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \tparam InputIterator2 is a model of Input Iterator, - * \tparam OutputIterator1 is a model of Output Iterator and - * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. - * \tparam OutputIterator2 is a model of Output Iterator and - * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. - * \tparam BinaryPredicate is a model of Binary Predicate. - * - * \pre The input ranges shall not overlap either output range. - * - * The following code snippet demonstrates how to use \p reduce_by_key to - * compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * ... 
- * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys - * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values - * int C[N]; // output keys - * int D[N]; // output values - * - * thrust::pair new_end; - * thrust::equal_to binary_pred; - * new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D, binary_pred); - * - * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. - * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4. - * \endcode - * - * \see reduce - * \see unique_copy - * \see unique_by_key - * \see unique_by_key_copy - */ -template - thrust::pair - reduce_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred); - - -/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs. - * For each group of consecutive keys in the range [keys_first, keys_last) - * that are equal, \p reduce_by_key copies the first element of the group to the - * \c keys_output. The corresponding values in the range are reduced using the - * \c plus and the result copied to \c values_output. - * - * This version of \p reduce_by_key uses the function object \c binary_pred - * to test for equality and \c plus to reduce values with equal keys. - * - * \param keys_first The beginning of the input key range. - * \param keys_last The end of the input key range. - * \param values_first The beginning of the input value range. - * \param keys_output The beginning of the output key range. - * \param values_output The beginning of the output value range. - * \param binary_pred The binary predicate used to determine equality. - * \return A pair of iterators at end of the ranges [keys_output, keys_output_last) and [values_output, values_output_last). 
- * - * \tparam InputIterator1 is a model of Input Iterator, - * \tparam InputIterator2 is a model of Input Iterator, - * \tparam OutputIterator1 is a model of Output Iterator and - * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. - * \tparam OutputIterator2 is a model of Output Iterator and - * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. - * \tparam BinaryPredicate is a model of Binary Predicate. - * - * \pre The input ranges shall not overlap either output range. - * - * The following code snippet demonstrates how to use \p reduce_by_key to - * compact a sequence of key/value pairs and sum values with equal keys. - * - * \code - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys - * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values - * int C[N]; // output keys - * int D[N]; // output values - * - * thrust::pair new_end; - * thrust::equal_to binary_pred; - * new_end = thrust::reduce_by_key(A, A + N, B, C, D, binary_pred); - * - * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. - * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4. - * \endcode - * - * \see reduce - * \see unique_copy - * \see unique_by_key - * \see unique_by_key_copy - */ -template - thrust::pair - reduce_by_key(InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred); - - -/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs. - * For each group of consecutive keys in the range [keys_first, keys_last) - * that are equal, \p reduce_by_key copies the first element of the group to the - * \c keys_output. The corresponding values in the range are reduced using the - * \c BinaryFunction \c binary_op and the result copied to \c values_output. 
- * Specifically, if consecutive key iterators \c i and \c (i + 1) are - * such that binary_pred(*i, *(i+1)) is \c true, then the corresponding - * values are reduced to a single value with \c binary_op. - * - * This version of \p reduce_by_key uses the function object \c binary_pred - * to test for equality and \c binary_op to reduce values with equal keys. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first The beginning of the input key range. - * \param keys_last The end of the input key range. - * \param values_first The beginning of the input value range. - * \param keys_output The beginning of the output key range. - * \param values_output The beginning of the output value range. - * \param binary_pred The binary predicate used to determine equality. - * \param binary_op The binary function used to accumulate values. - * \return A pair of iterators at end of the ranges [keys_output, keys_output_last) and [values_output, values_output_last). - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \tparam InputIterator2 is a model of Input Iterator, - * \tparam OutputIterator1 is a model of Output Iterator and - * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. - * \tparam OutputIterator2 is a model of Output Iterator and - * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. - * \tparam BinaryPredicate is a model of Binary Predicate. - * \tparam BinaryFunction is a model of Binary Function - * and \c BinaryFunction's \c result_type is convertible to \c OutputIterator2's \c value_type. - * - * \pre The input ranges shall not overlap either output range. 
- * - * The following code snippet demonstrates how to use \p reduce_by_key to - * compact a sequence of key/value pairs and sum values with equal keys using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys - * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values - * int C[N]; // output keys - * int D[N]; // output values - * - * thrust::pair new_end; - * thrust::equal_to binary_pred; - * thrust::plus binary_op; - * new_end = thrust::reduce_by_key(thrust::host, A, A + N, B, C, D, binary_pred, binary_op); - * - * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. - * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4. - * \endcode - * - * \see reduce - * \see unique_copy - * \see unique_by_key - * \see unique_by_key_copy - */ -template - thrust::pair - reduce_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred, - BinaryFunction binary_op); - - -/*! \p reduce_by_key is a generalization of \p reduce to key-value pairs. - * For each group of consecutive keys in the range [keys_first, keys_last) - * that are equal, \p reduce_by_key copies the first element of the group to the - * \c keys_output. The corresponding values in the range are reduced using the - * \c BinaryFunction \c binary_op and the result copied to \c values_output. - * Specifically, if consecutive key iterators \c i and \c (i + 1) are - * such that binary_pred(*i, *(i+1)) is \c true, then the corresponding - * values are reduced to a single value with \c binary_op. - * - * This version of \p reduce_by_key uses the function object \c binary_pred - * to test for equality and \c binary_op to reduce values with equal keys. 
- * - * \param keys_first The beginning of the input key range. - * \param keys_last The end of the input key range. - * \param values_first The beginning of the input value range. - * \param keys_output The beginning of the output key range. - * \param values_output The beginning of the output value range. - * \param binary_pred The binary predicate used to determine equality. - * \param binary_op The binary function used to accumulate values. - * \return A pair of iterators at end of the ranges [keys_output, keys_output_last) and [values_output, values_output_last). - * - * \tparam InputIterator1 is a model of Input Iterator, - * \tparam InputIterator2 is a model of Input Iterator, - * \tparam OutputIterator1 is a model of Output Iterator and - * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. - * \tparam OutputIterator2 is a model of Output Iterator and - * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. - * \tparam BinaryPredicate is a model of Binary Predicate. - * \tparam BinaryFunction is a model of Binary Function - * and \c BinaryFunction's \c result_type is convertible to \c OutputIterator2's \c value_type. - * - * \pre The input ranges shall not overlap either output range. - * - * The following code snippet demonstrates how to use \p reduce_by_key to - * compact a sequence of key/value pairs and sum values with equal keys. - * - * \code - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys - * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values - * int C[N]; // output keys - * int D[N]; // output values - * - * thrust::pair new_end; - * thrust::equal_to binary_pred; - * thrust::plus binary_op; - * new_end = thrust::reduce_by_key(A, A + N, B, C, D, binary_pred, binary_op); - * - * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. 
- * // The first four values in D are now {9, 21, 9, 3} and new_end.second - D is 4. - * \endcode - * - * \see reduce - * \see unique_copy - * \see unique_by_key - * \see unique_by_key_copy - */ -template - thrust::pair - reduce_by_key(InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred, - BinaryFunction binary_op); - - -/*! \} // end reductions - */ - - -} // end namespace thrust - -#include - diff --git a/compat/thrust/remove.h b/compat/thrust/remove.h deleted file mode 100644 index c538776258..0000000000 --- a/compat/thrust/remove.h +++ /dev/null @@ -1,800 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file remove.h - * \brief Functions for removing elements from a range - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup stream_compaction Stream Compaction - * \ingroup reordering - * \{ - * - */ - - -/*! \p remove removes from the range [first, last) all elements that are - * equal to \p value. That is, \p remove returns an iterator \p new_last such - * that the range [first, new_last) contains no elements equal to - * \p value. The iterators in the range [new_first,last) are all still - * dereferenceable, but the elements that they point to are unspecified. 
\p remove - * is stable, meaning that the relative order of elements that are not equal to - * \p value is unchanged. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the range of interest. - * \param last The end of the range of interest. - * \param value The value to remove from the range [first, last). - * Elements which are equal to value are removed from the sequence. - * \return A \p ForwardIterator pointing to the end of the resulting range of - * elements which are not equal to \p value. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable. - * \tparam T is a model of Equality Comparable, - * and objects of type \p T can be compared for equality with objects of \p ForwardIterator's \c value_type. - * - * The following code snippet demonstrates how to use \p remove to remove a number - * of interest from a range using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 6; - * int A[N] = {3, 1, 4, 1, 5, 9}; - * int *new_end = thrust::remove(thrust::host, A, A + N, 1); - * // The first four values of A are now {3, 4, 5, 9} - * // Values beyond new_end are unspecified - * \endcode - * - * \note The meaning of "removal" is somewhat subtle. \p remove does not destroy any - * iterators, and does not change the distance between \p first and \p last. - * (There's no way that it could do anything of the sort.) So, for example, if - * \c V is a device_vector, remove(V.begin(), V.end(), 0) does not - * change V.size(): \c V will contain just as many elements as it did - * before. 
\p remove returns an iterator that points to the end of the resulting - * range after elements have been removed from it; it follows that the elements - * after that iterator are of no interest, and may be discarded. If you are - * removing elements from a - * Sequence, you may - * simply erase them. That is, a reasonable way of removing elements from a - * Sequence is - * S.erase(remove(S.begin(), S.end(), x), S.end()). - * - * \see http://www.sgi.com/tech/stl/remove.html - * \see remove_if - * \see remove_copy - * \see remove_copy_if - */ -template - ForwardIterator remove(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const T &value); - - -/*! \p remove removes from the range [first, last) all elements that are - * equal to \p value. That is, \p remove returns an iterator \p new_last such - * that the range [first, new_last) contains no elements equal to - * \p value. The iterators in the range [new_last,last) are all still - * dereferenceable, but the elements that they point to are unspecified. \p remove - * is stable, meaning that the relative order of elements that are not equal to - * \p value is unchanged. - * - * \param first The beginning of the range of interest. - * \param last The end of the range of interest. - * \param value The value to remove from the range [first, last). - * Elements which are equal to value are removed from the sequence. - * \return A \p ForwardIterator pointing to the end of the resulting range of - * elements which are not equal to \p value. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable. - * \tparam T is a model of Equality Comparable, - * and objects of type \p T can be compared for equality with objects of \p ForwardIterator's \c value_type. - * - * The following code snippet demonstrates how to use \p remove to remove a number - * of interest from a range. - * - * \code - * #include - * ... 
- * const int N = 6; - * int A[N] = {3, 1, 4, 1, 5, 9}; - * int *new_end = thrust::remove(A, A + N, 1); - * // The first four values of A are now {3, 4, 5, 9} - * // Values beyond new_end are unspecified - * \endcode - * - * \note The meaning of "removal" is somewhat subtle. \p remove does not destroy any - * iterators, and does not change the distance between \p first and \p last. - * (There's no way that it could do anything of the sort.) So, for example, if - * \c V is a device_vector, remove(V.begin(), V.end(), 0) does not - * change V.size(): \c V will contain just as many elements as it did - * before. \p remove returns an iterator that points to the end of the resulting - * range after elements have been removed from it; it follows that the elements - * after that iterator are of no interest, and may be discarded. If you are - * removing elements from a - * Sequence, you may - * simply erase them. That is, a reasonable way of removing elements from a - * Sequence is - * S.erase(remove(S.begin(), S.end(), x), S.end()). - * - * \see http://www.sgi.com/tech/stl/remove.html - * \see remove_if - * \see remove_copy - * \see remove_copy_if - */ -template - ForwardIterator remove(ForwardIterator first, - ForwardIterator last, - const T &value); - - -/*! \p remove_copy copies elements that are not equal to \p value from the range - * [first, last) to a range beginning at \p result. The return value is - * the end of the resulting range. This operation is stable, meaning that the - * relative order of the elements that are copied is the same as in - * the range [first, last). - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the range of interest. - * \param last The end of the range of interest. - * \param result The resulting range is copied to the sequence beginning at this - * location. 
- * \param value The value to omit from the copied range. - * \return An OutputIterator pointing to the end of the resulting range of elements - * which are not equal to \p value. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam T is a model of Equality Comparable, - * and objects of type \p T can be compared for equality with objects of \p InputIterator's \c value_type. - * - * \pre The range [first, last) shall not overlap the range [result, result + (last - first)). - * - * The following code snippet demonstrates how to use \p remove_copy to copy - * a sequence of numbers to an output range while omitting a value of interest using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 6; - * int V[N] = {-2, 0, -1, 0, 1, 2}; - * int result[N-2]; - * thrust::remove_copy(thrust::host, V, V + N, result, 0); - * // V remains {-2, 0, -1, 0, 1, 2} - * // result is now {-2, -1, 1, 2} - * \endcode - * - * \see http://www.sgi.com/tech/stl/remove_copy.html - * \see remove - * \see remove_if - * \see remove_copy_if - */ -template - OutputIterator remove_copy(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - const T &value); - - -/*! \p remove_copy copies elements that are not equal to \p value from the range - * [first, last) to a range beginning at \p result. The return value is - * the end of the resulting range. This operation is stable, meaning that the - * relative order of the elements that are copied is the same as in - * the range [first, last). - * - * \param first The beginning of the range of interest. - * \param last The end of the range of interest. 
- * \param result The resulting range is copied to the sequence beginning at this - * location. - * \param value The value to omit from the copied range. - * \return An OutputIterator pointing to the end of the resulting range of elements - * which are not equal to \p value. - * - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam T is a model of Equality Comparable, - * and objects of type \p T can be compared for equality with objects of \p InputIterator's \c value_type. - * - * \pre The range [first, last) shall not overlap the range [result, result + (last - first)). - * - * The following code snippet demonstrates how to use \p remove_copy to copy - * a sequence of numbers to an output range while omitting a value of interest. - * - * \code - * #include - * ... - * const int N = 6; - * int V[N] = {-2, 0, -1, 0, 1, 2}; - * int result[N-2]; - * thrust::remove_copy(V, V + N, result, 0); - * // V remains {-2, 0, -1, 0, 1, 2} - * // result is now {-2, -1, 1, 2} - * \endcode - * - * \see http://www.sgi.com/tech/stl/remove_copy.html - * \see remove - * \see remove_if - * \see remove_copy_if - */ -template - OutputIterator remove_copy(InputIterator first, - InputIterator last, - OutputIterator result, - const T &value); - - -/*! \p remove_if removes from the range [first, last) every element \p x - * such that pred(x) is \c true. That is, \p remove_if returns an - * iterator \c new_last such that the range [first,new_last) contains - * no elements for which \p pred is \c true. The iterators in the range - * [new_last,last) are all still dereferenceable, but the elements that - * they point to are unspecified. \p remove_if is stable, meaning that the - * relative order of elements that are not removed is unchanged. 
- * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the range of interest. - * \param last The end of the range of interest. - * \param pred A predicate to evaluate for each element of the range - * [first,last). Elements for which \p pred evaluates to - * \c false are removed from the sequence. - * \return A ForwardIterator pointing to the end of the resulting range of - * elements for which \p pred evaluated to \c true. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * \p ForwardIterator is mutable, - * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam Predicate is a model of Predicate. - * - * The following code snippet demonstrates how to use \p remove_if to remove - * all even numbers from an array of integers using the \p thrust::host execution policy for - * parallelization: - * - * \code - * #include - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int x) - * { - * return (x % 2) == 0; - * } - * }; - * ... - * const int N = 6; - * int A[N] = {1, 4, 2, 8, 5, 7}; - * int *new_end = thrust::remove_if(thrust::host, A, A + N, is_even()); - * // The first three values of A are now {1, 5, 7} - * // Values beyond new_end are unspecified - * \endcode - * - * \note The meaning of "removal" is somewhat subtle. \p remove_if does not - * destroy any iterators, and does not change the distance between \p first and - * \p last. (There's no way that it could do anything of the sort.) So, for - * example, if \c V is a device_vector, - * remove_if(V.begin(), V.end(), pred) does not change - * V.size(): \c V will contain just as many elements as it did before. 
- * \p remove_if returns an iterator that points to the end of the resulting - * range after elements have been removed from it; it follows that the elements - * after that iterator are of no interest, and may be discarded. If you are - * removing elements from a - * Sequence, you may - * simply erase them. That is, a reasonable way of removing elements from a - * Sequence is - * S.erase(remove_if(S.begin(), S.end(), pred), S.end()). - * - * \see http://www.sgi.com/tech/stl/remove_if.html - * \see remove - * \see remove_copy - * \see remove_copy_if - */ -template - ForwardIterator remove_if(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred); - - -/*! \p remove_if removes from the range [first, last) every element \p x - * such that pred(x) is \c true. That is, \p remove_if returns an - * iterator \c new_last such that the range [first,new_last) contains - * no elements for which \p pred is \c true. The iterators in the range - * [new_last,last) are all still dereferenceable, but the elements that - * they point to are unspecified. \p remove_if is stable, meaning that the - * relative order of elements that are not removed is unchanged. - * - * \param first The beginning of the range of interest. - * \param last The end of the range of interest. - * \param pred A predicate to evaluate for each element of the range - * [first,last). Elements for which \p pred evaluates to - * \c false are removed from the sequence. - * \return A ForwardIterator pointing to the end of the resulting range of - * elements for which \p pred evaluated to \c true. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * \p ForwardIterator is mutable, - * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam Predicate is a model of Predicate. 
- * - * The following code snippet demonstrates how to use \p remove_if to remove - * all even numbers from an array of integers. - * - * \code - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int x) - * { - * return (x % 2) == 0; - * } - * }; - * ... - * const int N = 6; - * int A[N] = {1, 4, 2, 8, 5, 7}; - * int *new_end = thrust::remove_if(A, A + N, is_even()); - * // The first three values of A are now {1, 5, 7} - * // Values beyond new_end are unspecified - * \endcode - * - * \note The meaning of "removal" is somewhat subtle. \p remove_if does not - * destroy any iterators, and does not change the distance between \p first and - * \p last. (There's no way that it could do anything of the sort.) So, for - * example, if \c V is a device_vector, - * remove_if(V.begin(), V.end(), pred) does not change - * V.size(): \c V will contain just as many elements as it did before. - * \p remove_if returns an iterator that points to the end of the resulting - * range after elements have been removed from it; it follows that the elements - * after that iterator are of no interest, and may be discarded. If you are - * removing elements from a - * Sequence, you may - * simply erase them. That is, a reasonable way of removing elements from a - * Sequence is - * S.erase(remove_if(S.begin(), S.end(), pred), S.end()). - * - * \see http://www.sgi.com/tech/stl/remove_if.html - * \see remove - * \see remove_copy - * \see remove_copy_if - */ -template - ForwardIterator remove_if(ForwardIterator first, - ForwardIterator last, - Predicate pred); - - -/*! \p remove_copy_if copies elements from the range [first,last) to a - * range beginning at \p result, except that elements for which \p pred is - * \c true are not copied. The return value is the end of the resulting range. - * This operation is stable, meaning that the relative order of the elements that - * are copied is the same as the range [first,last). 
- * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the range of interest. - * \param last The end of the range of interest. - * \param result The resulting range is copied to the sequence beginning at this - * location. - * \param pred A predicate to evaluate for each element of the range [first,last). - * Elements for which \p pred evaluates to \c false are not copied - * to the resulting sequence. - * \return An OutputIterator pointing to the end of the resulting range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator, - * \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. - * - * \pre The range [first, last) shall not overlap the range [result, result + (last - first)). - * - * The following code snippet demonstrates how to use \p remove_copy_if to copy - * a sequence of numbers to an output range while omitting even numbers using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int x) - * { - * return (x % 2) == 0; - * } - * }; - * ... 
- * const int N = 6; - * int V[N] = {-2, 0, -1, 0, 1, 2}; - * int result[2]; - * thrust::remove_copy_if(thrust::host, V, V + N, result, is_even()); - * // V remains {-2, 0, -1, 0, 1, 2} - * // result is now {-1, 1} - * \endcode - * - * \see http://www.sgi.com/tech/stl/remove_copy_if.html - * \see remove - * \see remove_copy - * \see remove_if - */ -template - OutputIterator remove_copy_if(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred); - - -/*! \p remove_copy_if copies elements from the range [first,last) to a - * range beginning at \p result, except that elements for which \p pred is - * \c true are not copied. The return value is the end of the resulting range. - * This operation is stable, meaning that the relative order of the elements that - * are copied is the same as the range [first,last). - * - * \param first The beginning of the range of interest. - * \param last The end of the range of interest. - * \param result The resulting range is copied to the sequence beginning at this - * location. - * \param pred A predicate to evaluate for each element of the range [first,last). - * Elements for which \p pred evaluates to \c false are not copied - * to the resulting sequence. - * \return An OutputIterator pointing to the end of the resulting range. - * - * \tparam InputIterator is a model of Input Iterator, - * \p InputIterator's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. - * - * \pre The range [first, last) shall not overlap the range [result, result + (last - first)). - * - * The following code snippet demonstrates how to use \p remove_copy_if to copy - * a sequence of numbers to an output range while omitting even numbers. 
- * - * \code - * #include - * ... - * struct is_even - * { - * __host__ __device__ - * bool operator()(const int x) - * { - * return (x % 2) == 0; - * } - * }; - * ... - * const int N = 6; - * int V[N] = {-2, 0, -1, 0, 1, 2}; - * int result[2]; - * thrust::remove_copy_if(V, V + N, result, is_even()); - * // V remains {-2, 0, -1, 0, 1, 2} - * // result is now {-1, 1} - * \endcode - * - * \see http://www.sgi.com/tech/stl/remove_copy_if.html - * \see remove - * \see remove_copy - * \see remove_if - */ -template - OutputIterator remove_copy_if(InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred); - - -/*! \p remove_if removes from the range [first, last) every element \p x - * such that pred(x) is \c true. That is, \p remove_if returns an - * iterator \c new_last such that the range [first, new_last) contains - * no elements for which \p pred of the corresponding stencil value is \c true. - * The iterators in the range [new_last,last) are all still dereferenceable, - * but the elements that they point to are unspecified. \p remove_if is stable, - * meaning that the relative order of elements that are not removed is unchanged. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the range of interest. - * \param last The end of the range of interest. - * \param stencil The beginning of the stencil sequence. - * \param pred A predicate to evaluate for each element of the range - * [stencil, stencil + (last - first)). Elements for which \p pred evaluates to - * \c false are removed from the sequence [first, last) - * \return A ForwardIterator pointing to the end of the resulting range of - * elements for which \p pred evaluated to \c true. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator - * and \p ForwardIterator is mutable. 
- * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam Predicate is a model of Predicate. - * - * \pre The range [first, last) shall not overlap the range [result, result + (last - first)). - * \pre The range [stencil, stencil + (last - first)) shall not overlap the range [result, result + (last - first)). - * - * The following code snippet demonstrates how to use \p remove_if to remove - * specific elements from an array of integers using the \p thrust::host execution policy for - * parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 6; - * int A[N] = {1, 4, 2, 8, 5, 7}; - * int S[N] = {0, 1, 1, 1, 0, 0}; - * - * int *new_end = thrust::remove(thrust::host, A, A + N, S, thrust::identity()); - * // The first three values of A are now {1, 5, 7} - * // Values beyond new_end are unspecified - * \endcode - * - * \note The range [first, last) is not permitted to overlap with the range [stencil, stencil + (last - first)). - * - * \see http://www.sgi.com/tech/stl/remove_if.html - * \see remove - * \see remove_copy - * \see remove_copy_if - */ -template - ForwardIterator remove_if(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred); - - -/*! \p remove_if removes from the range [first, last) every element \p x - * such that pred(x) is \c true. That is, \p remove_if returns an - * iterator \c new_last such that the range [first, new_last) contains - * no elements for which \p pred of the corresponding stencil value is \c true. - * The iterators in the range [new_last,last) are all still dereferenceable, - * but the elements that they point to are unspecified. \p remove_if is stable, - * meaning that the relative order of elements that are not removed is unchanged. - * - * \param first The beginning of the range of interest. 
- * \param last The end of the range of interest. - * \param stencil The beginning of the stencil sequence. - * \param pred A predicate to evaluate for each element of the range - * [stencil, stencil + (last - first)). Elements for which \p pred evaluates to - * \c false are removed from the sequence [first, last) - * \return A ForwardIterator pointing to the end of the resulting range of - * elements for which \p pred evaluated to \c true. - * - * \tparam ForwardIterator is a model of Forward Iterator - * and \p ForwardIterator is mutable. - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam Predicate is a model of Predicate. - * - * \pre The range [first, last) shall not overlap the range [result, result + (last - first)). - * \pre The range [stencil, stencil + (last - first)) shall not overlap the range [result, result + (last - first)). - * - * The following code snippet demonstrates how to use \p remove_if to remove - * specific elements from an array of integers. - * - * \code - * #include - * ... - * const int N = 6; - * int A[N] = {1, 4, 2, 8, 5, 7}; - * int S[N] = {0, 1, 1, 1, 0, 0}; - * - * int *new_end = thrust::remove(A, A + N, S, thrust::identity()); - * // The first three values of A are now {1, 5, 7} - * // Values beyond new_end are unspecified - * \endcode - * - * \note The range [first, last) is not permitted to overlap with the range [stencil, stencil + (last - first)). - * - * \see http://www.sgi.com/tech/stl/remove_if.html - * \see remove - * \see remove_copy - * \see remove_copy_if - */ -template - ForwardIterator remove_if(ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred); - - -/*! \p remove_copy_if copies elements from the range [first,last) to a - * range beginning at \p result, except that elements for which \p pred of the - * corresponding stencil value is \c true are not copied. 
The return value is - * the end of the resulting range. This operation is stable, meaning that the - * relative order of the elements that are copied is the same as the - * range [first,last). - * - * The algorithm's execution policy is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the range of interest. - * \param last The end of the range of interest. - * \param stencil The beginning of the stencil sequence. - * \param result The resulting range is copied to the sequence beginning at this - * location. - * \param pred A predicate to evaluate for each element of the range [first,last). - * Elements for which \p pred evaluates to \c false are not copied - * to the resulting sequence. - * \return An OutputIterator pointing to the end of the resulting range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. - * - * \pre The range [stencil, stencil + (last - first)) shall not overlap the range [result, result + (last - first)). - * - * The following code snippet demonstrates how to use \p remove_copy_if to copy - * a sequence of numbers to an output range while omitting specific elements using the \p thrust::host - * execution policy for parallelization. - * - * \code - * #include - * #include - * ... 
- * const int N = 6; - * int V[N] = {-2, 0, -1, 0, 1, 2}; - * int S[N] = { 1, 1, 0, 1, 0, 1}; - * int result[2]; - * thrust::remove_copy_if(thrust::host, V, V + N, S, result, thrust::identity()); - * // V remains {-2, 0, -1, 0, 1, 2} - * // result is now {-1, 1} - * \endcode - * - * \see http://www.sgi.com/tech/stl/remove_copy_if.html - * \see remove - * \see remove_copy - * \see remove_if - * \see copy_if - */ -template - OutputIterator remove_copy_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred); - - -/*! \p remove_copy_if copies elements from the range [first,last) to a - * range beginning at \p result, except that elements for which \p pred of the - * corresponding stencil value is \c true are not copied. The return value is - * the end of the resulting range. This operation is stable, meaning that the - * relative order of the elements that are copied is the same as the - * range [first,last). - * - * \param first The beginning of the range of interest. - * \param last The end of the range of interest. - * \param stencil The beginning of the stencil sequence. - * \param result The resulting range is copied to the sequence beginning at this - * location. - * \param pred A predicate to evaluate for each element of the range [first,last). - * Elements for which \p pred evaluates to \c false are not copied - * to the resulting sequence. - * \return An OutputIterator pointing to the end of the resulting range. - * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. 
- * - * \pre The range [stencil, stencil + (last - first)) shall not overlap the range [result, result + (last - first)). - * - * The following code snippet demonstrates how to use \p remove_copy_if to copy - * a sequence of numbers to an output range while omitting specific elements. - * - * \code - * #include - * ... - * const int N = 6; - * int V[N] = {-2, 0, -1, 0, 1, 2}; - * int S[N] = { 1, 1, 0, 1, 0, 1}; - * int result[2]; - * thrust::remove_copy_if(V, V + N, S, result, thrust::identity()); - * // V remains {-2, 0, -1, 0, 1, 2} - * // result is now {-1, 1} - * \endcode - * - * \see http://www.sgi.com/tech/stl/remove_copy_if.html - * \see remove - * \see remove_copy - * \see remove_if - * \see copy_if - */ -template - OutputIterator remove_copy_if(InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred); - - -/*! \} // end stream_compaction - */ - - -} // end thrust - -#include - diff --git a/compat/thrust/replace.h b/compat/thrust/replace.h deleted file mode 100644 index 48e3e49e6c..0000000000 --- a/compat/thrust/replace.h +++ /dev/null @@ -1,817 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file replace.h - * \brief Functions for replacing elements in a range with a particular value - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! 
\addtogroup transformations - * \addtogroup replacing - * \ingroup transformations - * \{ - */ - - -/*! \p replace replaces every element in the range [first, last) equal to \p old_value - * with \p new_value. That is: for every iterator \c i, if *i == old_value - * then it performs the assignment *i = new_value. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence of interest. - * \param last The end of the sequence of interest. - * \param old_value The value to replace. - * \param new_value The new value to replace \p old_value. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable. - * \tparam T is a model of Assignable, - * \p T is a model of EqualityComparable, - * objects of \p T may be compared for equality with objects of - * \p ForwardIterator's \c value_type, - * and \p T is convertible to \p ForwardIterator's \c value_type. - * - * The following code snippet demonstrates how to use \p replace to replace - * a value of interest in a \c device_vector with another using the \p thrust::device - * execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * - * ... - * - * thrust::device_vector A(4); - * A[0] = 1; - * A[1] = 2; - * A[2] = 3; - * A[3] = 1; - * - * thrust::replace(thrust::device, A.begin(), A.end(), 1, 99); - * - * // A contains [99, 2, 3, 99] - * \endcode - * - * \see http://www.sgi.com/tech/stl/replace.html - * \see \c replace_if - * \see \c replace_copy - * \see \c replace_copy_if - */ -template - void replace(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, ForwardIterator last, - const T &old_value, - const T &new_value); - - -/*! 
\p replace replaces every element in the range [first, last) equal to \p old_value - * with \p new_value. That is: for every iterator \c i, if *i == old_value - * then it performs the assignment *i = new_value. - * - * \param first The beginning of the sequence of interest. - * \param last The end of the sequence of interest. - * \param old_value The value to replace. - * \param new_value The new value to replace \p old_value. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable. - * \tparam T is a model of Assignable, - * \p T is a model of EqualityComparable, - * objects of \p T may be compared for equality with objects of - * \p ForwardIterator's \c value_type, - * and \p T is convertible to \p ForwardIterator's \c value_type. - * - * The following code snippet demonstrates how to use \p replace to replace - * a value of interest in a \c device_vector with another. - * - * \code - * #include - * #include - * - * ... - * - * thrust::device_vector A(4); - * A[0] = 1; - * A[1] = 2; - * A[2] = 3; - * A[3] = 1; - * - * thrust::replace(A.begin(), A.end(), 1, 99); - * - * // A contains [99, 2, 3, 99] - * \endcode - * - * \see http://www.sgi.com/tech/stl/replace.html - * \see \c replace_if - * \see \c replace_copy - * \see \c replace_copy_if - */ -template - void replace(ForwardIterator first, ForwardIterator last, const T &old_value, - const T &new_value); - - -/*! \p replace_if replaces every element in the range [first, last) for which - * \p pred returns \c true with \p new_value. That is: for every iterator \c i, if - * pred(*i) is \c true then it performs the assignment *i = new_value. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence of interest. - * \param last The end of the sequence of interest. 
- * \param pred The predicate to test on every value of the range [first,last). - * \param new_value The new value to replace elements which pred(*i) evaluates - * to \c true. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * \p ForwardIterator is mutable, - * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam Predicate is a model of Predicate. - * \tparam T is a model of Assignable, - * and \p T is convertible to \p ForwardIterator's \c value_type. - * - * The following code snippet demonstrates how to use \p replace_if to replace - * a \c device_vector's negative elements with \c 0 using the \p thrust::device execution policy - * for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * struct is_less_than_zero - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x < 0; - * } - * }; - * - * ... - * - * thrust::device_vector A(4); - * A[0] = 1; - * A[1] = -3; - * A[2] = 2; - * A[3] = -1; - * - * is_less_than_zero pred; - * - * thrust::replace_if(thrust::device, A.begin(), A.end(), pred, 0); - * - * // A contains [1, 0, 2, 0] - * \endcode - * - * \see http://www.sgi.com/tech/stl/replace_if.html - * \see \c replace - * \see \c replace_copy - * \see \c replace_copy_if - */ -template - void replace_if(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, ForwardIterator last, - Predicate pred, - const T &new_value); - - -/*! \p replace_if replaces every element in the range [first, last) for which - * \p pred returns \c true with \p new_value. That is: for every iterator \c i, if - * pred(*i) is \c true then it performs the assignment *i = new_value. - * - * \param first The beginning of the sequence of interest. - * \param last The end of the sequence of interest. - * \param pred The predicate to test on every value of the range [first,last). 
- * \param new_value The new value to replace elements which pred(*i) evaluates - * to \c true. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * \p ForwardIterator is mutable, - * and \p ForwardIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam Predicate is a model of Predicate. - * \tparam T is a model of Assignable, - * and \p T is convertible to \p ForwardIterator's \c value_type. - * - * The following code snippet demonstrates how to use \p replace_if to replace - * a \c device_vector's negative elements with \c 0. - * - * \code - * #include - * #include - * ... - * struct is_less_than_zero - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x < 0; - * } - * }; - * - * ... - * - * thrust::device_vector A(4); - * A[0] = 1; - * A[1] = -3; - * A[2] = 2; - * A[3] = -1; - * - * is_less_than_zero pred; - * - * thrust::replace_if(A.begin(), A.end(), pred, 0); - * - * // A contains [1, 0, 2, 0] - * \endcode - * - * \see http://www.sgi.com/tech/stl/replace_if.html - * \see \c replace - * \see \c replace_copy - * \see \c replace_copy_if - */ -template - void replace_if(ForwardIterator first, ForwardIterator last, - Predicate pred, - const T &new_value); - - -/*! \p replace_if replaces every element in the range [first, last) for which - * pred(*s) returns \c true with \p new_value. That is: for every iterator - * \c i in the range [first, last), and \c s in the range [stencil, stencil + (last - first)), - * if pred(*s) is \c true then it performs the assignment *i = new_value. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence of interest. - * \param last The end of the sequence of interest. - * \param stencil The beginning of the stencil sequence. - * \param pred The predicate to test on every value of the range [first,last). 
- * \param new_value The new value to replace elements which pred(*i) evaluates - * to \c true. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable. - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam Predicate is a model of Predicate. - * \tparam T is a model of Assignable, - * and \p T is convertible to \p ForwardIterator's \c value_type. - * - * The following code snippet demonstrates how to use \p replace_if to replace - * a \c device_vector's element with \c 0 when its corresponding stencil element is less than zero - * using the \p thrust::device execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * - * struct is_less_than_zero - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x < 0; - * } - * }; - * - * ... - * - * thrust::device_vector A(4); - * A[0] = 10; - * A[1] = 20; - * A[2] = 30; - * A[3] = 40; - * - * thrust::device_vector S(4); - * S[0] = -1; - * S[1] = 0; - * S[2] = -1; - * S[3] = 0; - * - * is_less_than_zero pred; - * thrust::replace_if(thrust::device, A.begin(), A.end(), S.begin(), pred, 0); - * - * // A contains [0, 20, 0, 40] - * \endcode - * - * \see http://www.sgi.com/tech/stl/replace_if.html - * \see \c replace - * \see \c replace_copy - * \see \c replace_copy_if - */ -template - void replace_if(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, ForwardIterator last, - InputIterator stencil, - Predicate pred, - const T &new_value); - - -/*! \p replace_if replaces every element in the range [first, last) for which - * pred(*s) returns \c true with \p new_value. 
That is: for every iterator - * \c i in the range [first, last), and \c s in the range [stencil, stencil + (last - first)), - * if pred(*s) is \c true then it performs the assignment *i = new_value. - * - * \param first The beginning of the sequence of interest. - * \param last The end of the sequence of interest. - * \param stencil The beginning of the stencil sequence. - * \param pred The predicate to test on every value of the range [first,last). - * \param new_value The new value to replace elements which pred(*i) evaluates - * to \c true. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable. - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam Predicate is a model of Predicate. - * \tparam T is a model of Assignable, - * and \p T is convertible to \p ForwardIterator's \c value_type. - * - * The following code snippet demonstrates how to use \p replace_if to replace - * a \c device_vector's element with \c 0 when its corresponding stencil element is less than zero. - * - * \code - * #include - * #include - * - * struct is_less_than_zero - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x < 0; - * } - * }; - * - * ... - * - * thrust::device_vector A(4); - * A[0] = 10; - * A[1] = 20; - * A[2] = 30; - * A[3] = 40; - * - * thrust::device_vector S(4); - * S[0] = -1; - * S[1] = 0; - * S[2] = -1; - * S[3] = 0; - * - * is_less_than_zero pred; - * thrust::replace_if(A.begin(), A.end(), S.begin(), pred, 0); - * - * // A contains [0, 20, 0, 40] - * \endcode - * - * \see http://www.sgi.com/tech/stl/replace_if.html - * \see \c replace - * \see \c replace_copy - * \see \c replace_copy_if - */ -template - void replace_if(ForwardIterator first, ForwardIterator last, - InputIterator stencil, - Predicate pred, - const T &new_value); - - -/*! 
\p replace_copy copies elements from the range [first, last) to the range - * [result, result + (last-first)), except that any element equal to \p old_value - * is not copied; \p new_value is copied instead. - * - * More precisely, for every integer \c n such that 0 <= n < last-first, \p replace_copy - * performs the assignment *(result+n) = new_value if *(first+n) == old_value, - * and *(result+n) = *(first+n) otherwise. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence to copy from. - * \param last The end of the sequence to copy from. - * \param result The beginning of the sequence to copy to. - * \param old_value The value to replace. - * \param new_value The replacement value for which *i == old_value evaluates to \c true. - * \return result + (last-first) - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam T is a model of Assignable, - * \p T is a model of Equality Comparable, - * \p T may be compared for equality with \p InputIterator's \c value_type, - * and \p T is convertible to \p OutputIterator's \c value_type. - * - * \pre \p first may equal \p result, but the ranges [first, last) and [result, result + (last - first)) shall not overlap otherwise. - * - * \code - * #include - * #include - * #include - * ... 
- * thrust::device_vector A(4); - * A[0] = 1; - * A[1] = 2; - * A[2] = 3; - * A[3] = 1; - * - * thrust::device_vector B(4); - * - * thrust::replace_copy(thrust::device, A.begin(), A.end(), B.begin(), 1, 99); - * - * // B contains [99, 2, 3, 99] - * \endcode - * - * \see http://www.sgi.com/tech/stl/replace_copy.html - * \see \c copy - * \see \c replace - * \see \c replace_if - * \see \c replace_copy_if - */ -template - OutputIterator replace_copy(const thrust::detail::execution_policy_base &exec, - InputIterator first, InputIterator last, - OutputIterator result, - const T &old_value, - const T &new_value); - - -/*! \p replace_copy copies elements from the range [first, last) to the range - * [result, result + (last-first)), except that any element equal to \p old_value - * is not copied; \p new_value is copied instead. - * - * More precisely, for every integer \c n such that 0 <= n < last-first, \p replace_copy - * performs the assignment *(result+n) = new_value if *(first+n) == old_value, - * and *(result+n) = *(first+n) otherwise. - * - * \param first The beginning of the sequence to copy from. - * \param last The end of the sequence to copy from. - * \param result The beginning of the sequence to copy to. - * \param old_value The value to replace. - * \param new_value The replacement value for which *i == old_value evaluates to \c true. - * \return result + (last-first) - * - * \tparam InputIterator is a model of Input Iterator. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam T is a model of Assignable, - * \p T is a model of Equality Comparable, - * \p T may be compared for equality with \p InputIterator's \c value_type, - * and \p T is convertible to \p OutputIterator's \c value_type. - * - * \pre \p first may equal \p result, but the ranges [first, last) and [result, result + (last - first)) shall not overlap otherwise. - * - * \code - * #include - * #include - * ... 
- * thrust::device_vector A(4); - * A[0] = 1; - * A[1] = 2; - * A[2] = 3; - * A[3] = 1; - * - * thrust::device_vector B(4); - * - * thrust::replace_copy(A.begin(), A.end(), B.begin(), 1, 99); - * - * // B contains [99, 2, 3, 99] - * \endcode - * - * \see http://www.sgi.com/tech/stl/replace_copy.html - * \see \c copy - * \see \c replace - * \see \c replace_if - * \see \c replace_copy_if - */ -template - OutputIterator replace_copy(InputIterator first, InputIterator last, - OutputIterator result, const T &old_value, - const T &new_value); - - -/*! \p replace_copy_if copies elements from the range [first, last) to the range - * [result, result + (last-first)), except that any element for which \p pred - * is \c true is not copied; \p new_value is copied instead. - * - * More precisely, for every integer \c n such that 0 <= n < last-first, - * \p replace_copy_if performs the assignment *(result+n) = new_value if - * pred(*(first+n)), and *(result+n) = *(first+n) otherwise. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence to copy from. - * \param last The end of the sequence to copy from. - * \param result The beginning of the sequence to copy to. - * \param pred The predicate to test on every value of the range [first,last). - * \param new_value The replacement value to assign pred(*i) evaluates to \c true. - * \return result + (last-first) - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. - * \tparam T is a model of Assignable, - * and \p T is convertible to \p OutputIterator's \c value_type. 
- * - * \pre \p first may equal \p result, but the ranges [first, last) and [result, result + (last - first)) shall not overlap otherwise. - * - * \code - * #include - * #include - * #include - * - * struct is_less_than_zero - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x < 0; - * } - * }; - * - * ... - * - * thrust::device_vector A(4); - * A[0] = 1; - * A[1] = -3; - * A[2] = 2; - * A[3] = -1; - - * thrust::device_vector B(4); - * is_less_than_zero pred; - * - * thrust::replace_copy_if(thrust::device, A.begin(), A.end(), B.begin(), pred, 0); - * - * // B contains [1, 0, 2, 0] - * \endcode - * - * \see http://www.sgi.com/tech/stl/replace_copy_if.html - * \see \c replace - * \see \c replace_if - * \see \c replace_copy - */ -template - OutputIterator replace_copy_if(const thrust::detail::execution_policy_base &exec, - InputIterator first, InputIterator last, - OutputIterator result, - Predicate pred, - const T &new_value); - - -/*! \p replace_copy_if copies elements from the range [first, last) to the range - * [result, result + (last-first)), except that any element for which \p pred - * is \c true is not copied; \p new_value is copied instead. - * - * More precisely, for every integer \c n such that 0 <= n < last-first, - * \p replace_copy_if performs the assignment *(result+n) = new_value if - * pred(*(first+n)), and *(result+n) = *(first+n) otherwise. - * - * \param first The beginning of the sequence to copy from. - * \param last The end of the sequence to copy from. - * \param result The beginning of the sequence to copy to. - * \param pred The predicate to test on every value of the range [first,last). - * \param new_value The replacement value to assign pred(*i) evaluates to \c true. - * \return result + (last-first) - * - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam OutputIterator is a model of Output Iterator. 
- * \tparam Predicate is a model of Predicate. - * \tparam T is a model of Assignable, - * and \p T is convertible to \p OutputIterator's \c value_type. - * - * \pre \p first may equal \p result, but the ranges [first, last) and [result, result + (last - first)) shall not overlap otherwise. - * - * \code - * #include - * #include - * - * struct is_less_than_zero - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x < 0; - * } - * }; - * - * ... - * - * thrust::device_vector A(4); - * A[0] = 1; - * A[1] = -3; - * A[2] = 2; - * A[3] = -1; - - * thrust::device_vector B(4); - * is_less_than_zero pred; - * - * thrust::replace_copy_if(A.begin(), A.end(), B.begin(), pred, 0); - * - * // B contains [1, 0, 2, 0] - * \endcode - * - * \see http://www.sgi.com/tech/stl/replace_copy_if.html - * \see \c replace - * \see \c replace_if - * \see \c replace_copy - */ -template - OutputIterator replace_copy_if(InputIterator first, InputIterator last, - OutputIterator result, - Predicate pred, - const T &new_value); - - -/*! This version of \p replace_copy_if copies elements from the range [first, last) to the range - * [result, result + (last-first)), except that any element whose corresponding stencil - * element causes \p pred to be \c true is not copied; \p new_value is copied instead. - * - * More precisely, for every integer \c n such that 0 <= n < last-first, - * \p replace_copy_if performs the assignment *(result+n) = new_value if - * pred(*(stencil+n)), and *(result+n) = *(first+n) otherwise. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence to copy from. - * \param last The end of the sequence to copy from. - * \param stencil The beginning of the stencil sequence. - * \param result The beginning of the sequence to copy to. 
- * \param pred The predicate to test on every value of the range [stencil, stencil + (last - first)). - * \param new_value The replacement value to assign when pred(*s) evaluates to \c true. - * \return result + (last-first) - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator. - * \tparam InputIterator2 is a model of Input Iterator - * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. - * \tparam T is a model of Assignable, - * and \p T is convertible to \p OutputIterator's \c value_type. - * - * \pre \p first may equal \p result, but the ranges [first, last) and [result, result + (last - first)) shall not overlap otherwise. - * \pre \p stencil may equal \p result, but the ranges [stencil, stencil + (last - first)) and [result, result + (last - first)) shall not overlap otherwise. - * - * \code - * #include - * #include - * #include - * - * struct is_less_than_zero - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x < 0; - * } - * }; - * - * ... - * - * thrust::device_vector A(4); - * A[0] = 10; - * A[1] = 20; - * A[2] = 30; - * A[3] = 40; - * - * thrust::device_vector S(4); - * S[0] = -1; - * S[1] = 0; - * S[2] = -1; - * S[3] = 0; - * - * thrust::device_vector B(4); - * is_less_than_zero pred; - * - * thrust::replace_if(thrust::device, A.begin(), A.end(), S.begin(), B.begin(), pred, 0); - * - * // B contains [0, 20, 0, 40] - * \endcode - * - * \see \c replace_copy - * \see \c replace_if - */ -template - OutputIterator replace_copy_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred, - const T &new_value); - - -/*! 
This version of \p replace_copy_if copies elements from the range [first, last) to the range - * [result, result + (last-first)), except that any element whose corresponding stencil - * element causes \p pred to be \c true is not copied; \p new_value is copied instead. - * - * More precisely, for every integer \c n such that 0 <= n < last-first, - * \p replace_copy_if performs the assignment *(result+n) = new_value if - * pred(*(stencil+n)), and *(result+n) = *(first+n) otherwise. - * - * \param first The beginning of the sequence to copy from. - * \param last The end of the sequence to copy from. - * \param stencil The beginning of the stencil sequence. - * \param result The beginning of the sequence to copy to. - * \param pred The predicate to test on every value of the range [stencil, stencil + (last - first)). - * \param new_value The replacement value to assign when pred(*s) evaluates to \c true. - * \return result + (last-first) - * - * \tparam InputIterator1 is a model of Input Iterator. - * \tparam InputIterator2 is a model of Input Iterator - * and \p InputIterator2's \c value_type is convertible to \p Predicate's \c argument_type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam Predicate is a model of Predicate. - * \tparam T is a model of Assignable, - * and \p T is convertible to \p OutputIterator's \c value_type. - * - * \pre \p first may equal \p result, but the ranges [first, last) and [result, result + (last - first)) shall not overlap otherwise. - * \pre \p stencil may equal \p result, but the ranges [stencil, stencil + (last - first)) and [result, result + (last - first)) shall not overlap otherwise. - * - * \code - * #include - * #include - * - * struct is_less_than_zero - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x < 0; - * } - * }; - * - * ... 
- * - * thrust::device_vector A(4); - * A[0] = 10; - * A[1] = 20; - * A[2] = 30; - * A[3] = 40; - * - * thrust::device_vector S(4); - * S[0] = -1; - * S[1] = 0; - * S[2] = -1; - * S[3] = 0; - * - * thrust::device_vector B(4); - * is_less_than_zero pred; - * - * thrust::replace_if(A.begin(), A.end(), S.begin(), B.begin(), pred, 0); - * - * // B contains [0, 20, 0, 40] - * \endcode - * - * \see \c replace_copy - * \see \c replace_if - */ -template - OutputIterator replace_copy_if(InputIterator1 first, InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred, - const T &new_value); - - -/*! \} // end replacing - * \} // transformations - */ - - -} // end thrust - -#include - diff --git a/compat/thrust/reverse.h b/compat/thrust/reverse.h deleted file mode 100644 index ba50c5d05a..0000000000 --- a/compat/thrust/reverse.h +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file reverse.h - * \brief Reverses the order of a range - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup reordering - * \ingroup algorithms - */ - - -/*! \p reverse reverses a range. That is: for every i such that - * 0 <= i <= (last - first) / 2, it exchanges *(first + i) - * and *(last - (i + 1)). - * - * The algorithm's execution is parallelized as determined by \p exec. 
- * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the range to reverse. - * \param last The end of the range to reverse. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam BidirectionalIterator is a model of Bidirectional Iterator and - * \p BidirectionalIterator is mutable. - * - * The following code snippet demonstrates how to use \p reverse to reverse a - * \p device_vector of integers using the \p thrust::device execution policy for - * parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 6; - * int data[N] = {0, 1, 2, 3, 4, 5}; - * thrust::device_vector v(data, data + N); - * thrust::reverse(thrust::device, v.begin(), v.end()); - * // v is now {5, 4, 3, 2, 1, 0} - * \endcode - * - * \see http://www.sgi.com/tech/stl/reverse.html - * \see \p reverse_copy - * \see \p reverse_iterator - */ -template - void reverse(const thrust::detail::execution_policy_base &exec, - BidirectionalIterator first, - BidirectionalIterator last); - - -/*! \p reverse reverses a range. That is: for every i such that - * 0 <= i <= (last - first) / 2, it exchanges *(first + i) - * and *(last - (i + 1)). - * - * \param first The beginning of the range to reverse. - * \param last The end of the range to reverse. - * - * \tparam BidirectionalIterator is a model of Bidirectional Iterator and - * \p BidirectionalIterator is mutable. - * - * The following code snippet demonstrates how to use \p reverse to reverse a - * \p device_vector of integers. - * - * \code - * #include - * ... - * const int N = 6; - * int data[N] = {0, 1, 2, 3, 4, 5}; - * thrust::device_vector v(data, data + N); - * thrust::reverse(v.begin(), v.end()); - * // v is now {5, 4, 3, 2, 1, 0} - * \endcode - * - * \see http://www.sgi.com/tech/stl/reverse.html - * \see \p reverse_copy - * \see \p reverse_iterator - */ -template - void reverse(BidirectionalIterator first, - BidirectionalIterator last); - - -/*! 
\p reverse_copy differs from \ref reverse only in that the reversed range - * is written to a different output range, rather than inplace. - * - * \p reverse_copy copies elements from the range [first, last) to the - * range [result, result + (last - first)) such that the copy is a - * reverse of the original range. Specifically: for every i such that - * 0 <= i < (last - first), \p reverse_copy performs the assignment - * *(result + (last - first) - i) = *(first + i). - * - * The return value is result + (last - first)). - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the range to reverse. - * \param last The end of the range to reverse. - * \param result The beginning of the output range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam BidirectionalIterator is a model of Bidirectional Iterator, - * and \p BidirectionalIterator's \p value_type is convertible to \p OutputIterator's \p value_type. - * \tparam OutputIterator is a model of Output Iterator. - * - * \pre The range [first, last) and the range [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p reverse_copy to reverse - * an input \p device_vector of integers to an output \p device_vector using the \p thrust::device - * execution policy for parallelization: - * - * \code - * #include - * #include - * ... 
- * const int N = 6; - * int data[N] = {0, 1, 2, 3, 4, 5}; - * thrust::device_vector input(data, data + N); - * thrust::device_vector output(N); - * thrust::reverse_copy(thrust::device, v.begin(), v.end(), output.begin()); - * // input is still {0, 1, 2, 3, 4, 5} - * // output is now {5, 4, 3, 2, 1, 0} - * \endcode - * - * \see http://www.sgi.com/tech/stl/reverse_copy.html - * \see \p reverse - * \see \p reverse_iterator - */ -template - OutputIterator reverse_copy(const thrust::detail::execution_policy_base &exec, - BidirectionalIterator first, - BidirectionalIterator last, - OutputIterator result); - - -/*! \p reverse_copy differs from \ref reverse only in that the reversed range - * is written to a different output range, rather than inplace. - * - * \p reverse_copy copies elements from the range [first, last) to the - * range [result, result + (last - first)) such that the copy is a - * reverse of the original range. Specifically: for every i such that - * 0 <= i < (last - first), \p reverse_copy performs the assignment - * *(result + (last - first) - i) = *(first + i). - * - * The return value is result + (last - first)). - * - * \param first The beginning of the range to reverse. - * \param last The end of the range to reverse. - * \param result The beginning of the output range. - * - * \tparam BidirectionalIterator is a model of Bidirectional Iterator, - * and \p BidirectionalIterator's \p value_type is convertible to \p OutputIterator's \p value_type. - * \tparam OutputIterator is a model of Output Iterator. - * - * \pre The range [first, last) and the range [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p reverse_copy to reverse - * an input \p device_vector of integers to an output \p device_vector. - * - * \code - * #include - * ... 
- * const int N = 6; - * int data[N] = {0, 1, 2, 3, 4, 5}; - * thrust::device_vector input(data, data + N); - * thrust::device_vector output(N); - * thrust::reverse_copy(v.begin(), v.end(), output.begin()); - * // input is still {0, 1, 2, 3, 4, 5} - * // output is now {5, 4, 3, 2, 1, 0} - * \endcode - * - * \see http://www.sgi.com/tech/stl/reverse_copy.html - * \see \p reverse - * \see \p reverse_iterator - */ -template - OutputIterator reverse_copy(BidirectionalIterator first, - BidirectionalIterator last, - OutputIterator result); - - -/*! \} // end reordering - */ - - -} // end thrust - -#include - diff --git a/compat/thrust/scan.h b/compat/thrust/scan.h deleted file mode 100644 index 95074e6b90..0000000000 --- a/compat/thrust/scan.h +++ /dev/null @@ -1,1552 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file scan.h - * \brief Functions for computing prefix sums - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup algorithms - */ - - -/*! \addtogroup prefixsums Prefix Sums - * \ingroup algorithms - * \{ - */ - - -/*! \p inclusive_scan computes an inclusive prefix sum operation. The - * term 'inclusive' means that each result includes the corresponding - * input operand in the partial sum. More precisely, *first is - * assigned to *result and the sum of *first and - * *(first + 1) is assigned to *(result + 1), and so on. 
- * This version of \p inclusive_scan assumes plus as the associative operator. - * When the input and output sequences are the same, the scan is performed - * in-place. - - * \p inclusive_scan is similar to \c std::partial_sum in the STL. The primary - * difference between the two functions is that \c std::partial_sum guarantees - * a serial summation order, while \p inclusive_scan requires associativity of - * the binary operation to parallelize the prefix sum. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param result The beginning of the output sequence. - * \return The end of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to - * \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator, - * and if \c x and \c y are objects of \c OutputIterator's - * \c value_type, then x + y is defined. If \c T is - * \c OutputIterator's \c value_type, then T(0) is - * defined. - * - * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p inclusive_scan to compute an in-place - * prefix sum using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... 
- * - * int data[6] = {1, 0, 2, 2, 1, 3}; - * - * thrust::inclusive_scan(thrust::host, data, data + 6, data); // in-place scan - * - * // data is now {1, 1, 3, 5, 6, 9} - * \endcode - * - * \see http://www.sgi.com/tech/stl/partial_sum.html - * - */ -template - OutputIterator inclusive_scan(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result); - - -/*! \p inclusive_scan computes an inclusive prefix sum operation. The - * term 'inclusive' means that each result includes the corresponding - * input operand in the partial sum. More precisely, *first is - * assigned to *result and the sum of *first and - * *(first + 1) is assigned to *(result + 1), and so on. - * This version of \p inclusive_scan assumes plus as the associative operator. - * When the input and output sequences are the same, the scan is performed - * in-place. - - * \p inclusive_scan is similar to \c std::partial_sum in the STL. The primary - * difference between the two functions is that \c std::partial_sum guarantees - * a serial summation order, while \p inclusive_scan requires associativity of - * the binary operation to parallelize the prefix sum. - * - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param result The beginning of the output sequence. - * \return The end of the output sequence. - * - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to - * \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator, - * and if \c x and \c y are objects of \c OutputIterator's - * \c value_type, then x + y is defined. If \c T is - * \c OutputIterator's \c value_type, then T(0) is - * defined. - * - * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. 
- * - * The following code snippet demonstrates how to use \p inclusive_scan - * - * \code - * #include - * - * int data[6] = {1, 0, 2, 2, 1, 3}; - * - * thrust::inclusive_scan(data, data + 6, data); // in-place scan - * - * // data is now {1, 1, 3, 5, 6, 9} - * \endcode - * - * \see http://www.sgi.com/tech/stl/partial_sum.html - * - */ -template - OutputIterator inclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result); - - -/*! \p inclusive_scan computes an inclusive prefix sum operation. The - * term 'inclusive' means that each result includes the corresponding - * input operand in the partial sum. When the input and output sequences - * are the same, the scan is performed in-place. - * - * \p inclusive_scan is similar to \c std::partial_sum in the STL. The primary - * difference between the two functions is that \c std::partial_sum guarantees - * a serial summation order, while \p inclusive_scan requires associativity of - * the binary operation to parallelize the prefix sum. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param result The beginning of the output sequence. - * \param binary_op The associatve operator used to 'sum' values. - * \return The end of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to - * \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator - * and \c OutputIterator's \c value_type is convertible to - * both \c AssociativeOperator's \c first_argument_type and - * \c second_argument_type. 
- * \tparam AssociativeOperator is a model of Binary Function - * and \c AssociativeOperator's \c result_type is - * convertible to \c OutputIterator's \c value_type. - * - * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p inclusive_scan to compute an in-place - * prefix sum using the \p thrust::host execution policy for parallelization: - * - * \code - * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; - * - * thrust::maximum binary_op; - * - * thrust::inclusive_scan(thrust::host, data, data + 10, data, binary_op); // in-place scan - * - * // data is now {-5, 0, 2, 2, 2, 4, 4, 4, 4, 8} - * \endcode - * - * \see http://www.sgi.com/tech/stl/partial_sum.html - */ -template - OutputIterator inclusive_scan(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - AssociativeOperator binary_op); - - -/*! \p inclusive_scan computes an inclusive prefix sum operation. The - * term 'inclusive' means that each result includes the corresponding - * input operand in the partial sum. When the input and output sequences - * are the same, the scan is performed in-place. - * - * \p inclusive_scan is similar to \c std::partial_sum in the STL. The primary - * difference between the two functions is that \c std::partial_sum guarantees - * a serial summation order, while \p inclusive_scan requires associativity of - * the binary operation to parallelize the prefix sum. - * - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param result The beginning of the output sequence. - * \param binary_op The associatve operator used to 'sum' values. - * \return The end of the output sequence. 
- * - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to - * \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator - * and \c OutputIterator's \c value_type is convertible to - * both \c AssociativeOperator's \c first_argument_type and - * \c second_argument_type. - * \tparam AssociativeOperator is a model of Binary Function - * and \c AssociativeOperator's \c result_type is - * convertible to \c OutputIterator's \c value_type. - * - * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p inclusive_scan - * - * \code - * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; - * - * thrust::maximum binary_op; - * - * thrust::inclusive_scan(data, data + 10, data, binary_op); // in-place scan - * - * // data is now {-5, 0, 2, 2, 2, 4, 4, 4, 4, 8} - * \endcode - * - * \see http://www.sgi.com/tech/stl/partial_sum.html - */ -template - OutputIterator inclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result, - AssociativeOperator binary_op); - - -/*! \p exclusive_scan computes an exclusive prefix sum operation. The - * term 'exclusive' means that each result does not include the - * corresponding input operand in the partial sum. More precisely, - * 0 is assigned to *result and the sum of - * 0 and *first is assigned to *(result + 1), - * and so on. This version of \p exclusive_scan assumes plus as the - * associative operator and \c 0 as the initial value. When the input and - * output sequences are the same, the scan is performed in-place. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. 
- * \param result The beginning of the output sequence. - * \return The end of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to - * \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator, - * and if \c x and \c y are objects of \c OutputIterator's - * \c value_type, then x + y is defined. If \c T is - * \c OutputIterator's \c value_type, then T(0) is - * defined. - * - * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p exclusive_scan to compute an in-place - * prefix sum using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * - * int data[6] = {1, 0, 2, 2, 1, 3}; - * - * thrust::exclusive_scan(thrust::host, data, data + 6, data); // in-place scan - * - * // data is now {0, 1, 1, 3, 5, 6} - * \endcode - * - * \see http://www.sgi.com/tech/stl/partial_sum.html - */ -template - OutputIterator exclusive_scan(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result); - - -/*! \p exclusive_scan computes an exclusive prefix sum operation. The - * term 'exclusive' means that each result does not include the - * corresponding input operand in the partial sum. More precisely, - * 0 is assigned to *result and the sum of - * 0 and *first is assigned to *(result + 1), - * and so on. This version of \p exclusive_scan assumes plus as the - * associative operator and \c 0 as the initial value. When the input and - * output sequences are the same, the scan is performed in-place. - * - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. 
- * \param result The beginning of the output sequence. - * \return The end of the output sequence. - * - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to - * \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator, - * and if \c x and \c y are objects of \c OutputIterator's - * \c value_type, then x + y is defined. If \c T is - * \c OutputIterator's \c value_type, then T(0) is - * defined. - * - * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p exclusive_scan - * - * \code - * #include - * - * int data[6] = {1, 0, 2, 2, 1, 3}; - * - * thrust::exclusive_scan(data, data + 6, data); // in-place scan - * - * // data is now {0, 1, 1, 3, 5, 6} - * \endcode - * - * \see http://www.sgi.com/tech/stl/partial_sum.html - */ -template - OutputIterator exclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result); - - -/*! \p exclusive_scan computes an exclusive prefix sum operation. The - * term 'exclusive' means that each result does not include the - * corresponding input operand in the partial sum. More precisely, - * \p init is assigned to *result and the sum of \p init and - * *first is assigned to *(result + 1), and so on. - * This version of \p exclusive_scan assumes plus as the associative - * operator but requires an initial value \p init. When the input and - * output sequences are the same, the scan is performed in-place. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param result The beginning of the output sequence. - * \param init The initial value. - * \return The end of the output sequence. 
- * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to - * \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator, - * and if \c x and \c y are objects of \c OutputIterator's - * \c value_type, then x + y is defined. - * \tparam T is convertible to \c OutputIterator's \c value_type. - * - * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p exclusive_scan to compute an in-place - * prefix sum using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * - * int data[6] = {1, 0, 2, 2, 1, 3}; - * - * thrust::exclusive_scan(thrust::host, data, data + 6, data, 4); // in-place scan - * - * // data is now {4, 5, 5, 7, 9, 10} - * \endcode - * - * \see http://www.sgi.com/tech/stl/partial_sum.html - */ -template - OutputIterator exclusive_scan(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - T init); - - -/*! \p exclusive_scan computes an exclusive prefix sum operation. The - * term 'exclusive' means that each result does not include the - * corresponding input operand in the partial sum. More precisely, - * \p init is assigned to *result and the sum of \p init and - * *first is assigned to *(result + 1), and so on. - * This version of \p exclusive_scan assumes plus as the associative - * operator but requires an initial value \p init. When the input and - * output sequences are the same, the scan is performed in-place. - * - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param result The beginning of the output sequence. - * \param init The initial value. 
- * \return The end of the output sequence. - * - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to - * \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator, - * and if \c x and \c y are objects of \c OutputIterator's - * \c value_type, then x + y is defined. - * \tparam T is convertible to \c OutputIterator's \c value_type. - * - * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p exclusive_scan - * - * \code - * #include - * - * int data[6] = {1, 0, 2, 2, 1, 3}; - * - * thrust::exclusive_scan(data, data + 6, data, 4); // in-place scan - * - * // data is now {4, 5, 5, 7, 9, 10} - * \endcode - * - * \see http://www.sgi.com/tech/stl/partial_sum.html - */ -template - OutputIterator exclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result, - T init); - - -/*! \p exclusive_scan computes an exclusive prefix sum operation. The - * term 'exclusive' means that each result does not include the - * corresponding input operand in the partial sum. More precisely, - * \p init is assigned to \*result and the value - * binary_op(init, \*first) is assigned to \*(result + 1), - * and so on. This version of the function requires both an associative - * operator and an initial value \p init. When the input and output - * sequences are the same, the scan is performed in-place. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param result The beginning of the output sequence. - * \param init The initial value. - * \param binary_op The associative operator used to 'sum' values.
- * \return The end of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to - * \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator - * and \c OutputIterator's \c value_type is convertible to - * both \c AssociativeOperator's \c first_argument_type and - * \c second_argument_type. - * \tparam T is convertible to \c OutputIterator's \c value_type. - * \tparam AssociativeOperator is a model of Binary Function - * and \c AssociativeOperator's \c result_type is - * convertible to \c OutputIterator's \c value_type. - * - * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p exclusive_scan to compute an in-place - * prefix sum using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * - * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; - * - * thrust::maximum binary_op; - * - * thrust::exclusive_scan(thrust::host, data, data + 10, data, 1, binary_op); // in-place scan - * - * // data is now {1, 1, 1, 2, 2, 2, 4, 4, 4, 4 } - * \endcode - * - * \see http://www.sgi.com/tech/stl/partial_sum.html - */ -template - OutputIterator exclusive_scan(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - T init, - AssociativeOperator binary_op); - - -/*! \p exclusive_scan computes an exclusive prefix sum operation. The - * term 'exclusive' means that each result does not include the - * corresponding input operand in the partial sum. More precisely, - * \p init is assigned to \*result and the value - * binary_op(init, \*first) is assigned to \*(result + 1), - * and so on. 
This version of the function requires both an associative - * operator and an initial value \p init. When the input and output - * sequences are the same, the scan is performed in-place. - * - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param result The beginning of the output sequence. - * \param init The initial value. - * \param binary_op The associative operator used to 'sum' values. - * \return The end of the output sequence. - * - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to - * \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator - * and \c OutputIterator's \c value_type is convertible to - * both \c AssociativeOperator's \c first_argument_type and - * \c second_argument_type. - * \tparam T is convertible to \c OutputIterator's \c value_type. - * \tparam AssociativeOperator is a model of Binary Function - * and \c AssociativeOperator's \c result_type is - * convertible to \c OutputIterator's \c value_type. - * - * \pre \p first may equal \p result but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p exclusive_scan - * - * \code - * #include - * #include - * - * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; - * - * thrust::maximum binary_op; - * - * thrust::exclusive_scan(data, data + 10, data, 1, binary_op); // in-place scan - * - * // data is now {1, 1, 1, 2, 2, 2, 4, 4, 4, 4 } - * \endcode - * - * \see http://www.sgi.com/tech/stl/partial_sum.html - */ -template - OutputIterator exclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result, - T init, - AssociativeOperator binary_op); - - -/*! \addtogroup segmentedprefixsums Segmented Prefix Sums - * \ingroup prefixsums - * \{ - */ - - -/*!
\p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix - * sum operation. The term 'inclusive' means that each result includes - * the corresponding input operand in the partial sum. The term 'segmented' - * means that the partial sums are broken into distinct segments. In other - * words, within each segment a separate inclusive scan operation is computed. - * Refer to the code sample below for example usage. - * - * This version of \p inclusive_scan_by_key assumes \c equal_to as the binary - * predicate used to compare adjacent keys. Specifically, consecutive iterators - * i and i+1 in the range [first1, last1) - * belong to the same segment if *i == *(i+1), and belong to - * different segments otherwise. - * - * This version of \p inclusive_scan_by_key assumes \c plus as the associative - * operator used to perform the prefix sum. When the input and output sequences - * are the same, the scan is performed in-place. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the key sequence. - * \param last1 The end of the key sequence. - * \param first2 The beginning of the input value sequence. - * \param result The beginning of the output value sequence. - * \return The end of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator - * \tparam InputIterator2 is a model of Input Iterator - * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator, - * and if \c x and \c y are objects of \c OutputIterator's \c value_type, then - * binary_op(x,y) is defined. - * - * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. 
- * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1)) and the range [result, result + (last1 - first1)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p inclusive_scan_by_key using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * - * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; - * - * thrust::inclusive_scan_by_key(thrust::host, keys, keys + 10, data, data); // in-place scan - * - * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4}; - * \endcode - * - * \see inclusive_scan - * \see exclusive_scan_by_key - * - */ -template - OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result); - - -/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix - * sum operation. The term 'inclusive' means that each result includes - * the corresponding input operand in the partial sum. The term 'segmented' - * means that the partial sums are broken into distinct segments. In other - * words, within each segment a separate inclusive scan operation is computed. - * Refer to the code sample below for example usage. - * - * This version of \p inclusive_scan_by_key assumes \c equal_to as the binary - * predicate used to compare adjacent keys. Specifically, consecutive iterators - * i and i+1 in the range [first1, last1) - * belong to the same segment if *i == *(i+1), and belong to - * different segments otherwise. - * - * This version of \p inclusive_scan_by_key assumes \c plus as the associative - * operator used to perform the prefix sum. When the input and output sequences - * are the same, the scan is performed in-place. - * - * \param first1 The beginning of the key sequence. - * \param last1 The end of the key sequence.
- * \param first2 The beginning of the input value sequence. - * \param result The beginning of the output value sequence. - * \return The end of the output sequence. - * - * \tparam InputIterator1 is a model of Input Iterator - * \tparam InputIterator2 is a model of Input Iterator - * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator, - * and if \c x and \c y are objects of \c OutputIterator's \c value_type, then - * binary_op(x,y) is defined. - * - * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. - * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1) and range [result, result + (last1 - first1)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p inclusive_scan_by_key - * - * \code - * #include - * - * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; - * - * thrust::inclusive_scan_by_key(keys, keys + 10, vals, vals); // in-place scan - * - * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4}; - * \endcode - * - * \see inclusive_scan - * \see exclusive_scan_by_key - * - */ -template - OutputIterator inclusive_scan_by_key(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result); - - -/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix - * sum operation. The term 'inclusive' means that each result includes - * the corresponding input operand in the partial sum. The term 'segmented' - * means that the partial sums are broken into distinct segments. In other - * words, within each segment a separate inclusive scan operation is computed. - * Refer to the code sample below for example usage. 
- * - * This version of \p inclusive_scan_by_key uses the binary predicate - * \c pred to compare adjacent keys. Specifically, consecutive iterators - * i and i+1 in the range [first1, last1) - * belong to the same segment if binary_pred(*i, *(i+1)) is true, and belong to - * different segments otherwise. - * - * This version of \p inclusive_scan_by_key assumes \c plus as the associative - * operator used to perform the prefix sum. When the input and output sequences - * are the same, the scan is performed in-place. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the key sequence. - * \param last1 The end of the key sequence. - * \param first2 The beginning of the input value sequence. - * \param result The beginning of the output value sequence. - * \param binary_pred The binary predicate used to determine equality of keys. - * \return The end of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator - * \tparam InputIterator2 is a model of Input Iterator - * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator, - * and if \c x and \c y are objects of \c OutputIterator's \c value_type, then - * binary_op(x,y) is defined. - * \tparam BinaryPredicate is a model of Binary Predicate. - * - * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. - * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1) and range [result, result + (last1 - first1)) shall not overlap otherwise. 
- * - * The following code snippet demonstrates how to use \p inclusive_scan_by_key using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * - * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; - * - * thrust::equal_to binary_pred; - * - * thrust::inclusive_scan_by_key(thrust::host, keys, keys + 10, vals, vals, binary_pred); // in-place scan - * - * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4}; - * \endcode - * - * \see inclusive_scan - * \see exclusive_scan_by_key - * - */ -template - OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryPredicate binary_pred); - - -/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix - * sum operation. The term 'inclusive' means that each result includes - * the corresponding input operand in the partial sum. The term 'segmented' - * means that the partial sums are broken into distinct segments. In other - * words, within each segment a separate inclusive scan operation is computed. - * Refer to the code sample below for example usage. - * - * This version of \p inclusive_scan_by_key uses the binary predicate - * \c pred to compare adjacent keys. Specifically, consecutive iterators - * i and i+1 in the range [first1, last1) - * belong to the same segment if binary_pred(*i, *(i+1)) is true, and belong to - * different segments otherwise. - * - * This version of \p inclusive_scan_by_key assumes \c plus as the associative - * operator used to perform the prefix sum. When the input and output sequences - * are the same, the scan is performed in-place. - * - * \param first1 The beginning of the key sequence. - * \param last1 The end of the key sequence. - * \param first2 The beginning of the input value sequence. 
- * \param result The beginning of the output value sequence. - * \param binary_pred The binary predicate used to determine equality of keys. - * \return The end of the output sequence. - * - * \tparam InputIterator1 is a model of Input Iterator - * \tparam InputIterator2 is a model of Input Iterator - * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator, - * and if \c x and \c y are objects of \c OutputIterator's \c value_type, then - * binary_op(x,y) is defined. - * \tparam BinaryPredicate is a model of Binary Predicate. - * - * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. - * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1) and range [result, result + (last1 - first1)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p inclusive_scan_by_key - * - * \code - * #include - * #include - * - * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; - * - * thrust::equal_to binary_pred; - * - * thrust::inclusive_scan_by_key(keys, keys + 10, vals, vals, binary_pred); // in-place scan - * - * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4}; - * \endcode - * - * \see inclusive_scan - * \see exclusive_scan_by_key - * - */ -template - OutputIterator inclusive_scan_by_key(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryPredicate binary_pred); - - -/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix - * sum operation. The term 'inclusive' means that each result includes - * the corresponding input operand in the partial sum. The term 'segmented' - * means that the partial sums are broken into distinct segments. 
In other - * words, within each segment a separate inclusive scan operation is computed. - * Refer to the code sample below for example usage. - * - * This version of \p inclusive_scan_by_key uses the binary predicate - * \c binary_pred to compare adjacent keys. Specifically, consecutive iterators - * i and i+1 in the range [first1, last1) - * belong to the same segment if binary_pred(*i, *(i+1)) is true, and belong to - * different segments otherwise. - * - * This version of \p inclusive_scan_by_key uses the associative operator - * \c binary_op to perform the prefix sum. When the input and output sequences - * are the same, the scan is performed in-place. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the key sequence. - * \param last1 The end of the key sequence. - * \param first2 The beginning of the input value sequence. - * \param result The beginning of the output value sequence. - * \param binary_pred The binary predicate used to determine equality of keys. - * \param binary_op The associative operator used to 'sum' values. - * \return The end of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator - * \tparam InputIterator2 is a model of Input Iterator - * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator, - * and if \c x and \c y are objects of \c OutputIterator's \c value_type, then - * binary_op(x,y) is defined. - * \tparam BinaryPredicate is a model of Binary Predicate. - * \tparam AssociativeOperator is a model of Binary Function - * and \c AssociativeOperator's \c result_type is - * convertible to \c OutputIterator's \c value_type.
- * - * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. - * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1) and range [result, result + (last1 - first1)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p inclusive_scan_by_key using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * - * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; - * - * thrust::equal_to binary_pred; - * thrust::plus binary_op; - * - * thrust::inclusive_scan_by_key(thrust::host, keys, keys + 10, vals, vals, binary_pred, binary_op); // in-place scan - * - * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4}; - * \endcode - * - * \see inclusive_scan - * \see exclusive_scan_by_key - * - */ -template - OutputIterator inclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryPredicate binary_pred, - AssociativeOperator binary_op); - - -/*! \p inclusive_scan_by_key computes an inclusive key-value or 'segmented' prefix - * sum operation. The term 'inclusive' means that each result includes - * the corresponding input operand in the partial sum. The term 'segmented' - * means that the partial sums are broken into distinct segments. In other - * words, within each segment a separate inclusive scan operation is computed. - * Refer to the code sample below for example usage. - * - * This version of \p inclusive_scan_by_key uses the binary predicate - * \c pred to compare adjacent keys. Specifically, consecutive iterators - * i and i+1 in the range [first1, last1) - * belong to the same segment if binary_pred(*i, *(i+1)) is true, and belong to - * different segments otherwise. 
- * - * This version of \p inclusive_scan_by_key uses the associative operator - * \c binary_op to perform the prefix sum. When the input and output sequences - * are the same, the scan is performed in-place. - * - * \param first1 The beginning of the key sequence. - * \param last1 The end of the key sequence. - * \param first2 The beginning of the input value sequence. - * \param result The beginning of the output value sequence. - * \param binary_pred The binary predicate used to determine equality of keys. - * \param binary_op The associatve operator used to 'sum' values. - * \return The end of the output sequence. - * - * \tparam InputIterator1 is a model of Input Iterator - * \tparam InputIterator2 is a model of Input Iterator - * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator, - * and if \c x and \c y are objects of \c OutputIterator's \c value_type, then - * binary_op(x,y) is defined. - * \tparam BinaryPredicate is a model of Binary Predicate. - * \tparam AssociativeOperator is a model of Binary Function - * and \c AssociativeOperator's \c result_type is - * convertible to \c OutputIterator's \c value_type. - * - * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. - * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1) and range [result, result + (last1 - first1)) shall not overlap otherwise. 
- * - * The following code snippet demonstrates how to use \p inclusive_scan_by_key - * - * \code - * #include - * #include - * - * int data[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; - * - * thrust::equal_to binary_pred; - * thrust::plus binary_op; - * - * thrust::inclusive_scan_by_key(keys, keys + 10, vals, vals, binary_pred, binary_op); // in-place scan - * - * // data is now {1, 2, 3, 1, 2, 1, 1, 2, 3, 4}; - * \endcode - * - * \see inclusive_scan - * \see exclusive_scan_by_key - * - */ -template - OutputIterator inclusive_scan_by_key(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryPredicate binary_pred, - AssociativeOperator binary_op); - - -/*! \p exclusive_scan_by_key computes an exclusive segmented prefix - * - * This version of \p exclusive_scan_by_key uses the value \c 0 to - * initialize the exclusive scan operation. - * - * This version of \p exclusive_scan_by_key assumes \c plus as the associative - * operator used to perform the prefix sum. When the input and output sequences - * are the same, the scan is performed in-place. - * - * This version of \p exclusive_scan_by_key assumes \c equal_to as the binary - * predicate used to compare adjacent keys. Specifically, consecutive iterators - * i and i+1 in the range [first1, last1 - * belong to the same segment if *i == *(i+1), and belong to - * different segments otherwise. - * - * Refer to the most general form of \p exclusive_scan_by_key for additional details. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the key sequence. - * \param last1 The end of the key sequence. - * \param first2 The beginning of the input value sequence. - * \param result The beginning of the output value sequence. 
- * - * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. - * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1) and range [result, result + (last1 - first1)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p exclusive_scan_by_key using the - * \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * - * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; - * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - * - * thrust::exclusive_scan_by_key(thrust::host, key, key + 10, vals, vals); // in-place scan - * - * // vals is now {0, 1, 2, 0, 1, 0, 0, 1, 2, 3}; - * \endcode - * - * \see exclusive_scan - * - */ -template - OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result); - - -/*! \p exclusive_scan_by_key computes an exclusive segmented prefix - * - * This version of \p exclusive_scan_by_key uses the value \c 0 to - * initialize the exclusive scan operation. - * - * This version of \p exclusive_scan_by_key assumes \c plus as the associative - * operator used to perform the prefix sum. When the input and output sequences - * are the same, the scan is performed in-place. - * - * This version of \p exclusive_scan_by_key assumes \c equal_to as the binary - * predicate used to compare adjacent keys. Specifically, consecutive iterators - * i and i+1 in the range [first1, last1 - * belong to the same segment if *i == *(i+1), and belong to - * different segments otherwise. - * - * Refer to the most general form of \p exclusive_scan_by_key for additional details. - * - * \param first1 The beginning of the key sequence. - * \param last1 The end of the key sequence. - * \param first2 The beginning of the input value sequence. 
- * \param result The beginning of the output value sequence. - * - * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. - * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1) and range [result, result + (last1 - first1)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p exclusive_scan_by_key. - * - * \code - * #include - * - * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; - * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - * - * thrust::exclusive_scan_by_key(key, key + 10, vals, vals); // in-place scan - * - * // vals is now {0, 1, 2, 0, 1, 0, 0, 1, 2, 3}; - * \endcode - * - * \see exclusive_scan - * - */ -template - OutputIterator exclusive_scan_by_key(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result); - - -/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix - * sum operation. The term 'exclusive' means that each result does not include - * the corresponding input operand in the partial sum. The term 'segmented' - * means that the partial sums are broken into distinct segments. In other - * words, within each segment a separate exclusive scan operation is computed. - * Refer to the code sample below for example usage. - * - * This version of \p exclusive_scan_by_key uses the value \c init to - * initialize the exclusive scan operation. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the key sequence. - * \param last1 The end of the key sequence. - * \param first2 The beginning of the input value sequence. - * \param result The beginning of the output value sequence. - * \param init The initial of the exclusive sum value. - * \return The end of the output sequence. 
- * - * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. - * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1) and range [result, result + (last1 - first1)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p exclusive_scan_by_key using the \p - * thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * - * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; - * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - * - * int init = 5; - * - * thrust::exclusive_scan_by_key(thrust::host, key, key + 10, vals, vals, init); // in-place scan - * - * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8}; - * \endcode - * - * \see exclusive_scan - * \see inclusive_scan_by_key - * - */ -template - OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init); - - -/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix - * sum operation. The term 'exclusive' means that each result does not include - * the corresponding input operand in the partial sum. The term 'segmented' - * means that the partial sums are broken into distinct segments. In other - * words, within each segment a separate exclusive scan operation is computed. - * Refer to the code sample below for example usage. - * - * This version of \p exclusive_scan_by_key uses the value \c init to - * initialize the exclusive scan operation. - * - * \param first1 The beginning of the key sequence. - * \param last1 The end of the key sequence. - * \param first2 The beginning of the input value sequence. - * \param result The beginning of the output value sequence. - * \param init The initial of the exclusive sum value. 
- * \return The end of the output sequence. - * - * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. - * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1) and range [result, result + (last1 - first1)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p exclusive_scan_by_key - * - * \code - * #include - * #include - * - * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; - * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - * - * int init = 5; - * - * thrust::exclusive_scan_by_key(key, key + 10, vals, vals, init); // in-place scan - * - * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8}; - * \endcode - * - * \see exclusive_scan - * \see inclusive_scan_by_key - * - */ -template - OutputIterator exclusive_scan_by_key(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init); - - -/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix - * sum operation. The term 'exclusive' means that each result does not include - * the corresponding input operand in the partial sum. The term 'segmented' - * means that the partial sums are broken into distinct segments. In other - * words, within each segment a separate exclusive scan operation is computed. - * Refer to the code sample below for example usage. - * - * This version of \p exclusive_scan_by_key uses the value \c init to - * initialize the exclusive scan operation. - * - * This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred - * to compare adjacent keys. Specifically, consecutive iterators i and - * i+1 in the range [first1, last1) belong to the same segment if - * binary_pred(*i, *(i+1)) is true, and belong to different segments otherwise. - * - * The algorithm's execution is parallelized as determined by \p exec. 
- * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the key sequence. - * \param last1 The end of the key sequence. - * \param first2 The beginning of the input value sequence. - * \param result The beginning of the output value sequence. - * \param init The initial of the exclusive sum value. - * \param binary_pred The binary predicate used to determine equality of keys. - * \return The end of the output sequence. - * - * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. - * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1) and range [result, result + (last1 - first1)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p exclusive_scan_by_key using the - * \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * - * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; - * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - * - * int init = 5; - * - * thrust::equal_to binary_pred; - * - * thrust::exclusive_scan_by_key(thrust::host, key, key + 10, vals, vals, init, binary_pred); // in-place scan - * - * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8}; - * \endcode - * - * \see exclusive_scan - * \see inclusive_scan_by_key - * - */ -template - OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init, - BinaryPredicate binary_pred); - - -/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix - * sum operation. The term 'exclusive' means that each result does not include - * the corresponding input operand in the partial sum. The term 'segmented' - * means that the partial sums are broken into distinct segments. 
In other - * words, within each segment a separate exclusive scan operation is computed. - * Refer to the code sample below for example usage. - * - * This version of \p exclusive_scan_by_key uses the value \c init to - * initialize the exclusive scan operation. - * - * This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred - * to compare adjacent keys. Specifically, consecutive iterators i and - * i+1 in the range [first1, last1) belong to the same segment if - * binary_pred(*i, *(i+1)) is true, and belong to different segments otherwise. - * - * \param first1 The beginning of the key sequence. - * \param last1 The end of the key sequence. - * \param first2 The beginning of the input value sequence. - * \param result The beginning of the output value sequence. - * \param init The initial of the exclusive sum value. - * \param binary_pred The binary predicate used to determine equality of keys. - * \return The end of the output sequence. - * - * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. - * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1) and range [result, result + (last1 - first1)) shall not overlap otherwise. 
- * - * The following code snippet demonstrates how to use \p exclusive_scan_by_key - * - * \code - * #include - * #include - * - * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; - * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - * - * int init = 5; - * - * thrust::equal_to binary_pred; - * - * thrust::exclusive_scan_by_key(key, key + 10, vals, vals, init, binary_pred); // in-place scan - * - * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8}; - * \endcode - * - * \see exclusive_scan - * \see inclusive_scan_by_key - * - */ -template - OutputIterator exclusive_scan_by_key(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init, - BinaryPredicate binary_pred); - - -/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix - * sum operation. The term 'exclusive' means that each result does not include - * the corresponding input operand in the partial sum. The term 'segmented' - * means that the partial sums are broken into distinct segments. In other - * words, within each segment a separate exclusive scan operation is computed. - * Refer to the code sample below for example usage. - * - * This version of \p exclusive_scan_by_key uses the value \c init to - * initialize the exclusive scan operation. - * - * This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred - * to compare adjacent keys. Specifically, consecutive iterators i and - * i+1 in the range [first1, last1) belong to the same segment if - * binary_pred(*i, *(i+1)) is true, and belong to different segments otherwise. - * - * This version of \p exclusive_scan_by_key uses the associative operator - * \c binary_op to perform the prefix sum. When the input and output sequences - * are the same, the scan is performed in-place. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. 
- * \param first1 The beginning of the key sequence. - * \param last1 The end of the key sequence. - * \param first2 The beginning of the input value sequence. - * \param result The beginning of the output value sequence. - * \param init The initial of the exclusive sum value. - * \param binary_pred The binary predicate used to determine equality of keys. - * \param binary_op The associatve operator used to 'sum' values. - * \return The end of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator - * \tparam InputIterator2 is a model of Input Iterator - * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator, - * and if \c x and \c y are objects of \c OutputIterator's \c value_type, then - * binary_op(x,y) is defined. - * \tparam T is convertible to \c OutputIterator's \c value_type. - * \tparam BinaryPredicate is a model of Binary Predicate. - * \tparam AssociativeOperator is a model of Binary Function - * and \c AssociativeOperator's \c result_type is convertible to \c OutputIterator's \c value_type. - * - * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. - * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1) and range [result, result + (last1 - first1)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p exclusive_scan_by_key using the - * \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... 
- * - * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; - * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - * - * int init = 5; - * - * thrust::equal_to binary_pred; - * thrust::plus binary_op; - * - * thrust::exclusive_scan_by_key(thrust::host, key, key + 10, vals, vals, init, binary_pred, binary_op); // in-place scan - * - * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8}; - * \endcode - * - * \see exclusive_scan - * \see inclusive_scan_by_key - * - */ -template - OutputIterator exclusive_scan_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init, - BinaryPredicate binary_pred, - AssociativeOperator binary_op); - - -/*! \p exclusive_scan_by_key computes an exclusive key-value or 'segmented' prefix - * sum operation. The term 'exclusive' means that each result does not include - * the corresponding input operand in the partial sum. The term 'segmented' - * means that the partial sums are broken into distinct segments. In other - * words, within each segment a separate exclusive scan operation is computed. - * Refer to the code sample below for example usage. - * - * This version of \p exclusive_scan_by_key uses the value \c init to - * initialize the exclusive scan operation. - * - * This version of \p exclusive_scan_by_key uses the binary predicate \c binary_pred - * to compare adjacent keys. Specifically, consecutive iterators i and - * i+1 in the range [first1, last1) belong to the same segment if - * binary_pred(*i, *(i+1)) is true, and belong to different segments otherwise. - * - * This version of \p exclusive_scan_by_key uses the associative operator - * \c binary_op to perform the prefix sum. When the input and output sequences - * are the same, the scan is performed in-place. - * - * \param first1 The beginning of the key sequence. - * \param last1 The end of the key sequence. - * \param first2 The beginning of the input value sequence. 
- * \param result The beginning of the output value sequence. - * \param init The initial of the exclusive sum value. - * \param binary_pred The binary predicate used to determine equality of keys. - * \param binary_op The associatve operator used to 'sum' values. - * \return The end of the output sequence. - * - * \tparam InputIterator1 is a model of Input Iterator - * \tparam InputIterator2 is a model of Input Iterator - * and \c InputIterator2's \c value_type is convertible to \c OutputIterator's \c value_type. - * \tparam OutputIterator is a model of Output Iterator, - * and if \c x and \c y are objects of \c OutputIterator's \c value_type, then - * binary_op(x,y) is defined. - * \tparam T is convertible to \c OutputIterator's \c value_type. - * \tparam BinaryPredicate is a model of Binary Predicate. - * \tparam AssociativeOperator is a model of Binary Function - * and \c AssociativeOperator's \c result_type is convertible to \c OutputIterator's \c value_type. - * - * \pre \p first1 may equal \p result but the range [first1, last1) and the range [result, result + (last1 - first1)) shall not overlap otherwise. - * \pre \p first2 may equal \p result but the range [first2, first2 + (last1 - first1) and range [result, result + (last1 - first1)) shall not overlap otherwise. 
- * - * The following code snippet demonstrates how to use \p exclusive_scan_by_key - * - * \code - * #include - * #include - * - * int keys[10] = {0, 0, 0, 1, 1, 2, 3, 3, 3, 3}; - * int vals[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - * - * int init = 5; - * - * thrust::equal_to binary_pred; - * thrust::plus binary_op; - * - * thrust::exclusive_scan_by_key(key, key + 10, vals, vals, init, binary_pred, binary_op); // in-place scan - * - * // vals is now {5, 6, 7, 5, 6, 5, 5, 6, 7, 8}; - * \endcode - * - * \see exclusive_scan - * \see inclusive_scan_by_key - * - */ -template - OutputIterator exclusive_scan_by_key(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init, - BinaryPredicate binary_pred, - AssociativeOperator binary_op); - - -/*! \} // end segmentedprefixsums - */ - - -/*! \} // end prefix sums - */ - - -} // end namespace thrust - -#include - diff --git a/compat/thrust/scatter.h b/compat/thrust/scatter.h deleted file mode 100644 index 59604ca170..0000000000 --- a/compat/thrust/scatter.h +++ /dev/null @@ -1,420 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file scatter.h - * \brief Irregular copying to a destination range - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup scattering - * \ingroup copying - * \{ - */ - - -/*! 
\p scatter copies elements from a source range into an output array - * according to a map. For each iterator \c i in the range [\p first, \p last), - * the value \c *i is assigned to output[*(map + (i - first))]. The - * output iterator must permit random access. If the same index - * appears more than once in the range [map, map + (last - first)), - * the result is undefined. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first Beginning of the sequence of values to scatter. - * \param last End of the sequence of values to scatter. - * \param map Beginning of the sequence of output indices. - * \param result Destination of the source elements. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type. - * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. - * \tparam RandomAccessIterator must be a model of Random Access iterator. - * - * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`. - * - * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. - * - * \pre The expression `result[*i]` shall be valid for all iterators in the range `[map,map + (last - first))`. - * - * The following code snippet demonstrates how to use \p scatter to - * reorder a range using the \p thrust::device execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... 
- * // mark even indices with a 1; odd indices with a 0 - * int values[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; - * thrust::device_vector d_values(values, values + 10); - * - * // scatter all even indices into the first half of the - * // range, and odd indices vice versa - * int map[10] = {0, 5, 1, 6, 2, 7, 3, 8, 4, 9}; - * thrust::device_vector d_map(map, map + 10); - * - * thrust::device_vector d_output(10); - * thrust::scatter(thrust::device, - * d_values.begin(), d_values.end(), - * d_map.begin(), d_output.begin()); - * // d_output is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0} - * \endcode - * - * \note \p scatter is the inverse of thrust::gather. - */ -template - void scatter(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - RandomAccessIterator result); - - -/*! \p scatter copies elements from a source range into an output array - * according to a map. For each iterator \c i in the range [\p first, \p last), - * the value \c *i is assigned to output[*(map + (i - first))]. The - * output iterator must permit random access. If the same index - * appears more than once in the range [map, map + (last - first)), - * the result is undefined. - * - * \param first Beginning of the sequence of values to scatter. - * \param last End of the sequence of values to scatter. - * \param map Beginning of the sequence of output indices. - * \param result Destination of the source elements. - * - * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type. - * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. - * \tparam RandomAccessIterator must be a model of Random Access iterator. 
- * - * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`. - * - * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. - * - * \pre The expression `result[*i]` shall be valid for all iterators in the range `[map,map + (last - first))`. - * - * The following code snippet demonstrates how to use \p scatter to - * reorder a range. - * - * \code - * #include - * #include - * ... - * // mark even indices with a 1; odd indices with a 0 - * int values[10] = {1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; - * thrust::device_vector d_values(values, values + 10); - * - * // scatter all even indices into the first half of the - * // range, and odd indices vice versa - * int map[10] = {0, 5, 1, 6, 2, 7, 3, 8, 4, 9}; - * thrust::device_vector d_map(map, map + 10); - * - * thrust::device_vector d_output(10); - * thrust::scatter(d_values.begin(), d_values.end(), - * d_map.begin(), d_output.begin()); - * // d_output is now {1, 1, 1, 1, 1, 0, 0, 0, 0, 0} - * \endcode - * - * \note \p scatter is the inverse of thrust::gather. - */ -template - void scatter(InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - RandomAccessIterator result); - - -/*! \p scatter_if conditionally copies elements from a source range into an - * output array according to a map. For each iterator \c i in the - * range [first, last) such that *(stencil + (i - first)) is - * true, the value \c *i is assigned to output[*(map + (i - first))]. - * The output iterator must permit random access. If the same index - * appears more than once in the range [map, map + (last - first)) - * the result is undefined. - * - * The algorithm's execution is parallelized as determined by \p exec. 
- * - * \param exec The execution policy to use for parallelization. - * \param first Beginning of the sequence of values to scatter. - * \param last End of the sequence of values to scatter. - * \param map Beginning of the sequence of output indices. - * \param stencil Beginning of the sequence of predicate values. - * \param output Beginning of the destination range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type. - * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. - * \tparam InputIterator3 must be a model of Input Iterator and \c InputIterator3's \c value_type must be convertible to \c bool. - * \tparam RandomAccessIterator must be a model of Random Access iterator. - * - * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`. - * - * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. - * - * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[stencil,stencil + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. - * - * \pre The expression `result[*i]` shall be valid for all iterators `i` in the range `[map,map + (last - first))` for which the following condition holds: `*(stencil + i) != false`. - * - * \code - * #include - * #include - * ... 
- * int V[8] = {10, 20, 30, 40, 50, 60, 70, 80}; - * int M[8] = {0, 5, 1, 6, 2, 7, 3, 4}; - * int S[8] = {1, 0, 1, 0, 1, 0, 1, 0}; - * int D[8] = {0, 0, 0, 0, 0, 0, 0, 0}; - * - * thrust::scatter_if(thrust::host, V, V + 8, M, S, D); - * - * // D contains [10, 30, 50, 70, 0, 0, 0, 0]; - * \endcode - * - * \note \p scatter_if is the inverse of thrust::gather_if. - */ -template - void scatter_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - InputIterator3 stencil, - RandomAccessIterator output); - - -/*! \p scatter_if conditionally copies elements from a source range into an - * output array according to a map. For each iterator \c i in the - * range [first, last) such that *(stencil + (i - first)) is - * true, the value \c *i is assigned to output[*(map + (i - first))]. - * The output iterator must permit random access. If the same index - * appears more than once in the range [map, map + (last - first)) - * the result is undefined. - * - * \param first Beginning of the sequence of values to scatter. - * \param last End of the sequence of values to scatter. - * \param map Beginning of the sequence of output indices. - * \param stencil Beginning of the sequence of predicate values. - * \param output Beginning of the destination range. - * - * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type. - * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. - * \tparam InputIterator3 must be a model of Input Iterator and \c InputIterator3's \c value_type must be convertible to \c bool. - * \tparam RandomAccessIterator must be a model of Random Access iterator. 
- * - * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`. - * - * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. - * - * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[stencil,stencil + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. - * - * \pre The expression `result[*i]` shall be valid for all iterators `i` in the range `[map,map + (last - first))` for which the following condition holds: `*(stencil + i) != false`. - * - * \code - * #include - * ... - * int V[8] = {10, 20, 30, 40, 50, 60, 70, 80}; - * int M[8] = {0, 5, 1, 6, 2, 7, 3, 4}; - * int S[8] = {1, 0, 1, 0, 1, 0, 1, 0}; - * int D[8] = {0, 0, 0, 0, 0, 0, 0, 0}; - * - * thrust::scatter_if(V, V + 8, M, S, D); - * - * // D contains [10, 30, 50, 70, 0, 0, 0, 0]; - * \endcode - * - * \note \p scatter_if is the inverse of thrust::gather_if. - */ -template - void scatter_if(InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - InputIterator3 stencil, - RandomAccessIterator output); - - -/*! \p scatter_if conditionally copies elements from a source range into an - * output array according to a map. For each iterator \c i in the - * range [first, last) such that pred(*(stencil + (i - first))) is - * \c true, the value \c *i is assigned to output[*(map + (i - first))]. - * The output iterator must permit random access. If the same index - * appears more than once in the range [map, map + (last - first)) - * the result is undefined. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. 
- * \param first Beginning of the sequence of values to scatter. - * \param last End of the sequence of values to scatter. - * \param map Beginning of the sequence of output indices. - * \param stencil Beginning of the sequence of predicate values. - * \param output Beginning of the destination range. - * \param pred Predicate to apply to the stencil values. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type. - * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. - * \tparam InputIterator3 must be a model of Input Iterator and \c InputIterator3's \c value_type must be convertible to \c Predicate's \c argument_type. - * \tparam RandomAccessIterator must be a model of Random Access iterator. - * \tparam Predicate must be a model of Predicate. - * - * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`. - * - * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. - * - * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[stencil,stencil + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. - * - * \pre The expression `result[*i]` shall be valid for all iterators `i` in the range `[map,map + (last - first))` for which the following condition holds: `pred(*(stencil + i)) != false`. 
- * - * \code - * #include - * #include - * - * struct is_even - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return (x % 2) == 0; - * } - * }; - * - * ... - * - * int V[8] = {10, 20, 30, 40, 50, 60, 70, 80}; - * int M[8] = {0, 5, 1, 6, 2, 7, 3, 4}; - * int S[8] = {2, 1, 2, 1, 2, 1, 2, 1}; - * int D[8] = {0, 0, 0, 0, 0, 0, 0, 0}; - * - * is_even pred; - * thrust::scatter_if(thrust::host, V, V + 8, M, S, D, pred); - * - * // D contains [10, 30, 50, 70, 0, 0, 0, 0]; - * \endcode - * - * \note \p scatter_if is the inverse of thrust::gather_if. - */ -template - void scatter_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - InputIterator3 stencil, - RandomAccessIterator output, - Predicate pred); - - -/*! \p scatter_if conditionally copies elements from a source range into an - * output array according to a map. For each iterator \c i in the - * range [first, last) such that pred(*(stencil + (i - first))) is - * \c true, the value \c *i is assigned to output[*(map + (i - first))]. - * The output iterator must permit random access. If the same index - * appears more than once in the range [map, map + (last - first)) - * the result is undefined. - * - * \param first Beginning of the sequence of values to scatter. - * \param last End of the sequence of values to scatter. - * \param map Beginning of the sequence of output indices. - * \param stencil Beginning of the sequence of predicate values. - * \param output Beginning of the destination range. - * \param pred Predicate to apply to the stencil values. - * - * \tparam InputIterator1 must be a model of Input Iterator and \c InputIterator1's \c value_type must be convertible to \c RandomAccessIterator's \c value_type. - * \tparam InputIterator2 must be a model of Input Iterator and \c InputIterator2's \c value_type must be convertible to \c RandomAccessIterator's \c difference_type. 
- * \tparam InputIterator3 must be a model of Input Iterator and \c InputIterator3's \c value_type must be convertible to \c Predicate's \c argument_type. - * \tparam RandomAccessIterator must be a model of Random Access iterator. - * \tparam Predicate must be a model of Predicate. - * - * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[first,last)` for all iterators `i` in the range `[map,map + (last - first))`. - * - * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[map,map + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. - * - * \pre The iterator `result + i` shall not refer to any element referenced by any iterator `j` in the range `[stencil,stencil + (last - first))` for all iterators `i` in the range `[map,map + (last - first))`. - * - * \pre The expression `result[*i]` shall be valid for all iterators `i` in the range `[map,map + (last - first))` for which the following condition holds: `pred(*(stencil + i)) != false`. - * - * \code - * #include - * - * struct is_even - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return (x % 2) == 0; - * } - * }; - * - * ... - * - * int V[8] = {10, 20, 30, 40, 50, 60, 70, 80}; - * int M[8] = {0, 5, 1, 6, 2, 7, 3, 4}; - * int S[8] = {2, 1, 2, 1, 2, 1, 2, 1}; - * int D[8] = {0, 0, 0, 0, 0, 0, 0, 0}; - * - * is_even pred; - * thrust::scatter_if(V, V + 8, M, S, D, pred); - * - * // D contains [10, 30, 50, 70, 0, 0, 0, 0]; - * \endcode - * - * \note \p scatter_if is the inverse of thrust::gather_if. - */ -template - void scatter_if(InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - InputIterator3 stencil, - RandomAccessIterator output, - Predicate pred); - - -/*! 
\} // end scattering - */ - - -} // end namespace thrust - -#include - diff --git a/compat/thrust/sequence.h b/compat/thrust/sequence.h deleted file mode 100644 index 6c54a5bbf4..0000000000 --- a/compat/thrust/sequence.h +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file sequence.h - * \brief Fills a range with a sequence of numbers - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup transformations - * \{ - */ - - -/*! \p sequence fills the range [first, last) with a sequence of numbers. - * - * For each iterator \c i in the range [first, last), this version of - * \p sequence performs the assignment *i = (i - first). - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable, - * and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then x + y is defined, - * and if \c T is \p ForwardIterator's \c value_type, then T(0) is defined. 
- * - * The following code snippet demonstrates how to use \p sequence to fill a range - * with a sequence of numbers using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 10; - * int A[N]; - * thrust::sequence(thrust::host, A, A + 10); - * // A is now {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} - * \endcode - * - * \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no - * guarantee on order of execution. - * - * \see http://www.sgi.com/tech/stl/iota.html - */ -template - void sequence(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last); - - -/*! \p sequence fills the range [first, last) with a sequence of numbers. - * - * For each iterator \c i in the range [first, last), this version of - * \p sequence performs the assignment *i = (i - first). - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable, - * and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then x + y is defined, - * and if \c T is \p ForwardIterator's \c value_type, then T(0) is defined. - * - * The following code snippet demonstrates how to use \p sequence to fill a range - * with a sequence of numbers. - * - * \code - * #include - * ... - * const int N = 10; - * int A[N]; - * thrust::sequence(A, A + 10); - * // A is now {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} - * \endcode - * - * \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no - * guarantee on order of execution. - * - * \see http://www.sgi.com/tech/stl/iota.html - */ -template - void sequence(ForwardIterator first, - ForwardIterator last); - - -/*! \p sequence fills the range [first, last) with a sequence of numbers. 
- * - * For each iterator \c i in the range [first, last), this version of - * \p sequence performs the assignment *i = init + (i - first). - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param init The first value of the sequence of numbers. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable, - * and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then x + y is defined, - * and if \c T is \p ForwardIterator's \c value_type, then T(0) is defined. - * \tparam T is a model of Assignable, - * and \p T is convertible to \p ForwardIterator's \c value_type. - * - * The following code snippet demonstrates how to use \p sequence to fill a range - * with a sequence of numbers starting from the value 1 using the \p thrust::host execution - * policy for parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 10; - * int A[N]; - * thrust::sequence(thrust::host, A, A + 10, 1); - * // A is now {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} - * \endcode - * - * \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no - * guarantee on order of execution. - * - * \see http://www.sgi.com/tech/stl/iota.html - */ -template - void sequence(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - T init); - - -/*! \p sequence fills the range [first, last) with a sequence of numbers. - * - * For each iterator \c i in the range [first, last), this version of - * \p sequence performs the assignment *i = init + (i - first). - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param init The first value of the sequence of numbers. 
- * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable, - * and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then x + y is defined, - * and if \c T is \p ForwardIterator's \c value_type, then T(0) is defined. - * \tparam T is a model of Assignable, - * and \p T is convertible to \p ForwardIterator's \c value_type. - * - * The following code snippet demonstrates how to use \p sequence to fill a range - * with a sequence of numbers starting from the value 1. - * - * \code - * #include - * ... - * const int N = 10; - * int A[N]; - * thrust::sequence(A, A + 10, 1); - * // A is now {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} - * \endcode - * - * \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no - * guarantee on order of execution. - * - * \see http://www.sgi.com/tech/stl/iota.html - */ -template - void sequence(ForwardIterator first, - ForwardIterator last, - T init); - - -/*! \p sequence fills the range [first, last) with a sequence of numbers. - * - * For each iterator \c i in the range [first, last), this version of - * \p sequence performs the assignment *i = init + step * (i - first). - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param init The first value of the sequence of numbers - * \param step The difference between consecutive elements. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable, - * and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then x + y is defined, - * and if \c T is \p ForwardIterator's \c value_type, then T(0) is defined. - * \tparam T is a model of Assignable, - * and \p T is convertible to \p ForwardIterator's \c value_type. 
- * - * The following code snippet demonstrates how to use \p sequence to fill a range - * with a sequence of numbers starting from the value 1 with a step size of 3 using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 10; - * int A[N]; - * thrust::sequence(thrust::host, A, A + 10, 1, 3); - * // A is now {1, 4, 7, 10, 13, 16, 19, 22, 25, 28} - * \endcode - * - * \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no - * guarantee on order of execution. - * - * \see http://www.sgi.com/tech/stl/iota.html - */ -template - void sequence(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - T init, - T step); - - -/*! \p sequence fills the range [first, last) with a sequence of numbers. - * - * For each iterator \c i in the range [first, last), this version of - * \p sequence performs the assignment *i = init + step * (i - first). - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param init The first value of the sequence of numbers - * \param step The difference between consecutive elements. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable, - * and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then x + y is defined, - * and if \c T is \p ForwardIterator's \c value_type, then T(0) is defined. - * \tparam T is a model of Assignable, - * and \p T is convertible to \p ForwardIterator's \c value_type. - * - * The following code snippet demonstrates how to use \p sequence to fill a range - * with a sequence of numbers starting from the value 1 with a step size of 3. - * - * \code - * #include - * ... 
- * const int N = 10; - * int A[N]; - * thrust::sequence(A, A + 10, 1, 3); - * // A is now {1, 4, 7, 10, 13, 16, 19, 22, 25, 28} - * \endcode - * - * \note Unlike the similar C++ STL function \c std::iota, \p sequence offers no - * guarantee on order of execution. - * - * \see http://www.sgi.com/tech/stl/iota.html - */ -template - void sequence(ForwardIterator first, - ForwardIterator last, - T init, - T step); - - -/*! \} // end transformations - */ - - -} // end namespace thrust - -#include - diff --git a/compat/thrust/set_operations.h b/compat/thrust/set_operations.h deleted file mode 100644 index a7ee624f92..0000000000 --- a/compat/thrust/set_operations.h +++ /dev/null @@ -1,2947 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file set_operations.h - * \brief Set theoretic operations for sorted ranges - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ - - -/*! \addtogroup set_operations Set Operations - * \ingroup algorithms - * \{ - */ - - -/*! \p set_difference constructs a sorted range that is the set difference of the sorted - * ranges [first1, last1) and [first2, last2). The return value is the - * end of the output range. - * - * In the simplest case, \p set_difference performs the "difference" operation from set - * theory: the output range contains a copy of every element that is contained in - * [first1, last1) and not contained in [first2, last1). 
The general case - * is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if [first1, last1) contains \c m elements - * that are equivalent to each other and if [first2, last2) contains \c n - * elements that are equivalent to them, the last max(m-n,0) elements from - * [first1, last1) range shall be copied to the output range. - * - * This version of \p set_difference compares elements using \c operator<. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. - * \param result The beginning of the output range. - * \return The end of the output range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. 
- * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. - * \pre The resulting range shall not overlap with either input range. - * - * The following code snippet demonstrates how to use \p set_difference to compute the - * set difference of two sets of integers sorted in ascending order using the \p thrust::host execution - * policy for parallelization: - * - * \code - * #include - * #include - * ... - * int A1[6] = {0, 1, 3, 4, 5, 6, 9}; - * int A2[5] = {1, 3, 5, 7, 9}; - * - * int result[3]; - * - * int *result_end = thrust::set_difference(thrust::host, A1, A1 + 6, A2, A2 + 5, result); - * // result is now {0, 4, 6} - * \endcode - * - * \see http://www.sgi.com/tech/stl/set_difference.html - * \see \p includes - * \see \p set_union - * \see \p set_intersection - * \see \p set_symmetric_difference - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator set_difference(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result); - - -/*! \p set_difference constructs a sorted range that is the set difference of the sorted - * ranges [first1, last1) and [first2, last2). The return value is the - * end of the output range. - * - * In the simplest case, \p set_difference performs the "difference" operation from set - * theory: the output range contains a copy of every element that is contained in - * [first1, last1) and not contained in [first2, last1). The general case - * is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if [first1, last1) contains \c m elements - * that are equivalent to each other and if [first2, last2) contains \c n - * elements that are equivalent to them, the last max(m-n,0) elements from - * [first1, last1) range shall be copied to the output range. 
- * - * This version of \p set_difference compares elements using \c operator<. - * - * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. - * \param result The beginning of the output range. - * \return The end of the output range. - * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. - * \pre The resulting range shall not overlap with either input range. - * - * The following code snippet demonstrates how to use \p set_difference to compute the - * set difference of two sets of integers sorted in ascending order. - * - * \code - * #include - * ... 
- * int A1[6] = {0, 1, 3, 4, 5, 6, 9}; - * int A2[5] = {1, 3, 5, 7, 9}; - * - * int result[3]; - * - * int *result_end = thrust::set_difference(A1, A1 + 6, A2, A2 + 5, result); - * // result is now {0, 4, 6} - * \endcode - * - * \see http://www.sgi.com/tech/stl/set_difference.html - * \see \p includes - * \see \p set_union - * \see \p set_intersection - * \see \p set_symmetric_difference - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator set_difference(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result); - - -/*! \p set_difference constructs a sorted range that is the set difference of the sorted - * ranges [first1, last1) and [first2, last2). The return value is the - * end of the output range. - * - * In the simplest case, \p set_difference performs the "difference" operation from set - * theory: the output range contains a copy of every element that is contained in - * [first1, last1) and not contained in [first2, last1). The general case - * is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if [first1, last1) contains \c m elements - * that are equivalent to each other and if [first2, last2) contains \c n - * elements that are equivalent to them, the last max(m-n,0) elements from - * [first1, last1) range shall be copied to the output range. - * - * This version of \p set_difference compares elements using a function object \p comp. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. - * \param result The beginning of the output range. - * \param comp Comparison operator. 
- * \return The end of the output range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1's \c value_type is convertable to \p StrictWeakCompare's \c first_argument_type. - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2's \c value_type is convertable to \p StrictWeakCompare's \c second_argument_type. - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam StrictWeakCompare is a model of Strict Weak Ordering. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. - * \pre The resulting range shall not overlap with either input range. - * - * The following code snippet demonstrates how to use \p set_difference to compute the - * set difference of two sets of integers sorted in descending order using the \p thrust::host execution - * policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * int A1[6] = {9, 6, 5, 4, 3, 1, 0}; - * int A2[5] = {9, 7, 5, 3, 1}; - * - * int result[3]; - * - * int *result_end = thrust::set_difference(thrust::host, A1, A1 + 6, A2, A2 + 5, result, thrust::greater()); - * // result is now {6, 4, 0} - * \endcode - * - * \see http://www.sgi.com/tech/stl/set_difference.html - * \see \p includes - * \see \p set_union - * \see \p set_intersection - * \see \p set_symmetric_difference - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator set_difference(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakCompare comp); - - -/*! 
\p set_difference constructs a sorted range that is the set difference of the sorted - * ranges [first1, last1) and [first2, last2). The return value is the - * end of the output range. - * - * In the simplest case, \p set_difference performs the "difference" operation from set - * theory: the output range contains a copy of every element that is contained in - * [first1, last1) and not contained in [first2, last1). The general case - * is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if [first1, last1) contains \c m elements - * that are equivalent to each other and if [first2, last2) contains \c n - * elements that are equivalent to them, the last max(m-n,0) elements from - * [first1, last1) range shall be copied to the output range. - * - * This version of \p set_difference compares elements using a function object \p comp. - * - * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. - * \param result The beginning of the output range. - * \param comp Comparison operator. - * \return The end of the output range. - * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1's \c value_type is convertable to \p StrictWeakCompare's \c first_argument_type. - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2's \c value_type is convertable to \p StrictWeakCompare's \c second_argument_type. - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam StrictWeakCompare is a model of Strict Weak Ordering. 
- * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. - * \pre The resulting range shall not overlap with either input range. - * - * The following code snippet demonstrates how to use \p set_difference to compute the - * set difference of two sets of integers sorted in descending order. - * - * \code - * #include - * #include - * ... - * int A1[6] = {9, 6, 5, 4, 3, 1, 0}; - * int A2[5] = {9, 7, 5, 3, 1}; - * - * int result[3]; - * - * int *result_end = thrust::set_difference(A1, A1 + 6, A2, A2 + 5, result, thrust::greater()); - * // result is now {6, 4, 0} - * \endcode - * - * \see http://www.sgi.com/tech/stl/set_difference.html - * \see \p includes - * \see \p set_union - * \see \p set_intersection - * \see \p set_symmetric_difference - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator set_difference(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakCompare comp); - - -/*! \p set_intersection constructs a sorted range that is the - * intersection of sorted ranges [first1, last1) and - * [first2, last2). The return value is the end of the - * output range. - * - * In the simplest case, \p set_intersection performs the - * "intersection" operation from set theory: the output range - * contains a copy of every element that is contained in both - * [first1, last1) and [first2, last2). The - * general case is more complicated, because the input ranges may - * contain duplicate elements. The generalization is that if a value - * appears \c m times in [first1, last1) and \c n times in - * [first2, last2) (where \c m may be zero), then it - * appears min(m,n) times in the output range. - * \p set_intersection is stable, meaning that both elements are - * copied from the first range rather than the second, and that the - * relative order of elements in the output range is the same as in - * the first input range. 
- * - * This version of \p set_intersection compares objects using - * \c operator<. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. - * \param result The beginning of the output range. - * \return The end of the output range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. - * \pre The resulting range shall not overlap with either input range. 
- * - * The following code snippet demonstrates how to use \p set_intersection to compute the - * set intersection of two sets of integers sorted in ascending order using the \p thrust::host execution - * policy for parallelization: - * - * \code - * #include - * #include - * ... - * int A1[6] = {1, 3, 5, 7, 9, 11}; - * int A2[7] = {1, 1, 2, 3, 5, 8, 13}; - * - * int result[7]; - * - * int *result_end = thrust::set_intersection(thrust::host, A1, A1 + 6, A2, A2 + 7, result); - * // result is now {1, 3, 5} - * \endcode - * - * \see http://www.sgi.com/tech/stl/set_intersection.html - * \see \p includes - * \see \p set_union - * \see \p set_intersection - * \see \p set_symmetric_difference - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator set_intersection(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result); - - -/*! \p set_intersection constructs a sorted range that is the - * intersection of sorted ranges [first1, last1) and - * [first2, last2). The return value is the end of the - * output range. - * - * In the simplest case, \p set_intersection performs the - * "intersection" operation from set theory: the output range - * contains a copy of every element that is contained in both - * [first1, last1) and [first2, last2). The - * general case is more complicated, because the input ranges may - * contain duplicate elements. The generalization is that if a value - * appears \c m times in [first1, last1) and \c n times in - * [first2, last2) (where \c m may be zero), then it - * appears min(m,n) times in the output range. - * \p set_intersection is stable, meaning that both elements are - * copied from the first range rather than the second, and that the - * relative order of elements in the output range is the same as in - * the first input range. 
- * - * This version of \p set_intersection compares objects using - * \c operator<. - * - * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. - * \param result The beginning of the output range. - * \return The end of the output range. - * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. - * \pre The resulting range shall not overlap with either input range. - * - * The following code snippet demonstrates how to use \p set_intersection to compute the - * set intersection of two sets of integers sorted in ascending order. - * - * \code - * #include - * ... 
- * int A1[6] = {1, 3, 5, 7, 9, 11}; - * int A2[7] = {1, 1, 2, 3, 5, 8, 13}; - * - * int result[7]; - * - * int *result_end = thrust::set_intersection(A1, A1 + 6, A2, A2 + 7, result); - * // result is now {1, 3, 5} - * \endcode - * - * \see http://www.sgi.com/tech/stl/set_intersection.html - * \see \p includes - * \see \p set_union - * \see \p set_intersection - * \see \p set_symmetric_difference - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator set_intersection(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result); - - -/*! \p set_intersection constructs a sorted range that is the - * intersection of sorted ranges [first1, last1) and - * [first2, last2). The return value is the end of the - * output range. - * - * In the simplest case, \p set_intersection performs the - * "intersection" operation from set theory: the output range - * contains a copy of every element that is contained in both - * [first1, last1) and [first2, last2). The - * general case is more complicated, because the input ranges may - * contain duplicate elements. The generalization is that if a value - * appears \c m times in [first1, last1) and \c n times in - * [first2, last2) (where \c m may be zero), then it - * appears min(m,n) times in the output range. - * \p set_intersection is stable, meaning that both elements are - * copied from the first range rather than the second, and that the - * relative order of elements in the output range is the same as in - * the first input range. - * - * This version of \p set_intersection compares elements using a function object \p comp. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. 
- * \param last2 The end of the second input range. - * \param result The beginning of the output range. - * \param comp Comparison operator. - * \return The end of the output range. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. - * \pre The resulting range shall not overlap with either input range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * - * The following code snippet demonstrates how to use \p set_intersection to compute - * the set intersection of sets of integers sorted in descending order using the \p thrust::host execution - * policy for parallelization: - * - * \code - * #include - * #include - * ... 
- * int A1[6] = {11, 9, 7, 5, 3, 1}; - * int A2[7] = {13, 8, 5, 3, 2, 1, 1}; - * - * int result[3]; - * - * int *result_end = thrust::set_intersection(thrust::host, A1, A1 + 6, A2, A2 + 7, result, thrust::greater()); - * // result is now {5, 3, 1} - * \endcode - * - * \see http://www.sgi.com/tech/stl/set_intersection.html - * \see \p includes - * \see \p set_union - * \see \p set_intersection - * \see \p set_symmetric_difference - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator set_intersection(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakCompare comp); - - -/*! \p set_intersection constructs a sorted range that is the - * intersection of sorted ranges [first1, last1) and - * [first2, last2). The return value is the end of the - * output range. - * - * In the simplest case, \p set_intersection performs the - * "intersection" operation from set theory: the output range - * contains a copy of every element that is contained in both - * [first1, last1) and [first2, last2). The - * general case is more complicated, because the input ranges may - * contain duplicate elements. The generalization is that if a value - * appears \c m times in [first1, last1) and \c n times in - * [first2, last2) (where \c m may be zero), then it - * appears min(m,n) times in the output range. - * \p set_intersection is stable, meaning that both elements are - * copied from the first range rather than the second, and that the - * relative order of elements in the output range is the same as in - * the first input range. - * - * This version of \p set_intersection compares elements using a function object \p comp. - * - * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. 
- * \param result The beginning of the output range. - * \param comp Comparison operator. - * \return The end of the output range. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. - * \pre The resulting range shall not overlap with either input range. - * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * - * The following code snippet demonstrates how to use \p set_intersection to compute - * the set intersection of sets of integers sorted in descending order. - * - * \code - * #include - * ... 
- * int A1[6] = {11, 9, 7, 5, 3, 1}; - * int A2[7] = {13, 8, 5, 3, 2, 1, 1}; - * - * int result[3]; - * - * int *result_end = thrust::set_intersection(A1, A1 + 6, A2, A2 + 7, result, thrust::greater<int>()); - * // result is now {5, 3, 1} - * \endcode - * - * \see http://www.sgi.com/tech/stl/set_intersection.html - * \see \p includes - * \see \p set_union - * \see \p set_intersection - * \see \p set_symmetric_difference - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator set_intersection(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakCompare comp); - - -/*! \p set_symmetric_difference constructs a sorted range that is the set symmetric - * difference of the sorted ranges [first1, last1) and [first2, last2). - * The return value is the end of the output range. - * - * In the simplest case, \p set_symmetric_difference performs a set theoretic calculation: - * it constructs the union of the two sets A - B and B - A, where A and B are the two - * input ranges. That is, the output range contains a copy of every element that is - * contained in [first1, last1) but not [first2, last2), and a copy of - * every element that is contained in [first2, last2) but not [first1, last1). - * The general case is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if [first1, last1) contains \c m elements that are - * equivalent to each other and [first2, last2) contains \c n elements that are - * equivalent to them, then |m - n| of those elements shall be copied to the output - * range: the last m - n elements from [first1, last1) if m > n, and - * the last n - m of these elements from [first2, last2) if m < n. - * - * This version of \p set_symmetric_difference compares elements using \c operator<. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. 
- * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. - * \param result The beginning of the output range. - * \return The end of the output range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. - * \pre The resulting range shall not overlap with either input range. - * - * The following code snippet demonstrates how to use \p set_symmetric_difference to compute - * the symmetric difference of two sets of integers sorted in ascending order using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * ... 
- * int A1[7] = {0, 1, 2, 2, 4, 6, 7}; - * int A2[5] = {1, 1, 2, 5, 8}; - * - * int result[8]; - * - * int *result_end = thrust::set_symmetric_difference(thrust::host, A1, A1 + 7, A2, A2 + 5, result); - * // result = {0, 1, 2, 4, 5, 6, 7, 8} - * \endcode - * - * \see http://www.sgi.com/tech/stl/set_symmetric_difference.html - * \see \p merge - * \see \p includes - * \see \p set_difference - * \see \p set_union - * \see \p set_intersection - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator set_symmetric_difference(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result); - - -/*! \p set_symmetric_difference constructs a sorted range that is the set symmetric - * difference of the sorted ranges [first1, last1) and [first2, last2). - * The return value is the end of the output range. - * - * In the simplest case, \p set_symmetric_difference performs a set theoretic calculation: - * it constructs the union of the two sets A - B and B - A, where A and B are the two - * input ranges. That is, the output range contains a copy of every element that is - * contained in [first1, last1) but not [first2, last2), and a copy of - * every element that is contained in [first2, last2) but not [first1, last1). - * The general case is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if [first1, last1) contains \c m elements that are - * equivalent to each other and [first2, last2) contains \c n elements that are - * equivalent to them, then |m - n| of those elements shall be copied to the output - * range: the last m - n elements from [first1, last1) if m > n, and - * the last n - m of these elements from [first2, last2) if m < n. - * - * This version of \p set_symmetric_difference compares elements using \c operator<. - * - * \param first1 The beginning of the first input range. 
- * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. - * \param result The beginning of the output range. - * \return The end of the output range. - * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. - * \pre The resulting range shall not overlap with either input range. - * - * The following code snippet demonstrates how to use \p set_symmetric_difference to compute - * the symmetric difference of two sets of integers sorted in ascending order. - * - * \code - * #include - * ... 
- * int A1[7] = {0, 1, 2, 2, 4, 6, 7}; - * int A2[5] = {1, 1, 2, 5, 8}; - * - * int result[8]; - * - * int *result_end = thrust::set_symmetric_difference(A1, A1 + 7, A2, A2 + 5, result); - * // result = {0, 1, 2, 4, 5, 6, 7, 8} - * \endcode - * - * \see http://www.sgi.com/tech/stl/set_symmetric_difference.html - * \see \p merge - * \see \p includes - * \see \p set_difference - * \see \p set_union - * \see \p set_intersection - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator set_symmetric_difference(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result); - - -/*! \p set_symmetric_difference constructs a sorted range that is the set symmetric - * difference of the sorted ranges [first1, last1) and [first2, last2). - * The return value is the end of the output range. - * - * In the simplest case, \p set_symmetric_difference performs a set theoretic calculation: - * it constructs the union of the two sets A - B and B - A, where A and B are the two - * input ranges. That is, the output range contains a copy of every element that is - * contained in [first1, last1) but not [first2, last2), and a copy of - * every element that is contained in [first2, last2) but not [first1, last1). - * The general case is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if [first1, last1) contains \c m elements that are - * equivalent to each other and [first2, last2) contains \c n elements that are - * equivalent to them, then |m - n| of those elements shall be copied to the output - * range: the last m - n elements from [first1, last1) if m > n, and - * the last n - m of these elements from [first2, last2) if m < n. - * - * This version of \p set_symmetric_difference compares elements using a function object \p comp. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. 
- * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. - * \param result The beginning of the output range. - * \param comp Comparison operator. - * \return The end of the output range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. - * \pre The resulting range shall not overlap with either input range. - * - * The following code snippet demonstrates how to use \p set_symmetric_difference to compute - * the symmetric difference of two sets of integers sorted in descending order using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * ... 
- * int A1[7] = {7, 6, 4, 2, 2, 1, 0}; - * int A2[5] = {8, 5, 2, 1, 1}; - * - * int result[8]; - * - * int *result_end = thrust::set_symmetric_difference(thrust::host, A1, A1 + 7, A2, A2 + 5, result, thrust::greater<int>()); - * // result = {8, 7, 6, 5, 4, 2, 1, 0} - * \endcode - * - * \see http://www.sgi.com/tech/stl/set_symmetric_difference.html - * \see \p merge - * \see \p includes - * \see \p set_difference - * \see \p set_union - * \see \p set_intersection - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator set_symmetric_difference(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakCompare comp); - - -/*! \p set_symmetric_difference constructs a sorted range that is the set symmetric - * difference of the sorted ranges [first1, last1) and [first2, last2). - * The return value is the end of the output range. - * - * In the simplest case, \p set_symmetric_difference performs a set theoretic calculation: - * it constructs the union of the two sets A - B and B - A, where A and B are the two - * input ranges. That is, the output range contains a copy of every element that is - * contained in [first1, last1) but not [first2, last2), and a copy of - * every element that is contained in [first2, last2) but not [first1, last1). - * The general case is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if [first1, last1) contains \c m elements that are - * equivalent to each other and [first2, last2) contains \c n elements that are - * equivalent to them, then |m - n| of those elements shall be copied to the output - * range: the last m - n elements from [first1, last1) if m > n, and - * the last n - m of these elements from [first2, last2) if m < n. - * - * This version of \p set_symmetric_difference compares elements using a function object \p comp. 
- * - * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. - * \param result The beginning of the output range. - * \param comp Comparison operator. - * \return The end of the output range. - * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. - * \pre The resulting range shall not overlap with either input range. - * - * The following code snippet demonstrates how to use \p set_symmetric_difference to compute - * the symmetric difference of two sets of integers sorted in descending order. - * - * \code - * #include - * ... 
- * int A1[7] = {7, 6, 4, 2, 2, 1, 0}; - * int A2[5] = {8, 5, 2, 1, 1}; - * - * int result[8]; - * - * int *result_end = thrust::set_symmetric_difference(A1, A1 + 7, A2, A2 + 5, result, thrust::greater<int>()); - * // result = {8, 7, 6, 5, 4, 2, 1, 0} - * \endcode - * - * \see http://www.sgi.com/tech/stl/set_symmetric_difference.html - * \see \p merge - * \see \p includes - * \see \p set_difference - * \see \p set_union - * \see \p set_intersection - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator set_symmetric_difference(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakCompare comp); - - -/*! \p set_union constructs a sorted range that is the union of the sorted ranges - * [first1, last1) and [first2, last2). The return value is the - * end of the output range. - * - * In the simplest case, \p set_union performs the "union" operation from set - * theory: the output range contains a copy of every element that is contained in - * [first1, last1), [first2, last2), or both. The general case - * is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if [first1, last1) contains \c m elements - * that are equivalent to each other and if [first2, last2) contains \c n - * elements that are equivalent to them, then all \c m elements from the first - * range shall be copied to the output range, in order, and then max(n - m, 0) - * elements from the second range shall be copied to the output, in order. - * - * This version of \p set_union compares elements using \c operator<. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. 
- * \param result The beginning of the output range. - * \return The end of the output range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. - * \pre The resulting range shall not overlap with either input range. - * - * The following code snippet demonstrates how to use \p set_union to compute the union of - * two sets of integers sorted in ascending order using the \p thrust::host execution policy for - * parallelization: - * - * \code - * #include - * #include - * ... 
- * int A1[7] = {0, 2, 4, 6, 8, 10, 12}; - * int A2[5] = {1, 3, 5, 7, 9}; - * - * int result[12]; - * - * int *result_end = thrust::set_union(thrust::host, A1, A1 + 7, A2, A2 + 5, result); - * // result = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12} - * \endcode - * - * \see http://www.sgi.com/tech/stl/set_union.html - * \see \p merge - * \see \p includes - * \see \p set_union - * \see \p set_intersection - * \see \p set_symmetric_difference - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator set_union(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result); - - -/*! \p set_union constructs a sorted range that is the union of the sorted ranges - * [first1, last1) and [first2, last2). The return value is the - * end of the output range. - * - * In the simplest case, \p set_union performs the "union" operation from set - * theory: the output range contains a copy of every element that is contained in - * [first1, last1), [first2, last2), or both. The general case - * is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if [first1, last1) contains \c m elements - * that are equivalent to each other and if [first2, last2) contains \c n - * elements that are equivalent to them, then all \c m elements from the first - * range shall be copied to the output range, in order, and then max(n - m, 0) - * elements from the second range shall be copied to the output, in order. - * - * This version of \p set_union compares elements using \c operator<. - * - * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. - * \param result The beginning of the output range. - * \return The end of the output range. 
- * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to operator<. - * \pre The resulting range shall not overlap with either input range. - * - * The following code snippet demonstrates how to use \p set_union to compute the union of - * two sets of integers sorted in ascending order. - * - * \code - * #include <thrust/set_operations.h> - * ... - * int A1[7] = {0, 2, 4, 6, 8, 10, 12}; - * int A2[5] = {1, 3, 5, 7, 9}; - * - * int result[12]; - * - * int *result_end = thrust::set_union(A1, A1 + 7, A2, A2 + 5, result); - * // result = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12} - * \endcode - * - * \see http://www.sgi.com/tech/stl/set_union.html - * \see \p merge - * \see \p includes - * \see \p set_union - * \see \p set_intersection - * \see \p set_symmetric_difference - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator set_union(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result); - - -/*! 
\p set_union constructs a sorted range that is the union of the sorted ranges - * [first1, last1) and [first2, last2). The return value is the - * end of the output range. - * - * In the simplest case, \p set_union performs the "union" operation from set - * theory: the output range contains a copy of every element that is contained in - * [first1, last1), [first2, last2), or both. The general case - * is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if [first1, last1) contains \c m elements - * that are equivalent to each other and if [first2, last2) contains \c n - * elements that are equivalent to them, then all \c m elements from the first - * range shall be copied to the output range, in order, and then max(n - m, 0) - * elements from the second range shall be copied to the output, in order. - * - * This version of \p set_union compares elements using a function object \p comp. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. - * \param result The beginning of the output range. - * \param comp Comparison operator. - * \return The end of the output range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1's \c value_type is convertible to \p StrictWeakCompare's \c first_argument_type, - * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2's \c value_type is convertible to \p StrictWeakCompare's \c second_argument_type, 
- * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam StrictWeakCompare is a model of Strict Weak Ordering. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. - * \pre The resulting range shall not overlap with either input range. - * - * The following code snippet demonstrates how to use \p set_union to compute the union of - * two sets of integers sorted in descending order using the \p thrust::host execution policy for - * parallelization: - * - * \code - * #include <thrust/set_operations.h> - * #include <thrust/functional.h> - * #include <thrust/execution_policy.h> - * ... - * int A1[7] = {12, 10, 8, 6, 4, 2, 0}; - * int A2[5] = {9, 7, 5, 3, 1}; - * - * int result[12]; - * - * int *result_end = thrust::set_union(thrust::host, A1, A1 + 7, A2, A2 + 5, result, thrust::greater<int>()); - * // result = {12, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0} - * \endcode - * - * \see http://www.sgi.com/tech/stl/set_union.html - * \see \p merge - * \see \p includes - * \see \p set_union - * \see \p set_intersection - * \see \p set_symmetric_difference - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator set_union(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakCompare comp); - - -/*! \p set_union constructs a sorted range that is the union of the sorted ranges - * [first1, last1) and [first2, last2). The return value is the - * end of the output range. - * - * In the simplest case, \p set_union performs the "union" operation from set - * theory: the output range contains a copy of every element that is contained in - * [first1, last1), [first2, last2), or both. The general case - * is more complicated, because the input ranges may contain duplicate elements. 
- * The generalization is that if [first1, last1) contains \c m elements - * that are equivalent to each other and if [first2, last2) contains \c n - * elements that are equivalent to them, then all \c m elements from the first - * range shall be copied to the output range, in order, and then max(n - m, 0) - * elements from the second range shall be copied to the output, in order. - * - * This version of \p set_union compares elements using a function object \p comp. - * - * \param first1 The beginning of the first input range. - * \param last1 The end of the first input range. - * \param first2 The beginning of the second input range. - * \param last2 The end of the second input range. - * \param result The beginning of the output range. - * \param comp Comparison operator. - * \return The end of the output range. - * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1's \c value_type is convertable to \p StrictWeakCompare's \c first_argument_type. - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2's \c value_type is convertable to \p StrictWeakCompare's \c second_argument_type. - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam StrictWeakCompare is a model of Strict Weak Ordering. - * - * \pre The ranges [first1, last1) and [first2, last2) shall be sorted with respect to \p comp. - * \pre The resulting range shall not overlap with either input range. - * - * The following code snippet demonstrates how to use \p set_union to compute the union of - * two sets of integers sorted in ascending order. - * - * \code - * #include - * #include - * ... 
- * int A1[7] = {12, 10, 8, 6, 4, 2, 0}; - * int A2[5] = {9, 7, 5, 3, 1}; - * - * int result[12]; - * - * int *result_end = thrust::set_union(A1, A1 + 7, A2, A2 + 5, result, thrust::greater<int>()); - * // result = {12, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0} - * \endcode - * - * \see http://www.sgi.com/tech/stl/set_union.html - * \see \p merge - * \see \p includes - * \see \p set_difference - * \see \p set_intersection - * \see \p set_symmetric_difference - * \see \p sort - * \see \p is_sorted - */ -template - OutputIterator set_union(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakCompare comp); - - -/*! \p set_difference_by_key performs a key-value difference operation from set theory. - * \p set_difference_by_key constructs a sorted range that is the difference of the sorted - * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated - * with each element from the input and output key ranges is a value element. The associated input - * value ranges need not be sorted. - * - * In the simplest case, \p set_difference_by_key performs the "difference" operation from set - * theory: the keys output range contains a copy of every element that is contained in - * [keys_first1, keys_last1) and not contained in [keys_first2, keys_last2). - * The general case is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if [keys_first1, keys_last1) contains \c m elements - * that are equivalent to each other and if [keys_first2, keys_last2) contains \c n - * elements that are equivalent to them, the last max(m-n,0) elements from the - * [keys_first1, keys_last1) range shall be copied to the output range.
- * - * Each time a key element is copied from [keys_first1, keys_last1) or - * [keys_first2, keys_last2) to the keys output range, the - * corresponding value element is copied from the corresponding values input range (beginning at - * \p values_first1 or \p values_first2) to the values output range. - * - * This version of \p set_difference_by_key compares key elements using \c operator<. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param values_first2 The beginning of the second input range of values. - * \param keys_result The beginning of the output range of keys. - * \param values_result The beginning of the output range of values. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types.
- * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam InputIterator4 is a model of Input Iterator, - * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use \p set_difference_by_key to compute the - * set difference of two sets of integers sorted in ascending order with their values using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * ... 
- * int A_keys[7] = {0, 1, 3, 4, 5, 6, 9}; - * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; - * - * int B_keys[5] = {1, 3, 5, 7, 9}; - * int B_vals[5] = {1, 1, 1, 1, 1}; - * - * int keys_result[3]; - * int vals_result[3]; - * - * thrust::pair<int*,int*> end = thrust::set_difference_by_key(thrust::host, A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result); - * // keys_result is now {0, 4, 6} - * // vals_result is now {0, 0, 0} - * \endcode - * - * \see \p set_union_by_key - * \see \p set_intersection_by_key - * \see \p set_symmetric_difference_by_key - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - set_difference_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -/*! \p set_difference_by_key performs a key-value difference operation from set theory. - * \p set_difference_by_key constructs a sorted range that is the difference of the sorted - * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated - * with each element from the input and output key ranges is a value element. The associated input - * value ranges need not be sorted. - * - * In the simplest case, \p set_difference_by_key performs the "difference" operation from set - * theory: the keys output range contains a copy of every element that is contained in - * [keys_first1, keys_last1) and not contained in [keys_first2, keys_last2). - * The general case is more complicated, because the input ranges may contain duplicate elements.
- * The generalization is that if [keys_first1, keys_last1) contains \c m elements - * that are equivalent to each other and if [keys_first2, keys_last2) contains \c n - * elements that are equivalent to them, the last max(m-n,0) elements from - * [keys_first1, keys_last1) range shall be copied to the output range. - * - * Each time a key element is copied from [keys_first1, keys_last1) or - * [keys_first2, keys_last2) is copied to the keys output range, the - * corresponding value element is copied from the corresponding values input range (beginning at - * \p values_first1 or \p values_first2) to the values output range. - * - * This version of \p set_difference_by_key compares key elements using \c operator<. - * - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param values_first2 The beginning of the first input range of values. - * \param keys_result The beginning of the output range of keys. - * \param values_result The beginning of the output range of values. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. - * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. 
- * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam InputIterator4 is a model of Input Iterator, - * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use \p set_difference_by_key to compute the - * set difference of two sets of integers sorted in ascending order with their values. - * - * \code - * #include - * ... 
- * int A_keys[7] = {0, 1, 3, 4, 5, 6, 9}; - * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; - * - * int B_keys[5] = {1, 3, 5, 7, 9}; - * int B_vals[5] = {1, 1, 1, 1, 1}; - * - * int keys_result[3]; - * int vals_result[3]; - * - * thrust::pair<int*,int*> end = thrust::set_difference_by_key(A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result); - * // keys_result is now {0, 4, 6} - * // vals_result is now {0, 0, 0} - * \endcode - * - * \see \p set_union_by_key - * \see \p set_intersection_by_key - * \see \p set_symmetric_difference_by_key - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - set_difference_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -/*! \p set_difference_by_key performs a key-value difference operation from set theory. - * \p set_difference_by_key constructs a sorted range that is the difference of the sorted - * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated - * with each element from the input and output key ranges is a value element. The associated input - * value ranges need not be sorted. - * - * In the simplest case, \p set_difference_by_key performs the "difference" operation from set - * theory: the keys output range contains a copy of every element that is contained in - * [keys_first1, keys_last1) and not contained in [keys_first2, keys_last2). - * The general case is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if [keys_first1, keys_last1) contains \c m elements - * that are equivalent to each other and if [keys_first2, keys_last2) contains \c n - * elements that are equivalent to them, the last max(m-n,0) elements from the - * [keys_first1, keys_last1) range shall be copied to the output range.
- * - * Each time a key element is copied from [keys_first1, keys_last1) or - * [keys_first2, keys_last2) is copied to the keys output range, the - * corresponding value element is copied from the corresponding values input range (beginning at - * \p values_first1 or \p values_first2) to the values output range. - * - * This version of \p set_difference_by_key compares key elements using a function object \p comp. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param values_first2 The beginning of the first input range of values. - * \param keys_result The beginning of the output range of keys. - * \param values_result The beginning of the output range of values. - * \param comp Comparison operator. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. 
- * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam InputIterator4 is a model of Input Iterator, - * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam StrictWeakCompare is a model of Strict Weak Ordering. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use \p set_difference_by_key to compute the - * set difference of two sets of integers sorted in descending order with their values using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... 
- * int A_keys[7] = {9, 6, 5, 4, 3, 1, 0}; - * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; - * - * int B_keys[5] = {9, 7, 5, 3, 1}; - * int B_vals[5] = {1, 1, 1, 1, 1}; - * - * int keys_result[3]; - * int vals_result[3]; - * - * thrust::pair<int*,int*> end = thrust::set_difference_by_key(thrust::host, A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater<int>()); - * // keys_result is now {6, 4, 0} - * // vals_result is now {0, 0, 0} - * \endcode - * - * \see \p set_union_by_key - * \see \p set_intersection_by_key - * \see \p set_symmetric_difference_by_key - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - set_difference_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakCompare comp); - - -/*! \p set_difference_by_key performs a key-value difference operation from set theory. - * \p set_difference_by_key constructs a sorted range that is the difference of the sorted - * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated - * with each element from the input and output key ranges is a value element. The associated input - * value ranges need not be sorted. - * - * In the simplest case, \p set_difference_by_key performs the "difference" operation from set - * theory: the keys output range contains a copy of every element that is contained in - * [keys_first1, keys_last1) and not contained in [keys_first2, keys_last2). - * The general case is more complicated, because the input ranges may contain duplicate elements.
- * The generalization is that if [keys_first1, keys_last1) contains \c m elements - * that are equivalent to each other and if [keys_first2, keys_last2) contains \c n - * elements that are equivalent to them, the last max(m-n,0) elements from - * [keys_first1, keys_last1) range shall be copied to the output range. - * - * Each time a key element is copied from [keys_first1, keys_last1) or - * [keys_first2, keys_last2) is copied to the keys output range, the - * corresponding value element is copied from the corresponding values input range (beginning at - * \p values_first1 or \p values_first2) to the values output range. - * - * This version of \p set_difference_by_key compares key elements using a function object \p comp. - * - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param values_first2 The beginning of the first input range of values. - * \param keys_result The beginning of the output range of keys. - * \param values_result The beginning of the output range of values. - * \param comp Comparison operator. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. - * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. 
- * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam InputIterator4 is a model of Input Iterator, - * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam StrictWeakCompare is a model of Strict Weak Ordering. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use \p set_difference_by_key to compute the - * set difference of two sets of integers sorted in descending order with their values. - * - * \code - * #include - * #include - * ... 
- * int A_keys[7] = {9, 6, 5, 4, 3, 1, 0}; - * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; - * - * int B_keys[5] = {9, 7, 5, 3, 1}; - * int B_vals[5] = {1, 1, 1, 1, 1}; - * - * int keys_result[3]; - * int vals_result[3]; - * - * thrust::pair<int*,int*> end = thrust::set_difference_by_key(A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater<int>()); - * // keys_result is now {6, 4, 0} - * // vals_result is now {0, 0, 0} - * \endcode - * - * \see \p set_union_by_key - * \see \p set_intersection_by_key - * \see \p set_symmetric_difference_by_key - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - set_difference_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakCompare comp); - - -/*! \p set_intersection_by_key performs a key-value intersection operation from set theory. - * \p set_intersection_by_key constructs a sorted range that is the intersection of the sorted - * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated - * with each element from the input and output key ranges is a value element. The associated input - * value ranges need not be sorted. - * - * In the simplest case, \p set_intersection_by_key performs the "intersection" operation from set - * theory: the keys output range contains a copy of every element that is contained in both - * [keys_first1, keys_last1) and [keys_first2, keys_last2). - * The general case is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if an element appears \c m times in [keys_first1, keys_last1) - * and \c n times in [keys_first2, keys_last2) (where \c m may be zero), then it - * appears min(m,n) times in the keys output range.
- * \p set_intersection_by_key is stable, meaning both that elements are copied from the first - * input range rather than the second, and that the relative order of elements in the output range - * is the same as the first input range. - * - * Each time a key element is copied from [keys_first1, keys_last1) to the keys output range, - * the corresponding value element is copied from [values_first1, values_last1) to the values - * output range. - * - * This version of \p set_intersection_by_key compares objects using \c operator<. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param keys_result The beginning of the output range of keys. - * \param values_result The beginning of the output range of values. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. - * - * \note Unlike the other key-value set operations, \p set_intersection_by_key is unique in that it has no - * \c values_first2 parameter because elements from the second input range are never copied to the output range. - * - * \tparam DerivedPolicy The name of the derived execution policy. 
- * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use \p set_intersection_by_key to compute the - * set intersection of two sets of integers sorted in ascending order with their values using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * ... 
- * int A_keys[6] = {1, 3, 5, 7, 9, 11}; - * int A_vals[6] = {0, 0, 0, 0, 0, 0}; - * - * int B_keys[7] = {1, 1, 2, 3, 5, 8, 13}; - * - * int keys_result[7]; - * int vals_result[7]; - * - * thrust::pair end = thrust::set_intersection_by_key(thrust::host, A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, keys_result, vals_result); - * - * // keys_result is now {1, 3, 5} - * // vals_result is now {0, 0, 0} - * \endcode - * - * \see \p set_union_by_key - * \see \p set_difference_by_key - * \see \p set_symmetric_difference_by_key - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - set_intersection_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -/*! \p set_intersection_by_key performs a key-value intersection operation from set theory. - * \p set_intersection_by_key constructs a sorted range that is the intersection of the sorted - * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated - * with each element from the input and output key ranges is a value element. The associated input - * value ranges need not be sorted. - * - * In the simplest case, \p set_intersection_by_key performs the "intersection" operation from set - * theory: the keys output range contains a copy of every element that is contained in both - * [keys_first1, keys_last1) [keys_first2, keys_last2). - * The general case is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if an element appears \c m times in [keys_first1, keys_last1) - * and \c n times in [keys_first2, keys_last2) (where \c m may be zero), then it - * appears min(m,n) times in the keys output range. 
- * \p set_intersection_by_key is stable, meaning both that elements are copied from the first - * input range rather than the second, and that the relative order of elements in the output range - * is the same as the first input range. - * - * Each time a key element is copied from [keys_first1, keys_last1) to the keys output range, - * the corresponding value element is copied from [values_first1, values_last1) to the values - * output range. - * - * This version of \p set_intersection_by_key compares objects using \c operator<. - * - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param keys_result The beginning of the output range of keys. - * \param values_result The beginning of the output range of values. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. - * - * \note Unlike the other key-value set operations, \p set_intersection_by_key is unique in that it has no - * \c values_first2 parameter because elements from the second input range are never copied to the output range. - * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. 
- * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use \p set_intersection_by_key to compute the - * set intersection of two sets of integers sorted in ascending order with their values. - * - * \code - * #include - * ... - * int A_keys[6] = {1, 3, 5, 7, 9, 11}; - * int A_vals[6] = {0, 0, 0, 0, 0, 0}; - * - * int B_keys[7] = {1, 1, 2, 3, 5, 8, 13}; - * - * int keys_result[7]; - * int vals_result[7]; - * - * thrust::pair end = thrust::set_intersection_by_key(A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, keys_result, vals_result); - * - * // keys_result is now {1, 3, 5} - * // vals_result is now {0, 0, 0} - * \endcode - * - * \see \p set_union_by_key - * \see \p set_difference_by_key - * \see \p set_symmetric_difference_by_key - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - set_intersection_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -/*! 
\p set_intersection_by_key performs a key-value intersection operation from set theory. - * \p set_intersection_by_key constructs a sorted range that is the intersection of the sorted - * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated - * with each element from the input and output key ranges is a value element. The associated input - * value ranges need not be sorted. - * - * In the simplest case, \p set_intersection_by_key performs the "intersection" operation from set - * theory: the keys output range contains a copy of every element that is contained in both - * [keys_first1, keys_last1) and [keys_first2, keys_last2). - * The general case is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if an element appears \c m times in [keys_first1, keys_last1) - * and \c n times in [keys_first2, keys_last2) (where \c m may be zero), then it - * appears min(m,n) times in the keys output range. - * \p set_intersection_by_key is stable, meaning both that elements are copied from the first - * input range rather than the second, and that the relative order of elements in the output range - * is the same as the first input range. - * - * Each time a key element is copied from [keys_first1, keys_last1) to the keys output range, - * the corresponding value element is copied from [values_first1, values_last1) to the values - * output range. - * - * This version of \p set_intersection_by_key compares objects using a function object \p comp. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. 
- * \param values_first1 The beginning of the first input range of values. - * \param keys_result The beginning of the output range of keys. - * \param values_result The beginning of the output range of values. - * \param comp Comparison operator. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. - * - * \note Unlike the other key-value set operations, \p set_intersection_by_key is unique in that it has no - * \c values_first2 parameter because elements from the second input range are never copied to the output range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam StrictWeakCompare is a model of Strict Weak Ordering. 
- * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use \p set_intersection_by_key to compute the - * set intersection of two sets of integers sorted in descending order with their values using the - * \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * int A_keys[6] = {11, 9, 7, 5, 3, 1}; - * int A_vals[6] = { 0, 0, 0, 0, 0, 0}; - * - * int B_keys[7] = {13, 8, 5, 3, 2, 1, 1}; - * - * int keys_result[7]; - * int vals_result[7]; - * - * thrust::pair end = thrust::set_intersection_by_key(thrust::host, A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, keys_result, vals_result, thrust::greater()); - * - * // keys_result is now {5, 3, 1} - * // vals_result is now {0, 0, 0} - * \endcode - * - * \see \p set_union_by_key - * \see \p set_difference_by_key - * \see \p set_symmetric_difference_by_key - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - set_intersection_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakCompare comp); - - -/*! \p set_intersection_by_key performs a key-value intersection operation from set theory. - * \p set_intersection_by_key constructs a sorted range that is the intersection of the sorted - * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated - * with each element from the input and output key ranges is a value element. The associated input - * value ranges need not be sorted. 
- * - * In the simplest case, \p set_intersection_by_key performs the "intersection" operation from set - * theory: the keys output range contains a copy of every element that is contained in both - * [keys_first1, keys_last1) and [keys_first2, keys_last2). - * The general case is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if an element appears \c m times in [keys_first1, keys_last1) - * and \c n times in [keys_first2, keys_last2) (where \c m may be zero), then it - * appears min(m,n) times in the keys output range. - * \p set_intersection_by_key is stable, meaning both that elements are copied from the first - * input range rather than the second, and that the relative order of elements in the output range - * is the same as the first input range. - * - * Each time a key element is copied from [keys_first1, keys_last1) to the keys output range, - * the corresponding value element is copied from [values_first1, values_last1) to the values - * output range. - * - * This version of \p set_intersection_by_key compares objects using a function object \p comp. - * - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param keys_result The beginning of the output range of keys. - * \param values_result The beginning of the output range of values. - * \param comp Comparison operator. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. 
- * - * \note Unlike the other key-value set operations, \p set_intersection_by_key is unique in that it has no - * \c values_first2 parameter because elements from the second input range are never copied to the output range. - * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam StrictWeakCompare is a model of Strict Weak Ordering. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use \p set_intersection_by_key to compute the - * set intersection of two sets of integers sorted in descending order with their values. - * - * \code - * #include - * #include - * ... 
- * int A_keys[6] = {11, 9, 7, 5, 3, 1}; - * int A_vals[6] = { 0, 0, 0, 0, 0, 0}; - * - * int B_keys[7] = {13, 8, 5, 3, 2, 1, 1}; - * - * int keys_result[7]; - * int vals_result[7]; - * - * thrust::pair end = thrust::set_intersection_by_key(A_keys, A_keys + 6, B_keys, B_keys + 7, A_vals, keys_result, vals_result, thrust::greater()); - * - * // keys_result is now {5, 3, 1} - * // vals_result is now {0, 0, 0} - * \endcode - * - * \see \p set_union_by_key - * \see \p set_difference_by_key - * \see \p set_symmetric_difference_by_key - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - set_intersection_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakCompare comp); - - -/*! \p set_symmetric_difference_by_key performs a key-value symmetric difference operation from set theory. - * \p set_symmetric_difference_by_key constructs a sorted range that is the symmetric difference of the sorted - * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated - * with each element from the input and output key ranges is a value element. The associated input - * value ranges need not be sorted. - * - * In the simplest case, \p set_symmetric_difference_by_key performs a set theoretic calculation: - * it constructs the union of the two sets A - B and B - A, where A and B are the two - * input ranges. That is, the output range contains a copy of every element that is - * contained in [keys_first1, keys_last1) but not [keys_first2, keys_last2), and a copy of - * every element that is contained in [keys_first2, keys_last2) but not [keys_first1, keys_last1). - * The general case is more complicated, because the input ranges may contain duplicate elements. 
- * The generalization is that if [keys_first1, keys_last1) contains \c m elements that are - * equivalent to each other and [keys_first2, keys_last2) contains \c n elements that are - * equivalent to them, then |m - n| of those elements shall be copied to the output - * range: the last m - n elements from [keys_first1, keys_last1) if m > n, and - * the last n - m of these elements from [keys_first2, keys_last2) if m < n. - * - * Each time a key element from [keys_first1, keys_last1) or - * [keys_first2, keys_last2) is copied to the keys output range, the - * corresponding value element is copied from the corresponding values input range (beginning at - * \p values_first1 or \p values_first2) to the values output range. - * - * This version of \p set_symmetric_difference_by_key compares key elements using \c operator<. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param values_first2 The beginning of the second input range of values. - * \param keys_result The beginning of the output range of keys. - * \param values_result The beginning of the output range of values. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. - * - * \tparam DerivedPolicy The name of the derived execution policy. 
- * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam InputIterator4 is a model of Input Iterator, - * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use \p set_symmetric_difference_by_key to compute the - * symmetric difference of two sets of integers sorted in ascending order with their values using the - * \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... 
- * int A_keys[7] = {0, 1, 2, 2, 4, 6, 7}; - * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; - * - * int B_keys[5] = {1, 1, 2, 5, 8}; - * int B_vals[5] = {1, 1, 1, 1, 1}; - * - * int keys_result[8]; - * int vals_result[8]; - * - * thrust::pair end = thrust::set_symmetric_difference_by_key(thrust::host, A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result); - * // keys_result is now {0, 1, 2, 4, 5, 6, 7, 8} - * // vals_result is now {0, 1, 0, 0, 1, 0, 0, 1} - * \endcode - * - * \see \p set_union_by_key - * \see \p set_intersection_by_key - * \see \p set_difference_by_key - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - set_symmetric_difference_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -/*! \p set_symmetric_difference_by_key performs a key-value symmetric difference operation from set theory. - * \p set_symmetric_difference_by_key constructs a sorted range that is the symmetric difference of the sorted - * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated - * with each element from the input and output key ranges is a value element. The associated input - * value ranges need not be sorted. - * - * In the simplest case, \p set_symmetric_difference_by_key performs a set theoretic calculation: - * it constructs the union of the two sets A - B and B - A, where A and B are the two - * input ranges. That is, the output range contains a copy of every element that is - * contained in [keys_first1, keys_last1) but not [keys_first2, keys_last2), and a copy of - * every element that is contained in [keys_first2, keys_last2) but not [keys_first1, keys_last1). 
- * The general case is more complicated, because the input ranges may contain duplicate elements. 
- * The generalization is that if [keys_first1, keys_last1) contains \c m elements that are - * equivalent to each other and [keys_first2, keys_last2) contains \c n elements that are - * equivalent to them, then |m - n| of those elements shall be copied to the output - * range: the last m - n elements from [keys_first1, keys_last1) if m > n, and - * the last n - m of these elements from [keys_first2, keys_last2) if m < n. - * - * Each time a key element from [keys_first1, keys_last1) or - * [keys_first2, keys_last2) is copied to the keys output range, the - * corresponding value element is copied from the corresponding values input range (beginning at - * \p values_first1 or \p values_first2) to the values output range. - * - * This version of \p set_symmetric_difference_by_key compares key elements using \c operator<. - * - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param values_first2 The beginning of the second input range of values. - * \param keys_result The beginning of the output range of keys. - * \param values_result The beginning of the output range of values. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. 
- * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam InputIterator4 is a model of Input Iterator, - * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use \p set_symmetric_difference_by_key to compute the - * symmetric difference of two sets of integers sorted in ascending order with their values. - * - * \code - * #include - * ... 
- * int A_keys[7] = {0, 1, 2, 2, 4, 6, 7}; - * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; - * - * int B_keys[5] = {1, 1, 2, 5, 8}; - * int B_vals[5] = {1, 1, 1, 1, 1}; - * - * int keys_result[8]; - * int vals_result[8]; - * - * thrust::pair end = thrust::set_symmetric_difference_by_key(A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result); - * // keys_result is now {0, 1, 2, 4, 5, 6, 7, 8} - * // vals_result is now {0, 1, 0, 0, 1, 0, 0, 1} - * \endcode - * - * \see \p set_union_by_key - * \see \p set_intersection_by_key - * \see \p set_difference_by_key - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - set_symmetric_difference_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -/*! \p set_symmetric_difference_by_key performs a key-value symmetric difference operation from set theory. - * \p set_symmetric_difference_by_key constructs a sorted range that is the symmetric difference of the sorted - * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated - * with each element from the input and output key ranges is a value element. The associated input - * value ranges need not be sorted. - * - * In the simplest case, \p set_symmetric_difference_by_key performs a set theoretic calculation: - * it constructs the union of the two sets A - B and B - A, where A and B are the two - * input ranges. That is, the output range contains a copy of every element that is - * contained in [keys_first1, keys_last1) but not [keys_first2, keys_last2), and a copy of - * every element that is contained in [keys_first2, keys_last2) but not [keys_first1, keys_last1). - * The general case is more complicated, because the input ranges may contain duplicate elements. 
- * The generalization is that if [keys_first1, keys_last1) contains \c m elements that are - * equivalent to each other and [keys_first2, keys_last2) contains \c n elements that are - * equivalent to them, then |m - n| of those elements shall be copied to the output - * range: the last m - n elements from [keys_first1, keys_last1) if m > n, and - * the last n - m of these elements from [keys_first2, keys_last2) if m < n. - * - * Each time a key element from [keys_first1, keys_last1) or - * [keys_first2, keys_last2) is copied to the keys output range, the - * corresponding value element is copied from the corresponding values input range (beginning at - * \p values_first1 or \p values_first2) to the values output range. - * - * This version of \p set_symmetric_difference_by_key compares key elements using a function object \c comp. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param values_first2 The beginning of the second input range of values. - * \param keys_result The beginning of the output range of keys. - * \param values_result The beginning of the output range of values. - * \param comp Comparison operator. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. - * - * \tparam DerivedPolicy The name of the derived execution policy. 
- * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam InputIterator4 is a model of Input Iterator, - * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam StrictWeakCompare is a model of Strict Weak Ordering. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use \p set_symmetric_difference_by_key to compute the - * symmetric difference of two sets of integers sorted in descending order with their values using the - * \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... 
- * int A_keys[7] = {7, 6, 4, 2, 2, 1, 0}; - * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; - * - * int B_keys[5] = {8, 5, 2, 1, 1}; - * int B_vals[5] = {1, 1, 1, 1, 1}; - * - * int keys_result[8]; - * int vals_result[8]; - * - * thrust::pair end = thrust::set_symmetric_difference_by_key(thrust::host, A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater<int>()); - * // keys_result is now {8, 7, 6, 5, 4, 2, 1, 0} - * // vals_result is now {1, 0, 0, 1, 0, 0, 1, 0} - * \endcode - * - * \see \p set_union_by_key - * \see \p set_intersection_by_key - * \see \p set_difference_by_key - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - set_symmetric_difference_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakCompare comp); - - -/*! \p set_symmetric_difference_by_key performs a key-value symmetric difference operation from set theory. - * \p set_symmetric_difference_by_key constructs a sorted range that is the symmetric difference of the sorted - * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated - * with each element from the input and output key ranges is a value element. The associated input - * value ranges need not be sorted. - * - * In the simplest case, \p set_symmetric_difference_by_key performs a set theoretic calculation: - * it constructs the union of the two sets A - B and B - A, where A and B are the two - * input ranges. That is, the output range contains a copy of every element that is - * contained in [keys_first1, keys_last1) but not [keys_first2, keys_last2), and a copy of - * every element that is contained in [keys_first2, keys_last2) but not [keys_first1, keys_last1). 
- * The general case is more complicated, because the input ranges may contain duplicate elements. - * The generalization is that if [keys_first1, keys_last1) contains \c m elements that are - * equivalent to each other and [keys_first2, keys_last2) contains \c n elements that are - * equivalent to them, then |m - n| of those elements shall be copied to the output - * range: the last m - n elements from [keys_first1, keys_last1) if m > n, and - * the last n - m of these elements from [keys_first2, keys_last2) if m < n. - * - * Each time a key element from [keys_first1, keys_last1) or - * [keys_first2, keys_last2) is copied to the keys output range, the - * corresponding value element is copied from the corresponding values input range (beginning at - * \p values_first1 or \p values_first2) to the values output range. - * - * This version of \p set_symmetric_difference_by_key compares key elements using a function object \c comp. - * - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param values_first2 The beginning of the second input range of values. - * \param keys_result The beginning of the output range of keys. - * \param values_result The beginning of the output range of values. - * \param comp Comparison operator. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. 
- * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam InputIterator4 is a model of Input Iterator, - * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam StrictWeakCompare is a model of Strict Weak Ordering. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use \p set_symmetric_difference_by_key to compute the - * symmetric difference of two sets of integers sorted in descending order with their values. - * - * \code - * #include - * #include - * ... 
- * int A_keys[7] = {7, 6, 4, 2, 2, 1, 0}; - * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; - * - * int B_keys[5] = {8, 5, 2, 1, 1}; - * int B_vals[5] = {1, 1, 1, 1, 1}; - * - * int keys_result[8]; - * int vals_result[8]; - * - * thrust::pair end = thrust::set_symmetric_difference_by_key(A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater<int>()); - * // keys_result is now {8, 7, 6, 5, 4, 2, 1, 0} - * // vals_result is now {1, 0, 0, 1, 0, 0, 1, 0} - * \endcode - * - * \see \p set_union_by_key - * \see \p set_intersection_by_key - * \see \p set_difference_by_key - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - set_symmetric_difference_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakCompare comp); - - -/*! \p set_union_by_key performs a key-value union operation from set theory. - * \p set_union_by_key constructs a sorted range that is the union of the sorted - * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated - * with each element from the input and output key ranges is a value element. The associated input - * value ranges need not be sorted. - * - * In the simplest case, \p set_union_by_key performs the "union" operation from set theory: - * the output range contains a copy of every element that is contained in - * [keys_first1, keys_last1), [keys_first2, keys_last2), or both. The general case - * is more complicated, because the input ranges may contain duplicate elements. 
- * The generalization is that if [keys_first1, keys_last1) contains \c m elements - * that are equivalent to each other and if [keys_first2, keys_last2) contains \c n - * elements that are equivalent to them, then all \c m elements from the first - * range shall be copied to the output range, in order, and then max(n - m, 0) - * elements from the second range shall be copied to the output, in order. - * - * Each time a key element from [keys_first1, keys_last1) or - * [keys_first2, keys_last2) is copied to the keys output range, the - * corresponding value element is copied from the corresponding values input range (beginning at - * \p values_first1 or \p values_first2) to the values output range. - * - * This version of \p set_union_by_key compares key elements using \c operator<. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param values_first2 The beginning of the second input range of values. - * \param keys_result The beginning of the output range of keys. - * \param values_result The beginning of the output range of values. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. - * - * \tparam DerivedPolicy The name of the derived execution policy. 
- * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertible to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam InputIterator4 is a model of Input Iterator, - * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use \p set_union_by_key to compute the - * union of two sets of integers sorted in ascending order with their values using the - * \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... 
- * int A_keys[6] = {0, 2, 4, 6, 8, 10, 12}; - * int A_vals[6] = {0, 0, 0, 0, 0, 0, 0}; - * - * int B_keys[5] = {1, 3, 5, 7, 9}; - * int B_vals[5] = {1, 1, 1, 1, 1}; - * - * int keys_result[11]; - * int vals_result[11]; - * - * thrust::pair end = thrust::set_symmetric_difference_by_key(thrust::host, A_keys, A_keys + 6, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result); - * // keys_result is now {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12} - * // vals_result is now {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0} - * \endcode - * - * \see \p set_symmetric_difference_by_key - * \see \p set_intersection_by_key - * \see \p set_difference_by_key - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - set_union_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -/*! \p set_union_by_key performs a key-value union operation from set theory. - * \p set_union_by_key constructs a sorted range that is the union of the sorted - * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated - * with each element from the input and output key ranges is a value element. The associated input - * value ranges need not be sorted. - * - * In the simplest case, \p set_union_by_key performs the "union" operation from set theory: - * the output range contains a copy of every element that is contained in - * [keys_first1, keys_last1), [keys_first2, keys_last1), or both. The general case - * is more complicated, because the input ranges may contain duplicate elements. 
- * The generalization is that if [keys_first1, keys_last1) contains \c m elements - * that are equivalent to each other and if [keys_first2, keys_last2) contains \c n - * elements that are equivalent to them, then all \c m elements from the first - * range shall be copied to the output range, in order, and then max(n - m, 0) - * elements from the second range shall be copied to the output, in order. - * - * Each time a key element from [keys_first1, keys_last1) or - * [keys_first2, keys_last2) is copied to the keys output range, the - * corresponding value element is copied from the corresponding values input range (beginning at - * \p values_first1 or \p values_first2) to the values output range. - * - * This version of \p set_union_by_key compares key elements using \c operator<. - * - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param values_first2 The beginning of the second input range of values. - * \param keys_result The beginning of the output range of keys. - * \param values_result The beginning of the output range of values. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. - * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertible to a type in \p OutputIterator1's set of \c value_types. 
- * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam InputIterator4 is a model of Input Iterator, - * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to operator<. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use \p set_symmetric_difference_by_key to compute the - * symmetric difference of two sets of integers sorted in ascending order with their values. - * - * \code - * #include - * ... 
- * int A_keys[7] = {0, 2, 4, 6, 8, 10, 12}; - * int A_vals[7] = {0, 0, 0, 0, 0, 0, 0}; - * - * int B_keys[5] = {1, 3, 5, 7, 9}; - * int B_vals[5] = {1, 1, 1, 1, 1}; - * - * int keys_result[12]; - * int vals_result[12]; - * - * thrust::pair<int*,int*> end = thrust::set_union_by_key(A_keys, A_keys + 7, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result); - * // keys_result is now {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12} - * // vals_result is now {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0} - * \endcode - * - * \see \p set_symmetric_difference_by_key - * \see \p set_intersection_by_key - * \see \p set_difference_by_key - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - set_union_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -/*! \p set_union_by_key performs a key-value union operation from set theory. - * \p set_union_by_key constructs a sorted range that is the union of the sorted - * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated - * with each element from the input and output key ranges is a value element. The associated input - * value ranges need not be sorted. - * - * In the simplest case, \p set_union_by_key performs the "union" operation from set theory: - * the output range contains a copy of every element that is contained in - * [keys_first1, keys_last1), [keys_first2, keys_last2), or both. The general case - * is more complicated, because the input ranges may contain duplicate elements. 
- * The generalization is that if [keys_first1, keys_last1) contains \c m elements - * that are equivalent to each other and if [keys_first2, keys_last2) contains \c n - * elements that are equivalent to them, then all \c m elements from the first - * range shall be copied to the output range, in order, and then max(n - m, 0) - * elements from the second range shall be copied to the output, in order. - * - * Each time a key element is copied from [keys_first1, keys_last1) or - * [keys_first2, keys_last2) is copied to the keys output range, the - * corresponding value element is copied from the corresponding values input range (beginning at - * \p values_first1 or \p values_first2) to the values output range. - * - * This version of \p set_union_by_key compares key elements using a function object \c comp. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param values_first2 The beginning of the first input range of values. - * \param keys_result The beginning of the output range of keys. - * \param values_result The beginning of the output range of values. - * \param comp Comparison operator. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. - * - * \tparam DerivedPolicy The name of the derived execution policy. 
- * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam InputIterator4 is a model of Input Iterator, - * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam StrictWeakCompare is a model of Strict Weak Ordering. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use \p set_symmetric_difference_by_key to compute the - * symmetric difference of two sets of integers sorted in descending order with their values using the - * \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... 
- * int A_keys[6] = {12, 10, 8, 6, 4, 2, 0}; - * int A_vals[6] = { 0, 0, 0, 0, 0, 0, 0}; - * - * int B_keys[5] = {9, 7, 5, 3, 1}; - * int B_vals[5] = {1, 1, 1, 1, 1}; - * - * int keys_result[11]; - * int vals_result[11]; - * - * thrust::pair end = thrust::set_symmetric_difference_by_key(thrust::host, A_keys, A_keys + 6, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater()); - * // keys_result is now {12, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0} - * // vals_result is now { 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0} - * \endcode - * - * \see \p set_symmetric_difference_by_key - * \see \p set_intersection_by_key - * \see \p set_difference_by_key - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - set_union_by_key(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakCompare comp); - - -/*! \p set_union_by_key performs a key-value union operation from set theory. - * \p set_union_by_key constructs a sorted range that is the union of the sorted - * ranges [keys_first1, keys_last1) and [keys_first2, keys_last2). Associated - * with each element from the input and output key ranges is a value element. The associated input - * value ranges need not be sorted. - * - * In the simplest case, \p set_union_by_key performs the "union" operation from set theory: - * the output range contains a copy of every element that is contained in - * [keys_first1, keys_last1), [keys_first2, keys_last1), or both. The general case - * is more complicated, because the input ranges may contain duplicate elements. 
- * The generalization is that if [keys_first1, keys_last1) contains \c m elements - * that are equivalent to each other and if [keys_first2, keys_last2) contains \c n - * elements that are equivalent to them, then all \c m elements from the first - * range shall be copied to the output range, in order, and then max(n - m, 0) - * elements from the second range shall be copied to the output, in order. - * - * Each time a key element is copied from [keys_first1, keys_last1) or - * [keys_first2, keys_last2) is copied to the keys output range, the - * corresponding value element is copied from the corresponding values input range (beginning at - * \p values_first1 or \p values_first2) to the values output range. - * - * This version of \p set_union_by_key compares key elements using a function object \c comp. - * - * \param keys_first1 The beginning of the first input range of keys. - * \param keys_last1 The end of the first input range of keys. - * \param keys_first2 The beginning of the second input range of keys. - * \param keys_last2 The end of the second input range of keys. - * \param values_first1 The beginning of the first input range of values. - * \param values_first2 The beginning of the first input range of values. - * \param keys_result The beginning of the output range of keys. - * \param values_result The beginning of the output range of values. - * \param comp Comparison operator. - * \return A \p pair \c p such that p.first is the end of the output range of keys, - * and such that p.second is the end of the output range of values. 
- * - * \tparam InputIterator1 is a model of Input Iterator, - * \p InputIterator1 and \p InputIterator2 have the same \c value_type, - * \p InputIterator1's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator1's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator1's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator2 is a model of Input Iterator, - * \p InputIterator2 and \p InputIterator1 have the same \c value_type, - * \p InputIterator2's \c value_type is a model of LessThan Comparable, - * the ordering on \p InputIterator2's \c value_type is a strict weak ordering, as defined in the LessThan Comparable requirements, - * and \p InputIterator2's \c value_type is convertable to a type in \p OutputIterator's set of \c value_types. - * \tparam InputIterator3 is a model of Input Iterator, - * and \p InputIterator3's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam InputIterator4 is a model of Input Iterator, - * and \p InputIterator4's \c value_type is convertible to a type in \p OutputIterator2's set of \c value_types. - * \tparam OutputIterator1 is a model of Output Iterator. - * \tparam OutputIterator2 is a model of Output Iterator. - * \tparam StrictWeakCompare is a model of Strict Weak Ordering. - * - * \pre The ranges [keys_first1, keys_last1) and [keys_first2, keys_last2) shall be sorted with respect to \p comp. - * \pre The resulting ranges shall not overlap with any input range. - * - * The following code snippet demonstrates how to use \p set_symmetric_difference_by_key to compute the - * symmetric difference of two sets of integers sorted in descending order with their values. - * - * \code - * #include - * #include - * ... 
- * int A_keys[6] = {12, 10, 8, 6, 4, 2, 0}; - * int A_vals[6] = { 0, 0, 0, 0, 0, 0, 0}; - * - * int B_keys[5] = {9, 7, 5, 3, 1}; - * int B_vals[5] = {1, 1, 1, 1, 1}; - * - * int keys_result[11]; - * int vals_result[11]; - * - * thrust::pair end = thrust::set_symmetric_difference_by_key(A_keys, A_keys + 6, B_keys, B_keys + 5, A_vals, B_vals, keys_result, vals_result, thrust::greater()); - * // keys_result is now {12, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0} - * // vals_result is now { 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0} - * \endcode - * - * \see \p set_symmetric_difference_by_key - * \see \p set_intersection_by_key - * \see \p set_difference_by_key - * \see \p sort_by_key - * \see \p is_sorted - */ -template - thrust::pair - set_union_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakCompare comp); - - -/*! \} // end set_operations - */ - - -} // end thrust - -#include - diff --git a/compat/thrust/sort.h b/compat/thrust/sort.h deleted file mode 100644 index e8edfcd876..0000000000 --- a/compat/thrust/sort.h +++ /dev/null @@ -1,1349 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file sort.h - * \brief Functions for reorganizing ranges into sorted order - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup sorting - * \ingroup algorithms - * \{ - */ - - -/*! \p sort sorts the elements in [first, last) into - * ascending order, meaning that if \c i and \c j are any two valid - * iterators in [first, last) such that \c i precedes \c j, - * then \c *j is not less than \c *i. Note: \c sort is not guaranteed - * to be stable. That is, suppose that \c *i and \c *j are equivalent: - * neither one is less than the other. It is not guaranteed that the - * relative order of these two elements will be preserved by \p sort. - * - * This version of \p sort compares objects using \c operator<. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam RandomAccessIterator is a model of Random Access Iterator, - * \p RandomAccessIterator is mutable, - * and \p RandomAccessIterator's \c value_type is a model of LessThan Comparable, - * and the ordering relation on \p RandomAccessIterator's \c value_type is a strict weak ordering, as defined in the - * LessThan Comparable requirements. - * - * The following code snippet demonstrates how to use \p sort to sort - * a sequence of integers using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... 
- * const int N = 6; - * int A[N] = {1, 4, 2, 8, 5, 7}; - * thrust::sort(thrust::host, A, A + N); - * // A is now {1, 2, 4, 5, 7, 8} - * \endcode - * - * \see http://www.sgi.com/tech/stl/sort.html - * \see \p stable_sort - * \see \p sort_by_key - */ -template - void sort(const thrust::detail::execution_policy_base &exec, - RandomAccessIterator first, - RandomAccessIterator last); - - -/*! \p sort sorts the elements in [first, last) into - * ascending order, meaning that if \c i and \c j are any two valid - * iterators in [first, last) such that \c i precedes \c j, - * then \c *j is not less than \c *i. Note: \c sort is not guaranteed - * to be stable. That is, suppose that \c *i and \c *j are equivalent: - * neither one is less than the other. It is not guaranteed that the - * relative order of these two elements will be preserved by \p sort. - * - * This version of \p sort compares objects using \c operator<. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * - * \tparam RandomAccessIterator is a model of Random Access Iterator, - * \p RandomAccessIterator is mutable, - * and \p RandomAccessIterator's \c value_type is a model of LessThan Comparable, - * and the ordering relation on \p RandomAccessIterator's \c value_type is a strict weak ordering, as defined in the - * LessThan Comparable requirements. - * - * The following code snippet demonstrates how to use \p sort to sort - * a sequence of integers. - * - * \code - * #include - * ... - * const int N = 6; - * int A[N] = {1, 4, 2, 8, 5, 7}; - * thrust::sort(A, A + N); - * // A is now {1, 2, 4, 5, 7, 8} - * \endcode - * - * \see http://www.sgi.com/tech/stl/sort.html - * \see \p stable_sort - * \see \p sort_by_key - */ -template - void sort(RandomAccessIterator first, - RandomAccessIterator last); - - -/*! 
\p sort sorts the elements in [first, last) into - * ascending order, meaning that if \c i and \c j are any two valid - * iterators in [first, last) such that \c i precedes \c j, - * then \c *j is not less than \c *i. Note: \c sort is not guaranteed - * to be stable. That is, suppose that \c *i and \c *j are equivalent: - * neither one is less than the other. It is not guaranteed that the - * relative order of these two elements will be preserved by \p sort. - * - * This version of \p sort compares objects using a function object - * \p comp. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param comp Comparison operator. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam RandomAccessIterator is a model of Random Access Iterator, - * \p RandomAccessIterator is mutable, - * and \p RandomAccessIterator's \c value_type is convertible to \p StrictWeakOrdering's - * \c first_argument_type and \c second_argument_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * The following code demonstrates how to sort integers in descending order - * using the greater comparison operator using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * const int N = 6; - * int A[N] = {1, 4, 2, 8, 5, 7}; - * thrust::sort(thrust::host, A, A + N, thrust::greater()); - * // A is now {8, 7, 5, 4, 2, 1}; - * \endcode - * - * \see http://www.sgi.com/tech/stl/sort.html - * \see \p stable_sort - * \see \p sort_by_key - */ -template - void sort(const thrust::detail::execution_policy_base &exec, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp); - - -/*! 
\p sort sorts the elements in [first, last) into - * ascending order, meaning that if \c i and \c j are any two valid - * iterators in [first, last) such that \c i precedes \c j, - * then \c *j is not less than \c *i. Note: \c sort is not guaranteed - * to be stable. That is, suppose that \c *i and \c *j are equivalent: - * neither one is less than the other. It is not guaranteed that the - * relative order of these two elements will be preserved by \p sort. - * - * This version of \p sort compares objects using a function object - * \p comp. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param comp Comparison operator. - * - * \tparam RandomAccessIterator is a model of Random Access Iterator, - * \p RandomAccessIterator is mutable, - * and \p RandomAccessIterator's \c value_type is convertible to \p StrictWeakOrdering's - * \c first_argument_type and \c second_argument_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * The following code demonstrates how to sort integers in descending order - * using the greater comparison operator. - * - * \code - * #include - * #include - * ... - * const int N = 6; - * int A[N] = {1, 4, 2, 8, 5, 7}; - * thrust::sort(A, A + N, thrust::greater()); - * // A is now {8, 7, 5, 4, 2, 1}; - * \endcode - * - * \see http://www.sgi.com/tech/stl/sort.html - * \see \p stable_sort - * \see \p sort_by_key - */ -template - void sort(RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp); - - -/*! \p stable_sort is much like \c sort: it sorts the elements in - * [first, last) into ascending order, meaning that if \c i - * and \c j are any two valid iterators in [first, last) such - * that \c i precedes \c j, then \c *j is not less than \c *i. - * - * As the name suggests, \p stable_sort is stable: it preserves the - * relative ordering of equivalent elements. 
That is, if \c x and \c y - * are elements in [first, last) such that \c x precedes \c y, - * and if the two elements are equivalent (neither x < y nor - * y < x) then a postcondition of \p stable_sort is that \c x - * still precedes \c y. - * - * This version of \p stable_sort compares objects using \c operator<. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam RandomAccessIterator is a model of Random Access Iterator, - * \p RandomAccessIterator is mutable, - * and \p RandomAccessIterator's \c value_type is a model of LessThan Comparable, - * and the ordering relation on \p RandomAccessIterator's \c value_type is a strict weak ordering, as defined in the - * LessThan Comparable requirements. - * - * The following code snippet demonstrates how to use \p sort to sort - * a sequence of integers using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 6; - * int A[N] = {1, 4, 2, 8, 5, 7}; - * thrust::stable_sort(thrust::host, A, A + N); - * // A is now {1, 2, 4, 5, 7, 8} - * \endcode - * - * \see http://www.sgi.com/tech/stl/stable_sort.html - * \see \p sort - * \see \p stable_sort_by_key - */ -template - void stable_sort(const thrust::detail::execution_policy_base &exec, - RandomAccessIterator first, - RandomAccessIterator last); - - -/*! \p stable_sort is much like \c sort: it sorts the elements in - * [first, last) into ascending order, meaning that if \c i - * and \c j are any two valid iterators in [first, last) such - * that \c i precedes \c j, then \c *j is not less than \c *i. - * - * As the name suggests, \p stable_sort is stable: it preserves the - * relative ordering of equivalent elements. 
That is, if \c x and \c y - * are elements in [first, last) such that \c x precedes \c y, - * and if the two elements are equivalent (neither x < y nor - * y < x) then a postcondition of \p stable_sort is that \c x - * still precedes \c y. - * - * This version of \p stable_sort compares objects using \c operator<. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * - * \tparam RandomAccessIterator is a model of Random Access Iterator, - * \p RandomAccessIterator is mutable, - * and \p RandomAccessIterator's \c value_type is a model of LessThan Comparable, - * and the ordering relation on \p RandomAccessIterator's \c value_type is a strict weak ordering, as defined in the - * LessThan Comparable requirements. - * - * The following code snippet demonstrates how to use \p sort to sort - * a sequence of integers. - * - * \code - * #include - * ... - * const int N = 6; - * int A[N] = {1, 4, 2, 8, 5, 7}; - * thrust::stable_sort(A, A + N); - * // A is now {1, 2, 4, 5, 7, 8} - * \endcode - * - * \see http://www.sgi.com/tech/stl/stable_sort.html - * \see \p sort - * \see \p stable_sort_by_key - */ -template - void stable_sort(RandomAccessIterator first, - RandomAccessIterator last); - - -/*! \p stable_sort is much like \c sort: it sorts the elements in - * [first, last) into ascending order, meaning that if \c i - * and \c j are any two valid iterators in [first, last) such - * that \c i precedes \c j, then \c *j is not less than \c *i. - * - * As the name suggests, \p stable_sort is stable: it preserves the - * relative ordering of equivalent elements. That is, if \c x and \c y - * are elements in [first, last) such that \c x precedes \c y, - * and if the two elements are equivalent (neither x < y nor - * y < x) then a postcondition of \p stable_sort is that \c x - * still precedes \c y. - * - * This version of \p stable_sort compares objects using a function object - * \p comp. 
- * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param comp Comparison operator. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam RandomAccessIterator is a model of Random Access Iterator, - * \p RandomAccessIterator is mutable, - * and \p RandomAccessIterator's \c value_type is convertible to \p StrictWeakOrdering's - * \c first_argument_type and \c second_argument_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * The following code demonstrates how to sort integers in descending order - * using the greater comparison operator using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * const int N = 6; - * int A[N] = {1, 4, 2, 8, 5, 7}; - * thrust::sort(A, A + N, thrust::greater()); - * // A is now {8, 7, 5, 4, 2, 1}; - * \endcode - * - * \see http://www.sgi.com/tech/stl/stable_sort.html - * \see \p sort - * \see \p stable_sort_by_key - */ -template - void stable_sort(const thrust::detail::execution_policy_base &exec, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp); - - -/*! \p stable_sort is much like \c sort: it sorts the elements in - * [first, last) into ascending order, meaning that if \c i - * and \c j are any two valid iterators in [first, last) such - * that \c i precedes \c j, then \c *j is not less than \c *i. - * - * As the name suggests, \p stable_sort is stable: it preserves the - * relative ordering of equivalent elements. That is, if \c x and \c y - * are elements in [first, last) such that \c x precedes \c y, - * and if the two elements are equivalent (neither x < y nor - * y < x) then a postcondition of \p stable_sort is that \c x - * still precedes \c y. 
- * - * This version of \p stable_sort compares objects using a function object - * \p comp. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param comp Comparison operator. - * - * \tparam RandomAccessIterator is a model of Random Access Iterator, - * \p RandomAccessIterator is mutable, - * and \p RandomAccessIterator's \c value_type is convertible to \p StrictWeakOrdering's - * \c first_argument_type and \c second_argument_type. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * The following code demonstrates how to sort integers in descending order - * using the greater comparison operator. - * - * \code - * #include - * #include - * ... - * const int N = 6; - * int A[N] = {1, 4, 2, 8, 5, 7}; - * thrust::sort(A, A + N, thrust::greater()); - * // A is now {8, 7, 5, 4, 2, 1}; - * \endcode - * - * \see http://www.sgi.com/tech/stl/stable_sort.html - * \see \p sort - * \see \p stable_sort_by_key - */ -template - void stable_sort(RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp); - - -/////////////// -// Key Value // -/////////////// - - -/*! \p sort_by_key performs a key-value sort. That is, \p sort_by_key sorts the - * elements in [keys_first, keys_last) and [values_first, - * values_first + (keys_last - keys_first)) into ascending key order, - * meaning that if \c i and \c j are any two valid iterators in [keys_first, - * keys_last) such that \c i precedes \c j, and \c p and \c q are iterators - * in [values_first, values_first + (keys_last - keys_first)) - * corresponding to \c i and \c j respectively, then \c *j is not less than - * \c *i. - * - * Note: \c sort_by_key is not guaranteed to be stable. That is, suppose that - * \c *i and \c *j are equivalent: neither one is less than the other. It is not - * guaranteed that the relative order of these two keys or the relative - * order of their corresponding values will be preserved by \p sort_by_key. 
- * - * This version of \p sort_by_key compares key objects using \c operator<. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first The beginning of the key sequence. - * \param keys_last The end of the key sequence. - * \param values_first The beginning of the value sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam RandomAccessIterator1 is a model of Random Access Iterator, - * \p RandomAccessIterator1 is mutable, - * and \p RandomAccessIterator1's \c value_type is a model of LessThan Comparable, - * and the ordering relation on \p RandomAccessIterator1's \c value_type is a strict weak ordering, as defined in the - * LessThan Comparable requirements. - * \tparam RandomAccessIterator2 is a model of Random Access Iterator, - * and \p RandomAccessIterator2 is mutable. - * - * \pre The range [keys_first, keys_last)) shall not overlap the range [values_first, values_first + (keys_last - keys_first)). - * - * The following code snippet demonstrates how to use \p sort_by_key to sort - * an array of character values using integers as sorting keys using the \p thrust::host execution policy - * for parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 6; - * int keys[N] = { 1, 4, 2, 8, 5, 7}; - * char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'}; - * thrust::sort_by_key(thrust::host, keys, keys + N, values); - * // keys is now { 1, 2, 4, 5, 7, 8} - * // values is now {'a', 'c', 'b', 'e', 'f', 'd'} - * \endcode - * - * \see http://www.sgi.com/tech/stl/sort.html - * \see \p stable_sort_by_key - * \see \p sort - */ -template - void sort_by_key(const thrust::detail::execution_policy_base &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first); - - -/*! \p sort_by_key performs a key-value sort. 
That is, \p sort_by_key sorts the - * elements in [keys_first, keys_last) and [values_first, - * values_first + (keys_last - keys_first)) into ascending key order, - * meaning that if \c i and \c j are any two valid iterators in [keys_first, - * keys_last) such that \c i precedes \c j, and \c p and \c q are iterators - * in [values_first, values_first + (keys_last - keys_first)) - * corresponding to \c i and \c j respectively, then \c *j is not less than - * \c *i. - * - * Note: \c sort_by_key is not guaranteed to be stable. That is, suppose that - * \c *i and \c *j are equivalent: neither one is less than the other. It is not - * guaranteed that the relative order of these two keys or the relative - * order of their corresponding values will be preserved by \p sort_by_key. - * - * This version of \p sort_by_key compares key objects using \c operator<. - * - * \param keys_first The beginning of the key sequence. - * \param keys_last The end of the key sequence. - * \param values_first The beginning of the value sequence. - * - * \tparam RandomAccessIterator1 is a model of Random Access Iterator, - * \p RandomAccessIterator1 is mutable, - * and \p RandomAccessIterator1's \c value_type is a model of LessThan Comparable, - * and the ordering relation on \p RandomAccessIterator1's \c value_type is a strict weak ordering, as defined in the - * LessThan Comparable requirements. - * \tparam RandomAccessIterator2 is a model of Random Access Iterator, - * and \p RandomAccessIterator2 is mutable. - * - * \pre The range [keys_first, keys_last)) shall not overlap the range [values_first, values_first + (keys_last - keys_first)). - * - * The following code snippet demonstrates how to use \p sort_by_key to sort - * an array of character values using integers as sorting keys. - * - * \code - * #include - * ... 
- * const int N = 6; - * int keys[N] = { 1, 4, 2, 8, 5, 7}; - * char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'}; - * thrust::sort_by_key(keys, keys + N, values); - * // keys is now { 1, 2, 4, 5, 7, 8} - * // values is now {'a', 'c', 'b', 'e', 'f', 'd'} - * \endcode - * - * \see http://www.sgi.com/tech/stl/sort.html - * \see \p stable_sort_by_key - * \see \p sort - */ -template - void sort_by_key(RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first); - - -/*! \p sort_by_key performs a key-value sort. That is, \p sort_by_key sorts the - * elements in [keys_first, keys_last) and [values_first, - * values_first + (keys_last - keys_first)) into ascending key order, - * meaning that if \c i and \c j are any two valid iterators in [keys_first, - * keys_last) such that \c i precedes \c j, and \c p and \c q are iterators - * in [values_first, values_first + (keys_last - keys_first)) - * corresponding to \c i and \c j respectively, then \c *j is not less than - * \c *i. - * - * Note: \c sort_by_key is not guaranteed to be stable. That is, suppose that - * \c *i and \c *j are equivalent: neither one is less than the other. It is not - * guaranteed that the relative order of these two keys or the relative - * order of their corresponding values will be preserved by \p sort_by_key. - * - * This version of \p sort_by_key compares key objects using a function object - * \c comp. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first The beginning of the key sequence. - * \param keys_last The end of the key sequence. - * \param values_first The beginning of the value sequence. - * \param comp Comparison operator. - * - * \tparam DerivedPolicy The name of the derived execution policy. 
- * \tparam RandomAccessIterator1 is a model of Random Access Iterator, - * \p RandomAccessIterator1 is mutable, - * and \p RandomAccessIterator1's \c value_type is convertible to \p StrictWeakOrdering's - * \c first_argument_type and \c second_argument_type. - * \tparam RandomAccessIterator2 is a model of Random Access Iterator, - * and \p RandomAccessIterator2 is mutable. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * \pre The range [keys_first, keys_last)) shall not overlap the range [values_first, values_first + (keys_last - keys_first)). - * - * The following code snippet demonstrates how to use \p sort_by_key to sort - * an array of character values using integers as sorting keys using the \p thrust::host execution policy - * for parallelization.The keys are sorted in descending order using the greater comparison operator. - * - * \code - * #include - * #include - * ... - * const int N = 6; - * int keys[N] = { 1, 4, 2, 8, 5, 7}; - * char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'}; - * thrust::sort_by_key(thrust::host, keys, keys + N, values, thrust::greater()); - * // keys is now { 8, 7, 5, 4, 2, 1} - * // values is now {'d', 'f', 'e', 'b', 'c', 'a'} - * \endcode - * - * \see http://www.sgi.com/tech/stl/sort.html - * \see \p stable_sort_by_key - * \see \p sort - */ -template - void sort_by_key(const thrust::detail::execution_policy_base &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp); - - -/*! \p sort_by_key performs a key-value sort. 
That is, \p sort_by_key sorts the - * elements in [keys_first, keys_last) and [values_first, - * values_first + (keys_last - keys_first)) into ascending key order, - * meaning that if \c i and \c j are any two valid iterators in [keys_first, - * keys_last) such that \c i precedes \c j, and \c p and \c q are iterators - * in [values_first, values_first + (keys_last - keys_first)) - * corresponding to \c i and \c j respectively, then \c *j is not less than - * \c *i. - * - * Note: \c sort_by_key is not guaranteed to be stable. That is, suppose that - * \c *i and \c *j are equivalent: neither one is less than the other. It is not - * guaranteed that the relative order of these two keys or the relative - * order of their corresponding values will be preserved by \p sort_by_key. - * - * This version of \p sort_by_key compares key objects using a function object - * \c comp. - * - * \param keys_first The beginning of the key sequence. - * \param keys_last The end of the key sequence. - * \param values_first The beginning of the value sequence. - * \param comp Comparison operator. - * - * \tparam RandomAccessIterator1 is a model of Random Access Iterator, - * \p RandomAccessIterator1 is mutable, - * and \p RandomAccessIterator1's \c value_type is convertible to \p StrictWeakOrdering's - * \c first_argument_type and \c second_argument_type. - * \tparam RandomAccessIterator2 is a model of Random Access Iterator, - * and \p RandomAccessIterator2 is mutable. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * \pre The range [keys_first, keys_last)) shall not overlap the range [values_first, values_first + (keys_last - keys_first)). - * - * The following code snippet demonstrates how to use \p sort_by_key to sort - * an array of character values using integers as sorting keys. The keys - * are sorted in descending order using the greater comparison operator. - * - * \code - * #include - * ... 
- * const int N = 6; - * int keys[N] = { 1, 4, 2, 8, 5, 7}; - * char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'}; - * thrust::sort_by_key(keys, keys + N, values, thrust::greater()); - * // keys is now { 8, 7, 5, 4, 2, 1} - * // values is now {'d', 'f', 'e', 'b', 'c', 'a'} - * \endcode - * - * \see http://www.sgi.com/tech/stl/sort.html - * \see \p stable_sort_by_key - * \see \p sort - */ -template - void sort_by_key(RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp); - - -/*! \p stable_sort_by_key performs a key-value sort. That is, \p stable_sort_by_key - * sorts the elements in [keys_first, keys_last) and [values_first, - * values_first + (keys_last - keys_first)) into ascending key order, - * meaning that if \c i and \c j are any two valid iterators in [keys_first, - * keys_last) such that \c i precedes \c j, and \c p and \c q are iterators - * in [values_first, values_first + (keys_last - keys_first)) - * corresponding to \c i and \c j respectively, then \c *j is not less than - * \c *i. - * - * As the name suggests, \p stable_sort_by_key is stable: it preserves the - * relative ordering of equivalent elements. That is, if \c x and \c y - * are elements in [keys_first, keys_last) such that \c x precedes \c y, - * and if the two elements are equivalent (neither x < y nor - * y < x) then a postcondition of \p stable_sort_by_key is that \c x - * still precedes \c y. - * - * This version of \p stable_sort_by_key compares key objects using \c operator<. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first The beginning of the key sequence. - * \param keys_last The end of the key sequence. - * \param values_first The beginning of the value sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. 
- * \tparam RandomAccessIterator1 is a model of Random Access Iterator, - * \p RandomAccessIterator1 is mutable, - * and \p RandomAccessIterator1's \c value_type is a model of LessThan Comparable, - * and the ordering relation on \p RandomAccessIterator1's \c value_type is a strict weak ordering, as defined in the - * LessThan Comparable requirements. - * \tparam RandomAccessIterator2 is a model of Random Access Iterator, - * and \p RandomAccessIterator2 is mutable. - * - * \pre The range [keys_first, keys_last)) shall not overlap the range [values_first, values_first + (keys_last - keys_first)). - * - * The following code snippet demonstrates how to use \p stable_sort_by_key to sort - * an array of characters using integers as sorting keys using the \p thrust::host execution policy for - * parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 6; - * int keys[N] = { 1, 4, 2, 8, 5, 7}; - * char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'}; - * thrust::stable_sort_by_key(thrust::host, keys, keys + N, values); - * // keys is now { 1, 2, 4, 5, 7, 8} - * // values is now {'a', 'c', 'b', 'e', 'f', 'd'} - * \endcode - * - * \see http://www.sgi.com/tech/stl/sort.html - * \see \p sort_by_key - * \see \p stable_sort - */ -template - void stable_sort_by_key(const thrust::detail::execution_policy_base &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first); - - -/*! \p stable_sort_by_key performs a key-value sort. That is, \p stable_sort_by_key - * sorts the elements in [keys_first, keys_last) and [values_first, - * values_first + (keys_last - keys_first)) into ascending key order, - * meaning that if \c i and \c j are any two valid iterators in [keys_first, - * keys_last) such that \c i precedes \c j, and \c p and \c q are iterators - * in [values_first, values_first + (keys_last - keys_first)) - * corresponding to \c i and \c j respectively, then \c *j is not less than - * \c *i. 
- * - * As the name suggests, \p stable_sort_by_key is stable: it preserves the - * relative ordering of equivalent elements. That is, if \c x and \c y - * are elements in [keys_first, keys_last) such that \c x precedes \c y, - * and if the two elements are equivalent (neither x < y nor - * y < x) then a postcondition of \p stable_sort_by_key is that \c x - * still precedes \c y. - * - * This version of \p stable_sort_by_key compares key objects using \c operator<. - * - * \param keys_first The beginning of the key sequence. - * \param keys_last The end of the key sequence. - * \param values_first The beginning of the value sequence. - * - * \tparam RandomAccessIterator1 is a model of Random Access Iterator, - * \p RandomAccessIterator1 is mutable, - * and \p RandomAccessIterator1's \c value_type is a model of LessThan Comparable, - * and the ordering relation on \p RandomAccessIterator1's \c value_type is a strict weak ordering, as defined in the - * LessThan Comparable requirements. - * \tparam RandomAccessIterator2 is a model of Random Access Iterator, - * and \p RandomAccessIterator2 is mutable. - * - * \pre The range [keys_first, keys_last)) shall not overlap the range [values_first, values_first + (keys_last - keys_first)). - * - * The following code snippet demonstrates how to use \p stable_sort_by_key to sort - * an array of characters using integers as sorting keys. - * - * \code - * #include - * ... - * const int N = 6; - * int keys[N] = { 1, 4, 2, 8, 5, 7}; - * char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'}; - * thrust::stable_sort_by_key(keys, keys + N, values); - * // keys is now { 1, 2, 4, 5, 7, 8} - * // values is now {'a', 'c', 'b', 'e', 'f', 'd'} - * \endcode - * - * \see http://www.sgi.com/tech/stl/sort.html - * \see \p sort_by_key - * \see \p stable_sort - */ -template - void stable_sort_by_key(RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first); - - -/*! 
\p stable_sort_by_key performs a key-value sort. That is, \p stable_sort_by_key - * sorts the elements in [keys_first, keys_last) and [values_first, - * values_first + (keys_last - keys_first)) into ascending key order, - * meaning that if \c i and \c j are any two valid iterators in [keys_first, - * keys_last) such that \c i precedes \c j, and \c p and \c q are iterators - * in [values_first, values_first + (keys_last - keys_first)) - * corresponding to \c i and \c j respectively, then \c *j is not less than - * \c *i. - * - * As the name suggests, \p stable_sort_by_key is stable: it preserves the - * relative ordering of equivalent elements. That is, if \c x and \c y - * are elements in [keys_first, keys_last) such that \c x precedes \c y, - * and if the two elements are equivalent (neither x < y nor - * y < x) then a postcondition of \p stable_sort_by_key is that \c x - * still precedes \c y. - * - * This version of \p stable_sort_by_key compares key objects using the function - * object \p comp. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first The beginning of the key sequence. - * \param keys_last The end of the key sequence. - * \param values_first The beginning of the value sequence. - * \param comp Comparison operator. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam RandomAccessIterator1 is a model of Random Access Iterator, - * \p RandomAccessIterator1 is mutable, - * and \p RandomAccessIterator1's \c value_type is convertible to \p StrictWeakOrdering's - * \c first_argument_type and \c second_argument_type. - * \tparam RandomAccessIterator2 is a model of Random Access Iterator, - * and \p RandomAccessIterator2 is mutable. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. 
- * - * \pre The range [keys_first, keys_last)) shall not overlap the range [values_first, values_first + (keys_last - keys_first)). - * - * The following code snippet demonstrates how to use \p sort_by_key to sort - * an array of character values using integers as sorting keys using the \p thrust::host execution policy for - * parallelization. The keys are sorted in descending order using the greater comparison operator. - * - * \code - * #include - * #include - * ... - * const int N = 6; - * int keys[N] = { 1, 4, 2, 8, 5, 7}; - * char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'}; - * thrust::stable_sort_by_key(thrust::host, keys, keys + N, values, thrust::greater()); - * // keys is now { 8, 7, 5, 4, 2, 1} - * // values is now {'d', 'f', 'e', 'b', 'c', 'a'} - * \endcode - * - * - * \see http://www.sgi.com/tech/stl/sort.html - * \see \p sort_by_key - * \see \p stable_sort - */ -template - void stable_sort_by_key(const thrust::detail::execution_policy_base &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp); - - -/*! \p stable_sort_by_key performs a key-value sort. That is, \p stable_sort_by_key - * sorts the elements in [keys_first, keys_last) and [values_first, - * values_first + (keys_last - keys_first)) into ascending key order, - * meaning that if \c i and \c j are any two valid iterators in [keys_first, - * keys_last) such that \c i precedes \c j, and \c p and \c q are iterators - * in [values_first, values_first + (keys_last - keys_first)) - * corresponding to \c i and \c j respectively, then \c *j is not less than - * \c *i. - * - * As the name suggests, \p stable_sort_by_key is stable: it preserves the - * relative ordering of equivalent elements. 
That is, if \c x and \c y - * are elements in [keys_first, keys_last) such that \c x precedes \c y, - * and if the two elements are equivalent (neither x < y nor - * y < x) then a postcondition of \p stable_sort_by_key is that \c x - * still precedes \c y. - * - * This version of \p stable_sort_by_key compares key objects using the function - * object \p comp. - * - * \param keys_first The beginning of the key sequence. - * \param keys_last The end of the key sequence. - * \param values_first The beginning of the value sequence. - * \param comp Comparison operator. - * - * \tparam RandomAccessIterator1 is a model of Random Access Iterator, - * \p RandomAccessIterator1 is mutable, - * and \p RandomAccessIterator1's \c value_type is convertible to \p StrictWeakOrdering's - * \c first_argument_type and \c second_argument_type. - * \tparam RandomAccessIterator2 is a model of Random Access Iterator, - * and \p RandomAccessIterator2 is mutable. - * \tparam StrictWeakOrdering is a model of Strict Weak Ordering. - * - * \pre The range [keys_first, keys_last)) shall not overlap the range [values_first, values_first + (keys_last - keys_first)). - * - * The following code snippet demonstrates how to use \p sort_by_key to sort - * an array of character values using integers as sorting keys. The keys - * are sorted in descending order using the greater comparison operator. - * - * \code - * #include - * ... - * const int N = 6; - * int keys[N] = { 1, 4, 2, 8, 5, 7}; - * char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'}; - * thrust::stable_sort_by_key(keys, keys + N, values, thrust::greater()); - * // keys is now { 8, 7, 5, 4, 2, 1} - * // values is now {'d', 'f', 'e', 'b', 'c', 'a'} - * \endcode - * - * - * \see http://www.sgi.com/tech/stl/sort.html - * \see \p sort_by_key - * \see \p stable_sort - */ -template - void stable_sort_by_key(RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp); - - -/*! 
\} // end sorting - */ - - -/*! \addtogroup reductions - * \{ - * \addtogroup predicates - * \{ - */ - - -/*! \p is_sorted returns \c true if the range [first, last) is - * sorted in ascending order, and \c false otherwise. - * - * Specifically, this version of \p is_sorted returns \c false if for - * some iterator \c i in the range [first, last - 1) the - * expression *(i + 1) < *i is \c true. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \return \c true, if the sequence is sorted; \c false, otherwise. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * \p ForwardIterator's \c value_type is a model of LessThan Comparable, - * and the ordering on objects of \p ForwardIterator's \c value_type is a strict weak ordering, as defined - * in the LessThan Comparable requirements. - * - * - * The following code demonstrates how to use \p is_sorted to test whether the - * contents of a \c device_vector are stored in ascending order using the \p thrust::device execution policy - * for parallelization: - * - * \code - * #include - * #include - * #include - * #include - * ... 
- * thrust::device_vector v(6); - * v[0] = 1; - * v[1] = 4; - * v[2] = 2; - * v[3] = 8; - * v[4] = 5; - * v[5] = 7; - * - * bool result = thrust::is_sorted(thrust::device, v.begin(), v.end()); - * - * // result == false - * - * thrust::sort(v.begin(), v.end()); - * result = thrust::is_sorted(thrust::device, v.begin(), v.end()); - * - * // result == true - * \endcode - * - * \see http://www.sgi.com/tech/stl/is_sorted.html - * \see is_sorted_until - * \see \c sort - * \see \c stable_sort - * \see \c less - */ -template - bool is_sorted(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last); - - -/*! \p is_sorted returns \c true if the range [first, last) is - * sorted in ascending order, and \c false otherwise. - * - * Specifically, this version of \p is_sorted returns \c false if for - * some iterator \c i in the range [first, last - 1) the - * expression *(i + 1) < *i is \c true. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \return \c true, if the sequence is sorted; \c false, otherwise. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * \p ForwardIterator's \c value_type is a model of LessThan Comparable, - * and the ordering on objects of \p ForwardIterator's \c value_type is a strict weak ordering, as defined - * in the LessThan Comparable requirements. - * - * - * The following code demonstrates how to use \p is_sorted to test whether the - * contents of a \c device_vector are stored in ascending order. - * - * \code - * #include - * #include - * #include - * ... 
- * thrust::device_vector v(6); - * v[0] = 1; - * v[1] = 4; - * v[2] = 2; - * v[3] = 8; - * v[4] = 5; - * v[5] = 7; - * - * bool result = thrust::is_sorted(v.begin(), v.end()); - * - * // result == false - * - * thrust::sort(v.begin(), v.end()); - * result = thrust::is_sorted(v.begin(), v.end()); - * - * // result == true - * \endcode - * - * \see http://www.sgi.com/tech/stl/is_sorted.html - * \see is_sorted_until - * \see \c sort - * \see \c stable_sort - * \see \c less - */ -template - bool is_sorted(ForwardIterator first, - ForwardIterator last); - - -/*! \p is_sorted returns \c true if the range [first, last) is sorted in ascending - * order accoring to a user-defined comparison operation, and \c false otherwise. - * - * Specifically, this version of \p is_sorted returns \c false if for some iterator \c i in - * the range [first, last - 1) the expression comp(*(i + 1), *i) is \c true. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param comp Comparison operator. - * \return \c true, if the sequence is sorted according to comp; \c false, otherwise. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator's \c value_type is convertible to both \c StrictWeakOrdering's \c first_argument_type - * and \c second_argument_type. - * \tparam Compare is a model of Strict Weak Ordering. - * - * The following code snippet demonstrates how to use \p is_sorted to test whether the - * contents of a \c device_vector are stored in descending order using the \p thrust::device execution - * policy for parallelization: - * - * \code - * #include - * #include - * #include - * #include - * ... 
- * thrust::device_vector v(6); - * v[0] = 1; - * v[1] = 4; - * v[2] = 2; - * v[3] = 8; - * v[4] = 5; - * v[5] = 7; - * - * thrust::greater comp; - * bool result = thrust::is_sorted(thrust::device, v.begin(), v.end(), comp); - * - * // result == false - * - * thrust::sort(v.begin(), v.end(), comp); - * result = thrust::is_sorted(thrust::device, v.begin(), v.end(), comp); - * - * // result == true - * \endcode - * - * \see http://www.sgi.com/tech/stl/is_sorted.html - * \see \c sort - * \see \c stable_sort - * \see \c less - */ -template - bool is_sorted(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - Compare comp); - - -/*! \p is_sorted returns \c true if the range [first, last) is sorted in ascending - * order accoring to a user-defined comparison operation, and \c false otherwise. - * - * Specifically, this version of \p is_sorted returns \c false if for some iterator \c i in - * the range [first, last - 1) the expression comp(*(i + 1), *i) is \c true. - * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param comp Comparison operator. - * \return \c true, if the sequence is sorted according to comp; \c false, otherwise. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator's \c value_type is convertible to both \c StrictWeakOrdering's \c first_argument_type - * and \c second_argument_type. - * \tparam Compare is a model of Strict Weak Ordering. - * - * The following code snippet demonstrates how to use \p is_sorted to test whether the - * contents of a \c device_vector are stored in descending order. - * - * \code - * #include - * #include - * #include - * ... 
- * thrust::device_vector v(6); - * v[0] = 1; - * v[1] = 4; - * v[2] = 2; - * v[3] = 8; - * v[4] = 5; - * v[5] = 7; - * - * thrust::greater comp; - * bool result = thrust::is_sorted(v.begin(), v.end(), comp); - * - * // result == false - * - * thrust::sort(v.begin(), v.end(), comp); - * result = thrust::is_sorted(v.begin(), v.end(), comp); - * - * // result == true - * \endcode - * - * \see http://www.sgi.com/tech/stl/is_sorted.html - * \see \c sort - * \see \c stable_sort - * \see \c less - */ -template - bool is_sorted(ForwardIterator first, - ForwardIterator last, - Compare comp); - - -/*! This version of \p is_sorted_until returns the last iterator \c i in [first,last] for - * which the range [first,last) is sorted using \c operator<. If distance(first,last) < 2, - * \p is_sorted_until simply returns \p last. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the range of interest. - * \param last The end of the range of interest. - * \return The last iterator in the input range for which it is sorted. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator and - * \p ForwardIterator's \c value_type is a model of LessThan Comparable. - * - * The following code snippet demonstrates how to use \p is_sorted_until to find the first position - * in an array where the data becomes unsorted using the \p thrust::host execution policy for - * parallelization: - * - * \code - * #include - * #include - * - * ... 
- * - * int A[8] = {0, 1, 2, 3, 0, 1, 2, 3}; - * - * int * B = thrust::is_sorted_until(thrust::host, A, A + 8); - * - * // B - A is 4 - * // [A, B) is sorted - * \endcode - * - * \see \p is_sorted - * \see \p sort - * \see \p sort_by_key - * \see \p stable_sort - * \see \p stable_sort_by_key - */ -template - ForwardIterator is_sorted_until(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last); - - -/*! This version of \p is_sorted_until returns the last iterator \c i in [first,last] for - * which the range [first,last) is sorted using \c operator<. If distance(first,last) < 2, - * \p is_sorted_until simply returns \p last. - * - * \param first The beginning of the range of interest. - * \param last The end of the range of interest. - * \return The last iterator in the input range for which it is sorted. - * - * \tparam ForwardIterator is a model of Forward Iterator and - * \p ForwardIterator's \c value_type is a model of LessThan Comparable. - * - * The following code snippet demonstrates how to use \p is_sorted_until to find the first position - * in an array where the data becomes unsorted: - * - * \code - * #include - * - * ... - * - * int A[8] = {0, 1, 2, 3, 0, 1, 2, 3}; - * - * int * B = thrust::is_sorted_until(A, A + 8); - * - * // B - A is 4 - * // [A, B) is sorted - * \endcode - * - * \see \p is_sorted - * \see \p sort - * \see \p sort_by_key - * \see \p stable_sort - * \see \p stable_sort_by_key - */ -template - ForwardIterator is_sorted_until(ForwardIterator first, - ForwardIterator last); - - -/*! This version of \p is_sorted_until returns the last iterator \c i in [first,last] for - * which the range [first,last) is sorted using the function object \c comp. If distance(first,last) < 2, - * \p is_sorted_until simply returns \p last. - * - * The algorithm's execution is parallelized as determined by \p exec. 
- * - * \param exec The execution policy to use for parallelization: - * \param first The beginning of the range of interest. - * \param last The end of the range of interest. - * \param comp The function object to use for comparison. - * \return The last iterator in the input range for which it is sorted. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator and - * \p ForwardIterator's \c value_type is convertible to \p Compare's \c argument_type. - * \tparam Compare is a model of Strict Weak Ordering. - * - * The following code snippet demonstrates how to use \p is_sorted_until to find the first position - * in an array where the data becomes unsorted in descending order using the \p thrust::host execution - * policy for parallelization: - * - * \code - * #include - * #include - * #include - * - * ... - * - * int A[8] = {3, 2, 1, 0, 3, 2, 1, 0}; - * - * thrust::greater comp; - * int * B = thrust::is_sorted_until(thrust::host, A, A + 8, comp); - * - * // B - A is 4 - * // [A, B) is sorted in descending order - * \endcode - * - * \see \p is_sorted - * \see \p sort - * \see \p sort_by_key - * \see \p stable_sort - * \see \p stable_sort_by_key - */ -template - ForwardIterator is_sorted_until(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - Compare comp); - - -/*! This version of \p is_sorted_until returns the last iterator \c i in [first,last] for - * which the range [first,last) is sorted using the function object \c comp. If distance(first,last) < 2, - * \p is_sorted_until simply returns \p last. - * - * \param first The beginning of the range of interest. - * \param last The end of the range of interest. - * \param comp The function object to use for comparison. - * \return The last iterator in the input range for which it is sorted. 
- * - * \tparam ForwardIterator is a model of Forward Iterator and - * \p ForwardIterator's \c value_type is convertible to \p Compare's \c argument_type. - * \tparam Compare is a model of Strict Weak Ordering. - * - * The following code snippet demonstrates how to use \p is_sorted_until to find the first position - * in an array where the data becomes unsorted in descending order: - * - * \code - * #include - * #include - * - * ... - * - * int A[8] = {3, 2, 1, 0, 3, 2, 1, 0}; - * - * thrust::greater comp; - * int * B = thrust::is_sorted_until(A, A + 8, comp); - * - * // B - A is 4 - * // [A, B) is sorted in descending order - * \endcode - * - * \see \p is_sorted - * \see \p sort - * \see \p sort_by_key - * \see \p stable_sort - * \see \p stable_sort_by_key - */ -template - ForwardIterator is_sorted_until(ForwardIterator first, - ForwardIterator last, - Compare comp); - - -/*! \} // end predicates - * \} // end reductions - */ - - -} // end namespace thrust - -#include - diff --git a/compat/thrust/swap.h b/compat/thrust/swap.h deleted file mode 100644 index 085e546930..0000000000 --- a/compat/thrust/swap.h +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file swap.h - * \brief Functions for swapping the value of elements - */ - -#pragma once - -#include -#include - -// empty Doxygen comment below so namespace thrust's documentation will be extracted - -/*! 
- */ -namespace thrust -{ - -/*! \addtogroup utility - * \{ - */ - -/*! \addtogroup swap - * \{ - */ - -/*! \p swap assigns the contents of \c a to \c b and the - * contents of \c b to \c a. This is used as a primitive operation - * by many other algorithms. - * - * \param a The first value of interest. After completion, - * the value of b will be returned here. - * \param b The second value of interest. After completion, - * the value of a will be returned here. - * - * \tparam Assignable is a model of Assignable. - * - * The following code snippet demonstrates how to use \p swap to - * swap the contents of two variables. - * - * \code - * #include - * ... - * int x = 1; - * int y = 2; - * thrust::swap(x,h); - * - * // x == 2, y == 1 - * \endcode - */ -template -__host__ __device__ -inline void swap(Assignable1 &a, Assignable2 &b); - -/*! \} // swap - */ - -/*! \} // utility - */ - - -/*! \addtogroup copying - * \{ - */ - - -/*! \p swap_ranges swaps each of the elements in the range [first1, last1) - * with the corresponding element in the range [first2, first2 + (last1 - first1)). - * That is, for each integer \c n such that 0 <= n < (last1 - first1), it swaps - * *(first1 + n) and *(first2 + n). The return value is - * first2 + (last1 - first1). - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the first sequence to swap. - * \param last1 One position past the last element of the first sequence to swap. - * \param first2 The beginning of the second sequence to swap. - * \return An iterator pointing to one position past the last element of the second - * sequence to swap. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator1 is a model of Forward Iterator, - * and \p ForwardIterator1's \c value_type must be convertible to \p ForwardIterator2's \c value_type. 
- * \tparam ForwardIterator2 is a model of Forward Iterator, - * and \p ForwardIterator2's \c value_type must be convertible to \p ForwardIterator1's \c value_type. - * - * \pre \p first1 may equal \p first2, but the range [first1, last1) shall not overlap the range [first2, first2 + (last1 - first1)) otherwise. - * - * The following code snippet demonstrates how to use \p swap_ranges to - * swap the contents of two \c thrust::device_vectors using the \p thrust::device execution - * policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * thrust::device_vector v1(2), v2(2); - * v1[0] = 1; - * v1[1] = 2; - * v2[0] = 3; - * v2[1] = 4; - * - * thrust::swap_ranges(thrust::device, v1.begin(), v1.end(), v2.begin()); - * - * // v1[0] == 3, v1[1] == 4, v2[0] == 1, v2[1] == 2 - * \endcode - * - * \see http://www.sgi.com/tech/stl/swap_ranges.html - * \see \c swap - */ -template - ForwardIterator2 swap_ranges(const thrust::detail::execution_policy_base &exec, - ForwardIterator1 first1, - ForwardIterator1 last1, - ForwardIterator2 first2); - - -/*! \p swap_ranges swaps each of the elements in the range [first1, last1) - * with the corresponding element in the range [first2, first2 + (last1 - first1)). - * That is, for each integer \c n such that 0 <= n < (last1 - first1), it swaps - * *(first1 + n) and *(first2 + n). The return value is - * first2 + (last1 - first1). - * - * \param first1 The beginning of the first sequence to swap. - * \param last1 One position past the last element of the first sequence to swap. - * \param first2 The beginning of the second sequence to swap. - * \return An iterator pointing to one position past the last element of the second - * sequence to swap. - * - * \tparam ForwardIterator1 is a model of Forward Iterator, - * and \p ForwardIterator1's \c value_type must be convertible to \p ForwardIterator2's \c value_type. 
- * \tparam ForwardIterator2 is a model of Forward Iterator, - * and \p ForwardIterator2's \c value_type must be convertible to \p ForwardIterator1's \c value_type. - * - * \pre \p first1 may equal \p first2, but the range [first1, last1) shall not overlap the range [first2, first2 + (last1 - first1)) otherwise. - * - * The following code snippet demonstrates how to use \p swap_ranges to - * swap the contents of two \c thrust::device_vectors. - * - * \code - * #include - * #include - * ... - * thrust::device_vector v1(2), v2(2); - * v1[0] = 1; - * v1[1] = 2; - * v2[0] = 3; - * v2[1] = 4; - * - * thrust::swap_ranges(v1.begin(), v1.end(), v2.begin()); - * - * // v1[0] == 3, v1[1] == 4, v2[0] == 1, v2[1] == 2 - * \endcode - * - * \see http://www.sgi.com/tech/stl/swap_ranges.html - * \see \c swap - */ -template - ForwardIterator2 swap_ranges(ForwardIterator1 first1, - ForwardIterator1 last1, - ForwardIterator2 first2); - - -/*! \} // copying - */ - - -} // end thrust - -#include - diff --git a/compat/thrust/system/cpp/detail/adjacent_difference.h b/compat/thrust/system/cpp/detail/adjacent_difference.h deleted file mode 100644 index ea212ffcd9..0000000000 --- a/compat/thrust/system/cpp/detail/adjacent_difference.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file adjacent_difference.h - * \brief C++ implementation of adjacent_difference. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - -template -OutputIterator adjacent_difference(execution_policy &, - InputIterator first, - InputIterator last, - OutputIterator result, - BinaryFunction binary_op) -{ - return thrust::system::detail::internal::scalar::adjacent_difference(first, last, result, binary_op); -} - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/assign_value.h b/compat/thrust/system/cpp/detail/assign_value.h deleted file mode 100644 index 847fc97afd..0000000000 --- a/compat/thrust/system/cpp/detail/assign_value.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - -template -__host__ __device__ - void assign_value(thrust::system::cpp::detail::execution_policy &, Pointer1 dst, Pointer2 src) -{ - *thrust::raw_pointer_cast(dst) = *thrust::raw_pointer_cast(src); -} // end assign_value() - -} // end detail -} // end cpp -} // end system -} // end thrust - diff --git a/compat/thrust/system/cpp/detail/binary_search.h b/compat/thrust/system/cpp/detail/binary_search.h deleted file mode 100644 index 37af539e0d..0000000000 --- a/compat/thrust/system/cpp/detail/binary_search.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file binary_search.h - * \brief C++ implementation of binary search algorithms. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - -template -ForwardIterator lower_bound(tag, - ForwardIterator first, - ForwardIterator last, - const T& val, - StrictWeakOrdering comp) -{ - return thrust::system::detail::internal::scalar::lower_bound(first, last, val, comp); -} - - -template -ForwardIterator upper_bound(tag, - ForwardIterator first, - ForwardIterator last, - const T& val, - StrictWeakOrdering comp) -{ - return thrust::system::detail::internal::scalar::upper_bound(first, last, val, comp); -} - -template -bool binary_search(tag, - ForwardIterator first, - ForwardIterator last, - const T& val, - StrictWeakOrdering comp) -{ - return thrust::system::detail::internal::scalar::binary_search(first, last, val, comp); -} - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/copy.h b/compat/thrust/system/cpp/detail/copy.h deleted file mode 100644 index 7299bbbd7a..0000000000 --- a/compat/thrust/system/cpp/detail/copy.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file copy.h - * \brief C++ implementations of copy functions. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - -template - OutputIterator copy(tag, - InputIterator first, - InputIterator last, - OutputIterator result) -{ - return thrust::system::detail::internal::scalar::copy(first, last, result); -} - -template - OutputIterator copy_n(tag, - InputIterator first, - Size n, - OutputIterator result) -{ - return thrust::system::detail::internal::scalar::copy_n(first, n, result); -} - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/copy_if.h b/compat/thrust/system/cpp/detail/copy_if.h deleted file mode 100644 index 2faadfa1b7..0000000000 --- a/compat/thrust/system/cpp/detail/copy_if.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - - -template - OutputIterator copy_if(tag, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred) -{ - return thrust::system::detail::internal::scalar::copy_if(first, last, stencil, result, pred); -} - -} // end detail -} // end cpp -} // end system -} // end thrust - diff --git a/compat/thrust/system/cpp/detail/count.h b/compat/thrust/system/cpp/detail/count.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cpp/detail/count.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cpp/detail/dispatch/sort.h b/compat/thrust/system/cpp/detail/dispatch/sort.h deleted file mode 100644 index 2a03cf62bc..0000000000 --- a/compat/thrust/system/cpp/detail/dispatch/sort.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include -#include - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ -namespace dispatch -{ - -//////////////// -// Radix Sort // -//////////////// - -template -void stable_sort(RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp, - thrust::detail::true_type) -{ - thrust::system::detail::internal::scalar::stable_radix_sort(first, last); - - // if comp is greater then reverse the keys - typedef typename thrust::iterator_traits::value_type KeyType; - const static bool reverse = thrust::detail::is_same >::value; - - if (reverse) - thrust::reverse(first, last); -} - -template -void stable_sort_by_key(RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - StrictWeakOrdering comp, - thrust::detail::true_type) -{ - // if comp is greater then reverse the keys and values - typedef typename thrust::iterator_traits::value_type KeyType; - const static bool reverse = thrust::detail::is_same >::value; - - // note, we also have to reverse the (unordered) input to preserve stability - if (reverse) - { - thrust::reverse(first1, last1); - thrust::reverse(first2, first2 + (last1 - first1)); - } - - thrust::system::detail::internal::scalar::stable_radix_sort_by_key(first1, last1, first2); - - if (reverse) - { - thrust::reverse(first1, last1); - thrust::reverse(first2, first2 + (last1 - first1)); - } -} - -//////////////// -// Merge Sort // -//////////////// - -template -void stable_sort(RandomAccessIterator 
first, - RandomAccessIterator last, - StrictWeakOrdering comp, - thrust::detail::false_type) -{ - thrust::system::detail::internal::scalar::stable_merge_sort(first, last, comp); -} - -template -void stable_sort_by_key(RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - StrictWeakOrdering comp, - thrust::detail::false_type) -{ - thrust::system::detail::internal::scalar::stable_merge_sort_by_key(first1, last1, first2, comp); -} - -} // end namespace dispatch -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/equal.h b/compat/thrust/system/cpp/detail/equal.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cpp/detail/equal.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cpp/detail/execution_policy.h b/compat/thrust/system/cpp/detail/execution_policy.h deleted file mode 100644 index 229ff5c6c8..0000000000 --- a/compat/thrust/system/cpp/detail/execution_policy.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -// put the canonical tag in the same ns as the backend's entry points -namespace cpp -{ -namespace detail -{ - -// this awkward sequence of definitions arise -// from the desire both for tag to derive -// from execution_policy and for execution_policy -// to convert to tag (when execution_policy is not -// an ancestor of tag) - -// forward declaration of tag -struct tag; - -// forward declaration of execution_policy -template struct execution_policy; - -// specialize execution_policy for tag -template<> - struct execution_policy - : thrust::execution_policy -{}; - -// tag's definition comes before the -// generic definition of execution_policy -struct tag : execution_policy {}; - -// allow conversion to tag when it is not a successor -template - struct execution_policy - : thrust::execution_policy -{ - // allow conversion to tag - inline operator tag () const - { - return tag(); - } -}; - -} // end detail - -// alias execution_policy and tag here -using thrust::system::cpp::detail::execution_policy; -using thrust::system::cpp::detail::tag; - -} // end cpp -} // end system - -// alias items at top-level -namespace cpp -{ - -using thrust::system::cpp::execution_policy; -using thrust::system::cpp::tag; - -} // end cpp -} // end thrust - diff --git a/compat/thrust/system/cpp/detail/extrema.h b/compat/thrust/system/cpp/detail/extrema.h deleted file mode 100644 index 3eab6d406d..0000000000 --- a/compat/thrust/system/cpp/detail/extrema.h +++ /dev/null @@ -1,72 +0,0 @@ 
-/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file extrema.h - * \brief C++ implementations of extrema functions. - */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - -template -ForwardIterator min_element(execution_policy &, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) -{ - return thrust::system::detail::internal::scalar::min_element(first, last, comp); -} - - -template -ForwardIterator max_element(execution_policy &, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) -{ - return thrust::system::detail::internal::scalar::max_element(first, last, comp); -} - - -template -thrust::pair minmax_element(execution_policy &, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) -{ - return thrust::system::detail::internal::scalar::minmax_element(first, last, comp); -} - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/fill.h b/compat/thrust/system/cpp/detail/fill.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cpp/detail/fill.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use 
this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cpp/detail/find.h b/compat/thrust/system/cpp/detail/find.h deleted file mode 100644 index 9698524ed8..0000000000 --- a/compat/thrust/system/cpp/detail/find.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file find.h - * \brief C++ implementation of find_if. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - -template -InputIterator find_if(tag, - InputIterator first, - InputIterator last, - Predicate pred) -{ - return thrust::system::detail::internal::scalar::find_if(first, last, pred); -} - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/for_each.h b/compat/thrust/system/cpp/detail/for_each.h deleted file mode 100644 index 8d4e1c730b..0000000000 --- a/compat/thrust/system/cpp/detail/for_each.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - - -template -InputIterator for_each(thrust::system::cpp::detail::execution_policy &, - InputIterator first, - InputIterator last, - UnaryFunction f) -{ - return thrust::system::detail::internal::scalar::for_each(first, last, f); -} - -template -InputIterator for_each_n(thrust::system::cpp::detail::execution_policy &, - InputIterator first, - Size n, - UnaryFunction f) -{ - return thrust::system::detail::internal::scalar::for_each_n(first, n, f); -} - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/gather.h b/compat/thrust/system/cpp/detail/gather.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cpp/detail/gather.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cpp/detail/generate.h b/compat/thrust/system/cpp/detail/generate.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cpp/detail/generate.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cpp/detail/get_value.h b/compat/thrust/system/cpp/detail/get_value.h deleted file mode 100644 index 5ddb2c8349..0000000000 --- a/compat/thrust/system/cpp/detail/get_value.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - - -template -__host__ __device__ - typename thrust::iterator_value::type - get_value(thrust::system::cpp::detail::execution_policy &, Pointer ptr) -{ - return *thrust::raw_pointer_cast(ptr); -} // end get_value() - - -} // end detail -} // end cpp -} // end system -} // end thrust - diff --git a/compat/thrust/system/cpp/detail/inner_product.h b/compat/thrust/system/cpp/detail/inner_product.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cpp/detail/inner_product.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cpp/detail/iter_swap.h b/compat/thrust/system/cpp/detail/iter_swap.h deleted file mode 100644 index 257276ffea..0000000000 --- a/compat/thrust/system/cpp/detail/iter_swap.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - - -template -__host__ __device__ - void iter_swap(tag, Pointer1 a, Pointer2 b) -{ - using thrust::swap; - swap(*thrust::raw_pointer_cast(a), *thrust::raw_pointer_cast(b)); -} // end iter_swap() - - -} // end detail -} // end cpp -} // end system -} // end thrust - diff --git a/compat/thrust/system/cpp/detail/logical.h b/compat/thrust/system/cpp/detail/logical.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cpp/detail/logical.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cpp/detail/malloc_and_free.h b/compat/thrust/system/cpp/detail/malloc_and_free.h deleted file mode 100644 index 4f8ae82092..0000000000 --- a/compat/thrust/system/cpp/detail/malloc_and_free.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include // for malloc & free -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - - -// note that malloc returns a raw pointer to avoid -// depending on the heavyweight thrust/system/cpp/memory.h header -template - void *malloc(execution_policy &, std::size_t n) -{ - return std::malloc(n); -} // end malloc() - - -template - void free(execution_policy &, Pointer ptr) -{ - std::free(thrust::raw_pointer_cast(ptr)); -} // end free() - - -} // end detail -} // end cpp -} // end system -} // end thrust - diff --git a/compat/thrust/system/cpp/detail/memory.inl b/compat/thrust/system/cpp/detail/memory.inl deleted file mode 100644 index 7f9a48dee7..0000000000 --- a/compat/thrust/system/cpp/detail/memory.inl +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include - -namespace thrust -{ - -// XXX WAR an issue with MSVC 2005 (cl v14.00) incorrectly implementing -// pointer_raw_pointer for pointer by specializing it here -#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400) -namespace detail -{ - -template - struct pointer_raw_pointer< thrust::cpp::pointer > -{ - typedef typename thrust::cpp::pointer::raw_pointer type; -}; // end pointer_raw_pointer - -} // end detail -#endif - -namespace system -{ -namespace cpp -{ - - -template - template - reference & - reference - ::operator=(const reference &other) -{ - return super_t::operator=(other); -} // end reference::operator=() - -template - reference & - reference - ::operator=(const value_type &x) -{ - return super_t::operator=(x); -} // end reference::operator=() - -template -__host__ __device__ -void swap(reference a, reference b) -{ - a.swap(b); -} // end swap() - -pointer malloc(std::size_t n) -{ - tag t; - return pointer(thrust::system::cpp::detail::malloc(t, n)); -} // end malloc() - -template -pointer malloc(std::size_t n) -{ - pointer raw_ptr = thrust::system::cpp::malloc(sizeof(T) * n); - return pointer(reinterpret_cast(raw_ptr.get())); -} // end malloc() - -void free(pointer ptr) -{ - tag t; - return thrust::system::cpp::detail::free(t, ptr); -} // end free() - -} // end cpp -} // end system -} // end thrust - diff --git a/compat/thrust/system/cpp/detail/merge.h b/compat/thrust/system/cpp/detail/merge.h deleted file mode 100644 index 7f01c0713a..0000000000 --- 
a/compat/thrust/system/cpp/detail/merge.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - -template -OutputIterator merge(execution_policy &, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - return thrust::system::detail::internal::scalar::merge(first1, last1, first2, last2, result, comp); -} - -template -thrust::pair - merge_by_key(execution_policy &, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp) -{ - return thrust::system::detail::internal::scalar::merge_by_key(keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, comp); -} - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/mismatch.h b/compat/thrust/system/cpp/detail/mismatch.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cpp/detail/mismatch.h +++ /dev/null @@ -1,22 
+0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cpp/detail/par.h b/compat/thrust/system/cpp/detail/par.h deleted file mode 100644 index 953e5274db..0000000000 --- a/compat/thrust/system/cpp/detail/par.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - - -struct par_t : thrust::system::cpp::detail::execution_policy -{ - par_t() : thrust::system::cpp::detail::execution_policy() {} - - template - thrust::detail::execute_with_allocator - operator()(Allocator &alloc) const - { - return thrust::detail::execute_with_allocator(alloc); - } -}; - - -} // end detail - - -static const detail::par_t par; - - -} // end cpp -} // end system - - -// alias par here -namespace cpp -{ - - -using thrust::system::cpp::par; - - -} // end cpp -} // end thrust - diff --git a/compat/thrust/system/cpp/detail/partition.h b/compat/thrust/system/cpp/detail/partition.h deleted file mode 100644 index 25a4f1c346..0000000000 --- a/compat/thrust/system/cpp/detail/partition.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file partition.h - * \brief cpp implementations of partition functions - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - -template - ForwardIterator stable_partition(tag, - ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - return thrust::system::detail::internal::scalar::stable_partition(first, last, pred); -} - -template - ForwardIterator stable_partition(tag, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred) -{ - return thrust::system::detail::internal::scalar::stable_partition(first, last, stencil, pred); -} - -template - thrust::pair - stable_partition_copy(tag, - InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - return thrust::system::detail::internal::scalar::stable_partition_copy(first, last, out_true, out_false, pred); -} - -template - thrust::pair - stable_partition_copy(tag, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - return thrust::system::detail::internal::scalar::stable_partition_copy(first, last, stencil, out_true, out_false, pred); -} - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/reduce.h b/compat/thrust/system/cpp/detail/reduce.h deleted file mode 100644 index 5428206ba3..0000000000 --- a/compat/thrust/system/cpp/detail/reduce.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file reduce.h - * \brief C++ implementation of reduce algorithms. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - -template - OutputType reduce(execution_policy &, - InputIterator begin, - InputIterator end, - OutputType init, - BinaryFunction binary_op) -{ - return thrust::system::detail::internal::scalar::reduce(begin, end, init, binary_op); -} - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/reduce_by_key.h b/compat/thrust/system/cpp/detail/reduce_by_key.h deleted file mode 100644 index 22dc2d9d3d..0000000000 --- a/compat/thrust/system/cpp/detail/reduce_by_key.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - -template - thrust::pair - reduce_by_key(execution_policy &, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred, - BinaryFunction binary_op) -{ - return thrust::system::detail::internal::scalar::reduce_by_key(keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op); -} - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/remove.h b/compat/thrust/system/cpp/detail/remove.h deleted file mode 100644 index cf2202bee3..0000000000 --- a/compat/thrust/system/cpp/detail/remove.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - -template - ForwardIterator remove_if(tag, - ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - return thrust::system::detail::internal::scalar::remove_if(first, last, pred); -} - - -template - ForwardIterator remove_if(tag, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred) -{ - return thrust::system::detail::internal::scalar::remove_if(first, last, stencil, pred); -} - - -template - OutputIterator remove_copy_if(tag, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred) -{ - return thrust::system::detail::internal::scalar::remove_copy_if(first, last, result, pred); -} - - - -template - OutputIterator remove_copy_if(tag, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred) -{ - return thrust::system::detail::internal::scalar::remove_copy_if(first, last, stencil, result, pred); -} - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/replace.h b/compat/thrust/system/cpp/detail/replace.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cpp/detail/replace.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cpp/detail/reverse.h b/compat/thrust/system/cpp/detail/reverse.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cpp/detail/reverse.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cpp/detail/scan.h b/compat/thrust/system/cpp/detail/scan.h deleted file mode 100644 index d4bae1e739..0000000000 --- a/compat/thrust/system/cpp/detail/scan.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file scan.h - * \brief C++ implementations of scan functions. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - -template - OutputIterator inclusive_scan(execution_policy &, - InputIterator first, - InputIterator last, - OutputIterator result, - BinaryFunction binary_op) -{ - return thrust::system::detail::internal::scalar::inclusive_scan(first, last, result, binary_op); -} - - -template - OutputIterator exclusive_scan(execution_policy &, - InputIterator first, - InputIterator last, - OutputIterator result, - T init, - BinaryFunction binary_op) -{ - return thrust::system::detail::internal::scalar::exclusive_scan(first, last, result, init, binary_op); -} - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/scan_by_key.h b/compat/thrust/system/cpp/detail/scan_by_key.h deleted file mode 100644 index 4165d842fd..0000000000 --- a/compat/thrust/system/cpp/detail/scan_by_key.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - -template - OutputIterator inclusive_scan_by_key(tag, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryPredicate binary_pred, - BinaryFunction binary_op) -{ - return thrust::system::detail::internal::scalar::inclusive_scan_by_key(first1, last1, first2, result, binary_pred, binary_op); -} - - -template - OutputIterator exclusive_scan_by_key(tag, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init, - BinaryPredicate binary_pred, - BinaryFunction binary_op) -{ - return thrust::system::detail::internal::scalar::exclusive_scan_by_key(first1, last1, first2, result, init, binary_pred, binary_op); -} - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/scatter.h b/compat/thrust/system/cpp/detail/scatter.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cpp/detail/scatter.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cpp/detail/sequence.h b/compat/thrust/system/cpp/detail/sequence.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cpp/detail/sequence.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cpp/detail/set_operations.h b/compat/thrust/system/cpp/detail/set_operations.h deleted file mode 100644 index 07ce71257b..0000000000 --- a/compat/thrust/system/cpp/detail/set_operations.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - - -template - OutputIterator set_difference(execution_policy &, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - return thrust::system::detail::internal::scalar::set_difference(first1, last1, first2, last2, result, comp); -} - - -template - OutputIterator set_intersection(execution_policy &, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - return thrust::system::detail::internal::scalar::set_intersection(first1, last1, first2, last2, result, comp); -} - - -template - OutputIterator set_symmetric_difference(execution_policy &, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - return thrust::system::detail::internal::scalar::set_symmetric_difference(first1, last1, first2, last2, result, comp); -} - - -template - OutputIterator set_union(execution_policy &, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - return thrust::system::detail::internal::scalar::set_union(first1, last1, first2, last2, result, comp); -} - - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/sort.h b/compat/thrust/system/cpp/detail/sort.h deleted file mode 100644 index 60244e22a3..0000000000 --- a/compat/thrust/system/cpp/detail/sort.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the 
License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - -template - void stable_sort(execution_policy &, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - thrust::system::detail::internal::scalar::stable_sort(first, last, comp); -} - -template - void stable_sort_by_key(execution_policy &, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp) -{ - thrust::system::detail::internal::scalar::stable_sort_by_key(keys_first, keys_last, values_first, comp); -} - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/swap_ranges.h b/compat/thrust/system/cpp/detail/swap_ranges.h deleted file mode 100644 index a834a2c0ed..0000000000 --- a/compat/thrust/system/cpp/detail/swap_ranges.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// cpp has no special swap_ranges - diff --git a/compat/thrust/system/cpp/detail/tabulate.h b/compat/thrust/system/cpp/detail/tabulate.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cpp/detail/tabulate.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cpp/detail/temporary_buffer.h b/compat/thrust/system/cpp/detail/temporary_buffer.h deleted file mode 100644 index 628bd75719..0000000000 --- a/compat/thrust/system/cpp/detail/temporary_buffer.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special temporary buffer functions - diff --git a/compat/thrust/system/cpp/detail/transform.h b/compat/thrust/system/cpp/detail/transform.h deleted file mode 100644 index 5909d4a4fb..0000000000 --- a/compat/thrust/system/cpp/detail/transform.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// cpp has no special transform - diff --git a/compat/thrust/system/cpp/detail/transform_reduce.h b/compat/thrust/system/cpp/detail/transform_reduce.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cpp/detail/transform_reduce.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cpp/detail/transform_scan.h b/compat/thrust/system/cpp/detail/transform_scan.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cpp/detail/transform_scan.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cpp/detail/uninitialized_copy.h b/compat/thrust/system/cpp/detail/uninitialized_copy.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cpp/detail/uninitialized_copy.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cpp/detail/uninitialized_fill.h b/compat/thrust/system/cpp/detail/uninitialized_fill.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cpp/detail/uninitialized_fill.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cpp/detail/unique.h b/compat/thrust/system/cpp/detail/unique.h deleted file mode 100644 index cf740498e1..0000000000 --- a/compat/thrust/system/cpp/detail/unique.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - -template - ForwardIterator unique(execution_policy &, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate binary_pred) -{ - return thrust::system::detail::internal::scalar::unique(first, last, binary_pred); -} - -template - OutputIterator unique_copy(execution_policy &, - InputIterator first, - InputIterator last, - OutputIterator output, - BinaryPredicate binary_pred) -{ - return thrust::system::detail::internal::scalar::unique_copy(first, last, output, binary_pred); -} - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/unique_by_key.h b/compat/thrust/system/cpp/detail/unique_by_key.h deleted file mode 100644 index a9f13d6a27..0000000000 --- a/compat/thrust/system/cpp/detail/unique_by_key.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - -template - thrust::pair - unique_by_key(execution_policy &, - ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first, - BinaryPredicate binary_pred) -{ - return thrust::system::detail::internal::scalar::unique_by_key(keys_first, keys_last, values_first, binary_pred); -} - - -template - thrust::pair - unique_by_key_copy(execution_policy &, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred) -{ - return thrust::system::detail::internal::scalar::unique_by_key_copy(keys_first, keys_last, values_first, keys_output, values_output, binary_pred); -} - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cpp/detail/vector.inl b/compat/thrust/system/cpp/detail/vector.inl deleted file mode 100644 index 03bffcd8aa..0000000000 --- a/compat/thrust/system/cpp/detail/vector.inl +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ - -template - vector - ::vector() - : super_t() -{} - -template - vector - ::vector(size_type n) - : super_t(n) -{} - -template - vector - ::vector(size_type n, const value_type &value) - : super_t(n,value) -{} - -template - vector - ::vector(const vector &x) - : super_t(x) -{} - -template - template - vector - ::vector(const thrust::detail::vector_base &x) - : super_t(x) -{} - -template - template - vector - ::vector(const std::vector &x) - : super_t(x) -{} - -template - template - vector - ::vector(InputIterator first, InputIterator last) - : super_t(first,last) -{} - -template - template - vector & - vector - ::operator=(const std::vector &x) -{ - super_t::operator=(x); - return *this; -} - -template - template - vector & - vector - ::operator=(const thrust::detail::vector_base &x) -{ - super_t::operator=(x); - return *this; -} - -} // end cpp -} // end system -} // end thrust - diff --git a/compat/thrust/system/cpp/execution_policy.h b/compat/thrust/system/cpp/execution_policy.h deleted file mode 100644 index f192eb9659..0000000000 --- a/compat/thrust/system/cpp/execution_policy.h +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -/*! \file thrust/system/cpp/execution_policy.h - * \brief Execution policies for Thrust's standard C++ system. 
- */ - -#include - -// get the execution policies definitions first -#include - -// get the definition of par -#include - -// now get all the algorithm definitions - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -// define these entities here for the purpose of Doxygenating them -// they are actually defined elsewhere -#if 0 -namespace thrust -{ -namespace system -{ -namespace cpp -{ - - -/*! \addtogroup execution_policies - * \{ - */ - - -/*! \p thrust::system::cpp::execution_policy is the base class for all Thrust parallel execution - * policies which are derived from Thrust's standard C++ backend system. - */ -template -struct execution_policy : thrust::execution_policy -{}; - - -/*! \p thrust::system::cpp::tag is a type representing Thrust's standard C++ backend system in C++'s type system. - * Iterators "tagged" with a type which is convertible to \p cpp::tag assert that they may be - * "dispatched" to algorithm implementations in the \p cpp system. - */ -struct tag : thrust::system::cpp::execution_policy { unspecified }; - - -/*! - * \p thrust::system::cpp::par is the parallel execution policy associated with Thrust's standard - * C++ backend system. - * - * Instead of relying on implicit algorithm dispatch through iterator system tags, users may - * directly target Thrust's C++ backend system by providing \p thrust::cpp::par as an algorithm - * parameter. - * - * Explicit dispatch can be useful in avoiding the introduction of data copies into containers such - * as \p thrust::cpp::vector. - * - * The type of \p thrust::cpp::par is implementation-defined. 
- * - * The following code snippet demonstrates how to use \p thrust::cpp::par to explicitly dispatch an - * invocation of \p thrust::for_each to the standard C++ backend system: - * - * \code - * #include - * #include - * #include - * - * struct printf_functor - * { - * __host__ __device__ - * void operator()(int x) - * { - * printf("%d\n"); - * } - * }; - * ... - * int vec[3]; - * vec[0] = 0; vec[1] = 1; vec[2] = 2; - * - * thrust::for_each(thrust::cpp::par, vec.begin(), vec.end(), printf_functor()); - * - * // 0 1 2 is printed to standard output in some unspecified order - * \endcode - */ -static const unspecified par; - - -/*! \} - */ - - -} // end cpp -} // end system -} // end thrust -#endif - - diff --git a/compat/thrust/system/cpp/memory.h b/compat/thrust/system/cpp/memory.h deleted file mode 100644 index f3a58b8c32..0000000000 --- a/compat/thrust/system/cpp/memory.h +++ /dev/null @@ -1,414 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file thrust/system/cpp/memory.h - * \brief Managing memory associated with Thrust's standard C++ system. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ - -template class pointer; - -} // end cpp -} // end system -} // end thrust - - -/*! 
\cond - */ - -// specialize std::iterator_traits to avoid problems with the name of -// pointer's constructor shadowing its nested pointer type -// do this before pointer is defined so the specialization is correctly -// used inside the definition -namespace std -{ - -template - struct iterator_traits > -{ - private: - typedef thrust::system::cpp::pointer ptr; - - public: - typedef typename ptr::iterator_category iterator_category; - typedef typename ptr::value_type value_type; - typedef typename ptr::difference_type difference_type; - typedef ptr pointer; - typedef typename ptr::reference reference; -}; // end iterator_traits - -} // end std - -/*! \endcond - */ - - -namespace thrust -{ -namespace system -{ - -/*! \addtogroup system_backends Systems - * \ingroup system - * \{ - */ - -/*! \namespace thrust::system::cpp - * \brief \p thrust::system::cpp is the namespace containing functionality for allocating, manipulating, - * and deallocating memory available to Thrust's standard C++ backend system. - * The identifiers are provided in a separate namespace underneath thrust::system - * for import convenience but are also aliased in the top-level thrust::cpp - * namespace for easy access. - * - */ -namespace cpp -{ - -// forward declaration of reference for pointer -template class reference; - -/*! \cond - */ - -// XXX nvcc + msvc have trouble instantiating reference below -// this is a workaround -namespace detail -{ - -template - struct reference_msvc_workaround -{ - typedef thrust::system::cpp::reference type; -}; // end reference_msvc_workaround - -} // end detail - -/*! \endcond - */ - - -/*! \p pointer stores a pointer to an object allocated in memory available to the cpp system. - * This type provides type safety when dispatching standard algorithms on ranges resident - * in cpp memory. - * - * \p pointer has pointer semantics: it may be dereferenced and manipulated with pointer arithmetic. 
- * - * \p pointer can be created with the function \p cpp::malloc, or by explicitly calling its constructor - * with a raw pointer. - * - * The raw pointer encapsulated by a \p pointer may be obtained by eiter its get member function - * or the \p raw_pointer_cast function. - * - * \note \p pointer is not a "smart" pointer; it is the programmer's responsibility to deallocate memory - * pointed to by \p pointer. - * - * \tparam T specifies the type of the pointee. - * - * \see cpp::malloc - * \see cpp::free - * \see raw_pointer_cast - */ -template - class pointer - : public thrust::pointer< - T, - thrust::system::cpp::tag, - thrust::system::cpp::reference, - thrust::system::cpp::pointer - > -{ - /*! \cond - */ - - private: - typedef thrust::pointer< - T, - thrust::system::cpp::tag, - //thrust::system::cpp::reference, - typename detail::reference_msvc_workaround::type, - thrust::system::cpp::pointer - > super_t; - - /*! \endcond - */ - - public: - // note that cpp::pointer's member functions need __host__ __device__ - // to interoperate with nvcc + iterators' dereference member function - - /*! \p pointer's no-argument constructor initializes its encapsulated pointer to \c 0. - */ - __host__ __device__ - pointer() : super_t() {} - - /*! This constructor allows construction of a pointer from a T*. - * - * \param ptr A raw pointer to copy from, presumed to point to a location in memory - * accessible by the \p cpp system. - * \tparam OtherT \p OtherT shall be convertible to \p T. - */ - template - __host__ __device__ - explicit pointer(OtherT *ptr) : super_t(ptr) {} - - /*! This constructor allows construction from another pointer-like object with related type. - * - * \param other The \p OtherPointer to copy. - * \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible - * to \p thrust::system::cpp::tag and its element type shall be convertible to \p T. 
- */ - template - __host__ __device__ - pointer(const OtherPointer &other, - typename thrust::detail::enable_if_pointer_is_convertible< - OtherPointer, - pointer - >::type * = 0) : super_t(other) {} - - /*! Assignment operator allows assigning from another pointer-like object with related type. - * - * \param other The other pointer-like object to assign from. - * \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible - * to \p thrust::system::cpp::tag and its element type shall be convertible to \p T. - */ - template - __host__ __device__ - typename thrust::detail::enable_if_pointer_is_convertible< - OtherPointer, - pointer, - pointer & - >::type - operator=(const OtherPointer &other) - { - return super_t::operator=(other); - } -}; // end pointer - - -/*! \p reference is a wrapped reference to an object stored in memory available to the \p cpp system. - * \p reference is the type of the result of dereferencing a \p cpp::pointer. - * - * \tparam T Specifies the type of the referenced object. - */ -template - class reference - : public thrust::reference< - T, - thrust::system::cpp::pointer, - thrust::system::cpp::reference - > -{ - /*! \cond - */ - - private: - typedef thrust::reference< - T, - thrust::system::cpp::pointer, - thrust::system::cpp::reference - > super_t; - - /*! \endcond - */ - - public: - /*! \cond - */ - - typedef typename super_t::value_type value_type; - typedef typename super_t::pointer pointer; - - /*! \endcond - */ - - /*! This constructor initializes this \p reference to refer to an object - * pointed to by the given \p pointer. After this \p reference is constructed, - * it shall refer to the object pointed to by \p ptr. - * - * \param ptr A \p pointer to copy from. - */ - __host__ __device__ - explicit reference(const pointer &ptr) - : super_t(ptr) - {} - - /*! This constructor accepts a const reference to another \p reference of related type. 
- * After this \p reference is constructed, it shall refer to the same object as \p other. - * - * \param other A \p reference to copy from. - * \tparam OtherT The element type of the other \p reference. - * - * \note This constructor is templated primarily to allow initialization of reference - * from reference. - */ - template - __host__ __device__ - reference(const reference &other, - typename thrust::detail::enable_if_convertible< - typename reference::pointer, - pointer - >::type * = 0) - : super_t(other) - {} - - /*! Copy assignment operator copy assigns from another \p reference of related type. - * - * \param other The other \p reference to assign from. - * \return *this - * \tparam OtherT The element type of the other \p reference. - */ - template - reference &operator=(const reference &other); - - /*! Assignment operator assigns from a \p value_type. - * - * \param x The \p value_type to assign from. - * \return *this - */ - reference &operator=(const value_type &x); -}; // end reference - -/*! Exchanges the values of two objects referred to by \p reference. - * \p x The first \p reference of interest. - * \p y The second \p reference ot interest. - */ -template -__host__ __device__ -void swap(reference x, reference y); - -/*! Allocates an area of memory available to Thrust's cpp system. - * \param n Number of bytes to allocate. - * \return A cpp::pointer pointing to the beginning of the newly - * allocated memory. A null cpp::pointer is returned if - * an error occurs. - * \note The cpp::pointer returned by this function must be - * deallocated with \p cpp::free. - * \see cpp::free - * \see std::malloc - */ -inline pointer malloc(std::size_t n); - -/*! Allocates a typed area of memory available to Thrust's cpp system. - * \param n Number of elements to allocate. - * \return A cpp::pointer pointing to the beginning of the newly - * allocated elements. A null cpp::pointer is returned if - * an error occurs. 
- * \note The cpp::pointer returned by this function must be - * deallocated with \p cpp::free. - * \see cpp::free - * \see std::malloc - */ -template -inline pointer malloc(std::size_t n); - -/*! Deallocates an area of memory previously allocated by cpp::malloc. - * \param ptr A cpp::pointer pointing to the beginning of an area - * of memory previously allocated with cpp::malloc. - * \see cpp::malloc - * \see std::free - */ -inline void free(pointer ptr); - -// XXX upon c++11 -// template using allocator = thrust::detail::malloc_allocator >; - -/*! \p cpp::allocator is the default allocator used by the \p cpp system's containers such as - * cpp::vector if no user-specified allocator is provided. \p cpp::allocator allocates - * (deallocates) storage with \p cpp::malloc (\p cpp::free). - */ -template - struct allocator - : thrust::detail::malloc_allocator< - T, - tag, - pointer - > -{ - /*! The \p rebind metafunction provides the type of an \p allocator - * instantiated with another type. - * - * \tparam U The other type to use for instantiation. - */ - template - struct rebind - { - /*! The typedef \p other gives the type of the rebound \p allocator. - */ - typedef allocator other; - }; - - /*! No-argument constructor has no effect. - */ - __host__ __device__ - inline allocator() {} - - /*! Copy constructor has no effect. - */ - __host__ __device__ - inline allocator(const allocator &) {} - - /*! Constructor from other \p allocator has no effect. - */ - template - __host__ __device__ - inline allocator(const allocator &) {} - - /*! Destructor has no effect. - */ - __host__ __device__ - inline ~allocator() {} -}; // end allocator - -} // end cpp - -/*! \} - */ - -} // end system - -/*! \namespace thrust::cpp - * \brief \p thrust::cpp is a top-level alias for thrust::system::cpp. 
- */ -namespace cpp -{ - -using thrust::system::cpp::pointer; -using thrust::system::cpp::reference; -using thrust::system::cpp::malloc; -using thrust::system::cpp::free; -using thrust::system::cpp::allocator; - -} // end cpp - -} // end thrust - -#include - diff --git a/compat/thrust/system/cpp/vector.h b/compat/thrust/system/cpp/vector.h deleted file mode 100644 index 4282df991a..0000000000 --- a/compat/thrust/system/cpp/vector.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file thrust/system/cpp/vector.h - * \brief A dynamically-sizable array of elements which reside in memory available to - * Thrust's standard C++ system. - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -// forward declaration of host_vector -template class host_vector; - -namespace system -{ -namespace cpp -{ - -// XXX upon c++11 -// template > using vector = thrust::detail::vector_base; - -/*! \p cpp::vector is a container that supports random access to elements, - * constant time removal of elements at the end, and linear time insertion - * and removal of elements at the beginning or in the middle. The number of - * elements in a \p cpp::vector may vary dynamically; memory management is - * automatic. The elements contained in a \p cpp::vector reside in memory - * available to the \p cpp system. - * - * \tparam T The element type of the \p cpp::vector. 
- * \tparam Allocator The allocator type of the \p cpp::vector. Defaults to \p cpp::allocator. - * - * \see http://www.sgi.com/tech/stl/Vector.html - * \see host_vector For the documentation of the complete interface which is - * shared by \p cpp::vector - * \see device_vector - */ -template > - class vector - : public thrust::detail::vector_base -{ - /*! \cond - */ - private: - typedef thrust::detail::vector_base super_t; - /*! \endcond - */ - - public: - - /*! \cond - */ - typedef typename super_t::size_type size_type; - typedef typename super_t::value_type value_type; - - /*! \endcond - */ - - /*! This constructor creates an empty \p cpp::vector. - */ - vector(); - - /*! This constructor creates a \p cpp::vector with \p n default-constructed elements. - * \param n The size of the \p cpp::vector to create. - */ - explicit vector(size_type n); - - /*! This constructor creates a \p cpp::vector with \p n copies of \p value. - * \param n The size of the \p cpp::vector to create. - * \param value An element to copy. - */ - explicit vector(size_type n, const value_type &value); - - /*! Copy constructor copies from another \p cpp::vector. - * \param x The other \p cpp::vector to copy. - */ - vector(const vector &x); - - /*! This constructor copies from another Thrust vector-like object. - * \param x The other object to copy from. - */ - template - vector(const thrust::detail::vector_base &x); - - /*! This constructor copies from a \c std::vector. - * \param x The \c std::vector to copy from. - */ - template - vector(const std::vector &x); - - /*! This constructor creates a \p cpp::vector by copying from a range. - * \param first The beginning of the range. - * \param last The end of the range. - */ - template - vector(InputIterator first, InputIterator last); - - // XXX vector_base should take a Derived type so we don't have to define these superfluous assigns - - /*! Assignment operator assigns from a \c std::vector. - * \param x The \c std::vector to assign from. 
- * \return *this - */ - template - vector &operator=(const std::vector &x); - - /*! Assignment operator assigns from another Thrust vector-like object. - * \param x The other object to assign from. - * \return *this - */ - template - vector &operator=(const thrust::detail::vector_base &x); -}; // end vector - -} // end cpp -} // end system - -// alias system::cpp names at top-level -namespace cpp -{ - -using thrust::system::cpp::vector; - -} // end cpp - -} // end thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/adjacent_difference.h b/compat/thrust/system/cuda/detail/adjacent_difference.h deleted file mode 100644 index ec51794ff9..0000000000 --- a/compat/thrust/system/cuda/detail/adjacent_difference.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file adjacent_difference.h - * \brief CUDA implementation of adjacent_difference. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -template -OutputIterator adjacent_difference(execution_policy &exec, - InputIterator first, InputIterator last, - OutputIterator result, - BinaryFunction binary_op); - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/adjacent_difference.inl b/compat/thrust/system/cuda/detail/adjacent_difference.inl deleted file mode 100644 index 9e4756a5a3..0000000000 --- a/compat/thrust/system/cuda/detail/adjacent_difference.inl +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ - -template -struct last_index_in_each_interval : public thrust::unary_function -{ - typedef typename Decomposition::index_type index_type; - - Decomposition decomp; - - last_index_in_each_interval(Decomposition decomp) : decomp(decomp) {} - - __host__ __device__ - index_type operator()(index_type interval) - { - return decomp[interval].end() - 1; - } -}; - -template -struct adjacent_difference_closure -{ - InputIterator1 input; - InputIterator2 input_copy; - OutputIterator output; - BinaryFunction binary_op; - Decomposition decomp; - Context context; - - typedef Context context_type; - - adjacent_difference_closure(InputIterator1 input, - InputIterator2 input_copy, - OutputIterator output, - BinaryFunction binary_op, - Decomposition decomp, - Context context = Context()) - : input(input), input_copy(input_copy), output(output), binary_op(binary_op), decomp(decomp), context(context) {} - - __device__ __thrust_forceinline__ - void operator()(void) - { - typedef typename thrust::iterator_value::type InputType; - typedef typename Decomposition::index_type index_type; - - // this block processes results in [range.begin(), range.end()) - thrust::system::detail::internal::index_range range = decomp[context.block_index()]; - - input_copy += context.block_index() - 1; - - // prime the temp values for all threads so we don't need to launch a default constructor - InputType next_left = (context.block_index() == 0) ? 
*input : *input_copy; - - index_type base = range.begin(); - index_type i = range.begin() + context.thread_index(); - - if (i < range.end()) - { - if (context.thread_index() > 0) - { - InputIterator1 temp = input + (i - 1); - next_left = *temp; - } - } - - input += i; - output += i; - - while (base < range.end()) - { - InputType curr_left = next_left; - - if (i + context.block_dimension() < range.end()) - { - InputIterator1 temp = input + (context.block_dimension() - 1); - next_left = *temp; - } - - context.barrier(); - - if (i < range.end()) - { - if (i == 0) - *output = *input; - else - { - InputType x = *input; - *output = binary_op(x, curr_left); - } - } - - i += context.block_dimension(); - base += context.block_dimension(); - input += context.block_dimension(); - output += context.block_dimension(); - } - } -}; - -} // end namespace detail - - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN - -template -OutputIterator adjacent_difference(execution_policy &exec, - InputIterator first, InputIterator last, - OutputIterator result, - BinaryFunction binary_op) -{ - typedef typename thrust::iterator_value::type InputType; - typedef typename thrust::iterator_difference::type IndexType; - typedef thrust::system::detail::internal::uniform_decomposition Decomposition; - - IndexType n = last - first; - - if (n == 0) - return result; - - Decomposition decomp = default_decomposition(last - first); - - // allocate temporary storage - thrust::detail::temporary_array temp(exec, decomp.size() - 1); - - // gather last value in each interval - detail::last_index_in_each_interval unary_op(decomp); - thrust::gather(exec, - thrust::make_transform_iterator(thrust::counting_iterator(0), unary_op), - thrust::make_transform_iterator(thrust::counting_iterator(0), unary_op) + (decomp.size() - 1), - first, - temp.begin()); - - - typedef typename thrust::detail::temporary_array::iterator InputIterator2; - typedef detail::blocked_thread_array Context; - typedef 
detail::adjacent_difference_closure Closure; - - Closure closure(first, temp.begin(), result, binary_op, decomp); - - detail::launch_closure(closure, decomp.size()); - - return result + n; -} - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/assign_value.h b/compat/thrust/system/cuda/detail/assign_value.h deleted file mode 100644 index c90cf65b79..0000000000 --- a/compat/thrust/system/cuda/detail/assign_value.h +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - - -// XXX WAR an issue with msvc 2005 (cl v14.00) which creates multiply-defined -// symbols resulting from assign_value -#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400) - -namespace -{ - -template -inline __host__ __device__ - void assign_value_msvc2005_war(thrust::cuda::execution_policy &exec, Pointer1 dst, Pointer2 src) -{ - // XXX war nvbugs/881631 - struct war_nvbugs_881631 - { - __host__ inline static void host_path(thrust::cuda::execution_policy &exec, Pointer1 dst, Pointer2 src) - { - thrust::copy(exec, src, src + 1, dst); - } - - __device__ inline static void device_path(thrust::cuda::execution_policy &, Pointer1 dst, Pointer2 src) - { - *thrust::raw_pointer_cast(dst) = *thrust::raw_pointer_cast(src); - } - }; - -#ifndef __CUDA_ARCH__ - war_nvbugs_881631::host_path(exec,dst,src); -#else - war_nvbugs_881631::device_path(exec,dst,src); -#endif // __CUDA_ARCH__ -} // end assign_value_msvc2005_war() - -} // end anon namespace - -template -inline __host__ __device__ - void assign_value(thrust::cuda::execution_policy &exec, Pointer1 dst, Pointer2 src) -{ - return assign_value_msvc2005_war(exec,dst,src); -} // end assign_value() - -#else - -template -inline __host__ __device__ - void assign_value(thrust::cuda::execution_policy &exec, Pointer1 dst, Pointer2 src) -{ - // XXX war nvbugs/881631 - struct war_nvbugs_881631 - { - __host__ inline static void host_path(thrust::cuda::execution_policy &exec, Pointer1 dst, Pointer2 src) - { - thrust::copy(exec, src, src + 1, dst); - } - - __device__ inline static void device_path(thrust::cuda::execution_policy &, Pointer1 dst, Pointer2 src) - { - *thrust::raw_pointer_cast(dst) = *thrust::raw_pointer_cast(src); - } - }; - -#ifndef __CUDA_ARCH__ - war_nvbugs_881631::host_path(exec,dst,src); -#else - war_nvbugs_881631::device_path(exec,dst,src); 
-#endif // __CUDA_ARCH__ -} // end assign_value() - -#endif // msvc 2005 WAR - - -// XXX WAR an issue with msvc 2005 (cl v14.00) which creates multiply-defined -// symbols resulting from assign_value -#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400) - -namespace -{ - - -template -inline __host__ __device__ - void assign_value_msvc2005_war(cross_system &systems, Pointer1 dst, Pointer2 src) -{ - // XXX war nvbugs/881631 - struct war_nvbugs_881631 - { - __host__ inline static void host_path(cross_system &systems, Pointer1 dst, Pointer2 src) - { - // rotate the systems so that they are ordered the same as (src, dst) - // for the call to thrust::copy - cross_system rotated_systems = systems.rotate(); - thrust::copy(rotated_systems, src, src + 1, dst); - } - - __device__ inline static void device_path(cross_system &systems, Pointer1 dst, Pointer2 src) - { - // XXX forward the true cuda::execution_policy inside systems here - // instead of materializing a tag - thrust::cuda::tag cuda_tag; - thrust::system::cuda::detail::assign_value(cuda_tag, dst, src); - } - }; - -#if __CUDA_ARCH__ - war_nvbugs_881631::device_path(systems,dst,src); -#else - war_nvbugs_881631::host_path(systems,dst,src); -#endif -} // end assign_value_msvc2005_war - - -} // end anon namespace - - -template -inline __host__ __device__ - void assign_value(cross_system &systems, Pointer1 dst, Pointer2 src) -{ - return assign_value_msvc2005_war(systems,dst,src); -} // end assign_value() - - -#else - - -template -inline __host__ __device__ - void assign_value(cross_system &systems, Pointer1 dst, Pointer2 src) -{ - // XXX war nvbugs/881631 - struct war_nvbugs_881631 - { - __host__ inline static void host_path(cross_system &systems, Pointer1 dst, Pointer2 src) - { - // rotate the systems so that they are ordered the same as (src, dst) - // for the call to thrust::copy - cross_system rotated_systems = systems.rotate(); - thrust::copy(rotated_systems, src, src + 1, dst); - } - - 
__device__ inline static void device_path(cross_system &systems, Pointer1 dst, Pointer2 src) - { - // XXX forward the true cuda::execution_policy inside systems here - // instead of materializing a tag - thrust::cuda::tag cuda_tag; - thrust::system::cuda::detail::assign_value(cuda_tag, dst, src); - } - }; - -#if __CUDA_ARCH__ - war_nvbugs_881631::device_path(systems,dst,src); -#else - war_nvbugs_881631::host_path(systems,dst,src); -#endif -} // end assign_value() - - -#endif // msvc 2005 WAR - - -} // end detail -} // end cuda -} // end system -} // end thrust - diff --git a/compat/thrust/system/cuda/detail/binary_search.h b/compat/thrust/system/cuda/detail/binary_search.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/binary_search.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/block/copy.h b/compat/thrust/system/cuda/detail/block/copy.h deleted file mode 100644 index 9cc786bfcf..0000000000 --- a/compat/thrust/system/cuda/detail/block/copy.h +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file copy.h - * \brief CUDA implementation of device-to-device copy, - * based on Gregory Diamos' memcpy code. - */ - -#pragma once - -#include - -#include - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace block -{ - -namespace trivial_copy_detail -{ - - -template - inline __device__ thrust::pair quotient_and_remainder(Size n, Size d) -{ - Size quotient = n / d; - Size remainder = n - d * quotient; - return thrust::make_pair(quotient,remainder); -} // end quotient_and_remainder() - - -// assumes the addresses dst & src are aligned to T boundaries -template -__device__ __thrust_forceinline__ -void aligned_copy(Context context, T *dst, const T *src, unsigned int num_elements) -{ - for(unsigned int i = context.thread_index(); - i < num_elements; - i += context.block_dimension()) - { - dst[i] = src[i]; - } -} // end aligned_copy() - - -} // end namespace trivial_copy_detail - - -template -__device__ __thrust_forceinline__ -void trivial_copy(Context context, void* destination_, const void* source_, size_t num_bytes) -{ - // reinterpret at bytes - char* destination = reinterpret_cast(destination_); - const char* source = reinterpret_cast(source_); - - // TODO replace this with uint64 -#if THRUST_DEVICE_COMPILER != THRUST_DEVICE_COMPILER_NVCC - typedef long long int2; - typedef long long uint2; -#endif // THRUST_DEVICE_COMPILER_NVCC - - // check alignment - // XXX can we do this in three steps? - // 1. copy until alignment is met - // 2. go hog wild - // 3. 
get the remainder - if(reinterpret_cast(destination) % sizeof(uint2) != 0 || reinterpret_cast(source) % sizeof(uint2) != 0) - { - for(unsigned int i = context.thread_index(); i < num_bytes; i += context.block_dimension()) - { - destination[i] = source[i]; - } - } - else - { - // it's aligned; do a wide copy - - // this pair stores the number of int2s in the aligned portion of the arrays - // and the number of bytes in the remainder - const thrust::pair num_wide_elements_and_remainder_bytes = trivial_copy_detail::quotient_and_remainder(num_bytes, sizeof(int2)); - - // copy int2 elements - trivial_copy_detail::aligned_copy(context, - reinterpret_cast(destination), - reinterpret_cast(source), - num_wide_elements_and_remainder_bytes.first); - - // XXX we could copy int elements here - - // copy remainder byte by byte - - // to find the beginning of the remainder arrays, we need to point at the beginning, and then skip the number of bytes in the aligned portion - // this is sizeof(int2) times the number of int2s comprising the aligned portion - const char *remainder_first = reinterpret_cast(source + sizeof(int2) * num_wide_elements_and_remainder_bytes.first); - char *remainder_result = reinterpret_cast(destination + sizeof(int2) * num_wide_elements_and_remainder_bytes.first); - - trivial_copy_detail::aligned_copy(context, remainder_result, remainder_first, num_wide_elements_and_remainder_bytes.second); - } -} // end trivial_copy() - - -namespace detail -{ -namespace dispatch -{ - -template - __thrust_forceinline__ __device__ - RandomAccessIterator2 copy(Context context, - RandomAccessIterator1 first, - RandomAccessIterator1 last, - RandomAccessIterator2 result, - thrust::detail::true_type is_trivial_copy) -{ - typedef typename thrust::iterator_value::type T; - - const T *src = &thrust::raw_reference_cast(*first); - T *dst = &thrust::raw_reference_cast(*result); - - size_t n = (last - first); - thrust::system::cuda::detail::block::trivial_copy(context, dst, src, n * 
sizeof(T)); - return result + n; -} // end copy() - -template - __thrust_forceinline__ __device__ - RandomAccessIterator2 copy(Context context, - RandomAccessIterator1 first, - RandomAccessIterator1 last, - RandomAccessIterator2 result, - thrust::detail::false_type is_trivial_copy) -{ - RandomAccessIterator2 end_of_output = result + (last - first); - - // advance iterators - first += context.thread_index(); - result += context.thread_index(); - - for(; - first < last; - first += context.block_dimension(), - result += context.block_dimension()) - { - *result = *first; - } // end for - - return end_of_output; -} // end copy() - -} // end namespace dispatch -} // end namespace detail - -template - __thrust_forceinline__ __device__ - RandomAccessIterator2 copy(Context context, - RandomAccessIterator1 first, - RandomAccessIterator1 last, - RandomAccessIterator2 result) -{ - return detail::dispatch::copy(context, first, last, result, -#if __CUDA_ARCH__ < 200 - // does not work reliably on pre-Fermi due to "Warning: ... 
assuming global memory space" issues - thrust::detail::false_type() -#else - typename thrust::detail::dispatch::is_trivial_copy::type() -#endif - ); -} // end copy() - - -template -inline __device__ -RandomAccessIterator2 copy_n(Context &ctx, RandomAccessIterator1 first, Size n, RandomAccessIterator2 result) -{ - for(Size i = ctx.thread_index(); i < n; i += ctx.block_dimension()) - { - result[i] = first[i]; - } - - ctx.barrier(); - - return result + n; -} - - -} // end namespace block -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/block/exclusive_scan.h b/compat/thrust/system/cuda/detail/block/exclusive_scan.h deleted file mode 100644 index 580a7578bf..0000000000 --- a/compat/thrust/system/cuda/detail/block/exclusive_scan.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace block -{ - - -template -inline __device__ -typename thrust::iterator_value::type - inplace_exclusive_scan(Context &ctx, RandomAccessIterator first, T init, BinaryFunction op) -{ - // perform an inclusive scan, then shift right - block::inplace_inclusive_scan(ctx, first, op); - - typename thrust::iterator_value::type carry = first[ctx.block_dimension() - 1]; - - ctx.barrier(); - - typename thrust::iterator_value::type left = (ctx.thread_index() == 0) ? init : first[ctx.thread_index() - 1]; - - ctx.barrier(); - - first[ctx.thread_index()] = left; - - ctx.barrier(); - - return carry; -} - - -template -inline __device__ - typename thrust::iterator_value::type - inplace_exclusive_scan(Context &ctx, Iterator first, T init) -{ - return block::inplace_exclusive_scan(ctx, first, init, thrust::plus::type>()); -} - - -} // end namespace block -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/block/inclusive_scan.h b/compat/thrust/system/cuda/detail/block/inclusive_scan.h deleted file mode 100644 index 012f7cd2f7..0000000000 --- a/compat/thrust/system/cuda/detail/block/inclusive_scan.h +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace block -{ - -template -__device__ __thrust_forceinline__ -void inclusive_scan(Context context, - InputIterator first, - BinaryFunction binary_op) -{ - // TODO generalize to arbitrary n - // TODO support dynamic block_size - const unsigned int block_size = Context::ThreadsPerBlock::value; - - typename thrust::iterator_value::type val = first[context.thread_index()]; - - if(block_size > 1) { if (context.thread_index() >= 1) { val = binary_op(first[context.thread_index() - 1], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } - if(block_size > 2) { if (context.thread_index() >= 2) { val = binary_op(first[context.thread_index() - 2], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } - if(block_size > 4) { if (context.thread_index() >= 4) { val = binary_op(first[context.thread_index() - 4], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } - if(block_size > 8) { if (context.thread_index() >= 8) { val = binary_op(first[context.thread_index() - 8], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } - if(block_size > 16) { if (context.thread_index() >= 16) { val = binary_op(first[context.thread_index() - 16], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } - if(block_size > 32) { if (context.thread_index() >= 32) { val = binary_op(first[context.thread_index() - 32], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } - if(block_size > 64) { if (context.thread_index() >= 64) { val = binary_op(first[context.thread_index() - 64], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } - if(block_size > 128) { if (context.thread_index() >= 128) { val = binary_op(first[context.thread_index() - 128], val); } 
context.barrier(); first[context.thread_index()] = val; context.barrier(); } - if(block_size > 256) { if (context.thread_index() >= 256) { val = binary_op(first[context.thread_index() - 256], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } - if(block_size > 512) { if (context.thread_index() >= 512) { val = binary_op(first[context.thread_index() - 512], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } - if(block_size > 1024) { if (context.thread_index() >= 1024) { val = binary_op(first[context.thread_index() - 1024], val); } context.barrier(); first[context.thread_index()] = val; context.barrier(); } -} // end inclusive_scan() - - -template -__device__ __thrust_forceinline__ -void inclusive_scan_n(Context context, - InputIterator first, - Size n, - BinaryFunction binary_op) -{ - // TODO support n > context.block_dimension() - typename thrust::iterator_value::type val = first[context.thread_index()]; - - for (unsigned int i = 1; i < n; i <<= 1) - { - if (context.thread_index() < n && context.thread_index() >= i) - val = binary_op(first[context.thread_index() - i], val); - - context.barrier(); - - first[context.thread_index()] = val; - - context.barrier(); - } -} // end inclusive_scan() - - -template -__device__ __thrust_forceinline__ -void inclusive_scan_by_flag(Context context, - InputIterator1 first1, - InputIterator2 first2, - BinaryFunction binary_op) -{ - // TODO generalize to arbitrary n - // TODO support dynamic block_size - const unsigned int block_size = Context::ThreadsPerBlock::value; - - typename thrust::iterator_value::type flg = first1[context.thread_index()]; - typename thrust::iterator_value::type val = first2[context.thread_index()]; - - if(block_size > 1) { if (context.thread_index() >= 1) { if (!flg) { flg |= first1[context.thread_index() - 1]; val = binary_op(first2[context.thread_index() - 1], val); } } context.barrier(); first1[context.thread_index()] = flg; 
first2[context.thread_index()] = val; context.barrier(); } - if(block_size > 2) { if (context.thread_index() >= 2) { if (!flg) { flg |= first1[context.thread_index() - 2]; val = binary_op(first2[context.thread_index() - 2], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } - if(block_size > 4) { if (context.thread_index() >= 4) { if (!flg) { flg |= first1[context.thread_index() - 4]; val = binary_op(first2[context.thread_index() - 4], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } - if(block_size > 8) { if (context.thread_index() >= 8) { if (!flg) { flg |= first1[context.thread_index() - 8]; val = binary_op(first2[context.thread_index() - 8], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } - if(block_size > 16) { if (context.thread_index() >= 16) { if (!flg) { flg |= first1[context.thread_index() - 16]; val = binary_op(first2[context.thread_index() - 16], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } - if(block_size > 32) { if (context.thread_index() >= 32) { if (!flg) { flg |= first1[context.thread_index() - 32]; val = binary_op(first2[context.thread_index() - 32], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } - if(block_size > 64) { if (context.thread_index() >= 64) { if (!flg) { flg |= first1[context.thread_index() - 64]; val = binary_op(first2[context.thread_index() - 64], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } - if(block_size > 128) { if (context.thread_index() >= 128) { if (!flg) { flg |= first1[context.thread_index() - 128]; val = binary_op(first2[context.thread_index() - 128], val); } } 
context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } - if(block_size > 256) { if (context.thread_index() >= 256) { if (!flg) { flg |= first1[context.thread_index() - 256]; val = binary_op(first2[context.thread_index() - 256], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } - if(block_size > 512) { if (context.thread_index() >= 512) { if (!flg) { flg |= first1[context.thread_index() - 512]; val = binary_op(first2[context.thread_index() - 512], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } - if(block_size > 1024) { if (context.thread_index() >= 1024) { if (!flg) { flg |= first1[context.thread_index() - 1024]; val = binary_op(first2[context.thread_index() - 1024], val); } } context.barrier(); first1[context.thread_index()] = flg; first2[context.thread_index()] = val; context.barrier(); } -} // end inclusive_scan_by_flag() - - -template -__device__ __thrust_forceinline__ -void inclusive_scan_by_flag_n(Context context, - InputIterator1 first1, - InputIterator2 first2, - Size n, - BinaryFunction binary_op) -{ - // TODO support n > context.block_dimension() - typename thrust::iterator_value::type flg = first1[context.thread_index()]; - typename thrust::iterator_value::type val = first2[context.thread_index()]; - - for (unsigned int i = 1; i < n; i <<= 1) - { - if (context.thread_index() < n && context.thread_index() >= i) - { - if (!flg) - { - flg |= first1[context.thread_index() - i]; - val = binary_op(first2[context.thread_index() - i], val); - } - } - - context.barrier(); - - first1[context.thread_index()] = flg; - first2[context.thread_index()] = val; - - context.barrier(); - } -} // end inclusive_scan_by_flag() - - -template -__device__ __thrust_forceinline__ -void inplace_inclusive_scan(Context &ctx, RandomAccessIterator first, BinaryFunction op) -{ 
- typename thrust::iterator_value::type x = first[ctx.thread_index()]; - - for(unsigned int offset = 1; offset < ctx.block_dimension(); offset *= 2) - { - if(ctx.thread_index() >= offset) - { - x = op(first[ctx.thread_index() - offset], x); - } - - ctx.barrier(); - - first[ctx.thread_index()] = x; - - ctx.barrier(); - } -} - - -template -__device__ __thrust_forceinline__ -void inplace_inclusive_scan(Context &ctx, RandomAccessIterator first) -{ - block::inplace_inclusive_scan(ctx, first, thrust::plus::type>()); -} - - -} // end namespace block -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/block/merge.h b/compat/thrust/system/cuda/detail/block/merge.h deleted file mode 100644 index 9af0b7bfbb..0000000000 --- a/compat/thrust/system/cuda/detail/block/merge.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace block -{ - -template -__device__ __thrust_forceinline__ - RandomAccessIterator3 merge(Context context, - RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - RandomAccessIterator2 last2, - RandomAccessIterator3 result, - StrictWeakOrdering comp); - -// XXX assumes that context.block_dimension() <= n1 and -// context.block_dimension() <= n2 -// This algorithm is analogous to inplace_merge -// but instead of working on the ranges -// [first, middle) and [middle, last) -// it works on the ranges -// [first, first + n1) and [first + n1, first + n1 + n2) -template -__device__ __thrust_forceinline__ - void inplace_merge_by_key_n(Context context, - RandomAccessIterator1 keys_first, - RandomAccessIterator2 values_first, - Size1 n1, - Size2 n2, - StrictWeakOrdering comp); - -} // end namespace block -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/block/merge.inl b/compat/thrust/system/cuda/detail/block/merge.inl deleted file mode 100644 index 5eae2b58f3..0000000000 --- a/compat/thrust/system/cuda/detail/block/merge.inl +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace block -{ - -template -__device__ __thrust_forceinline__ - RandomAccessIterator3 merge(Context context, - RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - RandomAccessIterator2 last2, - RandomAccessIterator3 result, - StrictWeakOrdering comp) -{ - typedef typename thrust::iterator_difference::type difference1; - typedef typename thrust::iterator_difference::type difference2; - - difference1 n1 = last1 - first1; - difference2 n2 = last2 - first2; - - // find the rank of each element in the other array - difference2 rank2 = 0; - if(context.thread_index() < n1) - { - RandomAccessIterator1 x = first1; - x += context.thread_index(); - - // lower_bound ensures that x sorts before any equivalent element of input2 - // this ensures stability - rank2 = thrust::system::detail::generic::scalar::lower_bound(first2, last2, raw_reference_cast(*x), comp) - first2; - } // end if - - difference1 rank1 = 0; - if(context.thread_index() < n2) - { - RandomAccessIterator2 x = first2 + context.thread_index(); - - // upper_bound ensures that x sorts before any equivalent element of input1 - // this ensures stability - rank1 = thrust::system::detail::generic::scalar::upper_bound(first1, last1, raw_reference_cast(*x), comp) - first1; - } // end if - - if(context.thread_index() < n1) - { - // scatter each element from input1 - RandomAccessIterator1 src = first1 + context.thread_index(); - RandomAccessIterator3 dst = result + context.thread_index() + rank2; - - *dst = *src; - } - - if(context.thread_index() < n2) - { - // scatter each element from input2 - RandomAccessIterator2 src = first2 + context.thread_index(); - RandomAccessIterator3 dst = result + context.thread_index() + rank1; - - *dst = *src; - } - - return result + n1 + n2; -} // end merge - - -template -__device__ __thrust_forceinline__ - void 
inplace_merge_by_key_n(Context context, - RandomAccessIterator1 keys_first, - RandomAccessIterator2 values_first, - Size1 n1, - Size2 n2, - StrictWeakOrdering comp) -{ - RandomAccessIterator1 input1 = keys_first; - RandomAccessIterator1 input2 = keys_first + n1; - - RandomAccessIterator2 input1val = values_first; - RandomAccessIterator2 input2val = values_first + n1; - - typedef typename thrust::iterator_value::type KeyType; - typedef typename thrust::iterator_value::type ValueType; - - // XXX use uninitialized here - KeyType inp1 = input1[context.thread_index()]; ValueType inp1val = input1val[context.thread_index()]; - KeyType inp2 = input2[context.thread_index()]; ValueType inp2val = input2val[context.thread_index()]; - - // to merge input1 and input2, use binary search to find the rank of inp1 & inp2 in arrays input2 & input1, respectively - // as before, the "end" variables point to one element after the last element of the arrays - - // start by looking through input2 for inp1's rank - unsigned int start_1 = 0; - - // don't do the search if our value is beyond the end of input1 - if(context.thread_index() < n1) - { - start_1 = thrust::system::detail::generic::scalar::lower_bound_n(input2, n2, inp1, comp) - input2; - } // end if - - // now look through input1 for inp2's rank - unsigned int start_2 = 0; - - // don't do the search if our value is beyond the end of input2 - if(context.thread_index() < n2) - { - // upper_bound ensures that equivalent elements in the first range sort before the second - start_2 = thrust::system::detail::generic::scalar::upper_bound_n(input1, n1, inp2, comp) - input1; - } // end if - - context.barrier(); - - // Write back into the right position to the input arrays; can be done in place since we read in - // the input arrays into registers before. 
- if(context.thread_index() < n1) - { - input1[start_1 + context.thread_index()] = inp1; - input1val[start_1 + context.thread_index()] = inp1val; - } // end if - - if(context.thread_index() < n2) - { - input1[start_2 + context.thread_index()] = inp2; - input1val[start_2 + context.thread_index()] = inp2val; - } // end if -} // end inplace_merge_by_key_n() - - -} // end namespace block -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/block/merging_sort.h b/compat/thrust/system/cuda/detail/block/merging_sort.h deleted file mode 100644 index 8f8f999ec5..0000000000 --- a/compat/thrust/system/cuda/detail/block/merging_sort.h +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file merging_sort.h - * \brief Block version of merge sort - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace block -{ - - -template -__device__ void conditional_swap(RandomAccessIterator1 keys_first, - RandomAccessIterator2 values_first, - const unsigned int i, - const unsigned int end, - bool pred, - Compare comp) -{ - typedef typename thrust::iterator_traits::value_type KeyType; - typedef typename thrust::iterator_traits::value_type ValueType; - - if(pred && i+1 -__device__ void transposition_sort(Context context, - RandomAccessIterator1 keys_first, - RandomAccessIterator2 values_first, - const unsigned int i, - const unsigned int end, - const unsigned int size, - Compare comp) -{ - const bool is_odd = i&0x1; - - for(unsigned int round=size/2; round>0; --round) - { - // ODDS - conditional_swap(keys_first, values_first, i, end, is_odd, comp); - context.barrier(); - - // EVENS - conditional_swap(keys_first, values_first, i, end, !is_odd, comp); - context.barrier(); - } -} - -template -__device__ void merge(Context context, - RandomAccessIterator1 keys_first, - RandomAccessIterator2 values_first, - const unsigned int i, - const unsigned int n, - unsigned int begin, - unsigned int end, - unsigned int h, - StrictWeakOrdering cmp) -{ - // INVARIANT: Every element i resides within a sequence [begin,end) - // of length h which is already sorted - while( h::value_type KeyType; - typedef typename thrust::iterator_traits::value_type ValueType; - - KeyType key; - ValueType value; - - unsigned int rank = i - begin; - - // prevent out-of-bounds access - if(i < new_end) - { - key = keys_first[i]; - - if(begin==new_begin) // in the left side of merging pair - { - RandomAccessIterator1 result = thrust::system::detail::generic::scalar::lower_bound_n(keys_first+end, new_end-end, key, cmp); - rank += (result - (keys_first+end)); - } - else // in the right side of merging pair - { - 
RandomAccessIterator1 result = thrust::system::detail::generic::scalar::upper_bound_n(keys_first+new_begin, begin-new_begin, key, cmp); - rank += (result - (keys_first+new_begin)); - } - - value = values_first[i]; - } - - context.barrier(); - - if(i < new_end) - { - keys_first[new_begin+rank] = key; - values_first[new_begin+rank] = value; - } - - context.barrier(); - - begin = new_begin; - end = new_end; - } -} - - -/*! Block-wise implementation of merge sort. - * It provides the same external interface as odd_even_sort. - */ -template -__device__ void merging_sort(Context context, - RandomAccessIterator1 keys_first, - RandomAccessIterator2 values_first, - const unsigned int n, - StrictWeakOrdering comp) -{ - // Phase 1: Sort subsequences of length 32 using odd-even - // transposition sort. The code below assumes that h is a - // power of 2. Empirically, 32 delivers best results, - // which is not surprising since that's the warp width. - unsigned int i = context.thread_index(); - unsigned int h = 32; - unsigned int begin=i&(~(h-1)), end=min(n,begin+h); - - transposition_sort(context, keys_first, values_first, i, end, h, comp); - - // Phase 2: Apply merge tree to produce final sorted results - merge(context, keys_first, values_first, i, n, begin, end, h, comp); -} // end merging_sort() - - -} // end namespace block -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/block/odd_even_sort.h b/compat/thrust/system/cuda/detail/block/odd_even_sort.h deleted file mode 100644 index 0fa0ea069a..0000000000 --- a/compat/thrust/system/cuda/detail/block/odd_even_sort.h +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file odd_even_sort.h - * \brief Block versions of Batcher's Odd-Even Merge Sort - */ - -#pragma once - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace block -{ - - -/*! Block-wise implementation of Batcher's Odd-Even Merge Sort - * This implementation is based on Nadathur Satish's. - */ -template - __device__ void odd_even_sort(KeyType *keys, - ValueType *data, - const unsigned int n, - StrictWeakOrdering comp) -{ - for(unsigned int p = blockDim.x>>1; p > 0; p >>= 1) - { - unsigned int q = blockDim.x>>1, r = 0, d = p; - - while(q >= p) - { - unsigned int j = threadIdx.x + d; - - // if j lies beyond the end of the array, we consider it "sorted" wrt i - // regardless of whether i lies beyond the end of the array - if(threadIdx.x < (blockDim.x-d) && (threadIdx.x & p) == r && j < n) - { - KeyType xikey = keys[threadIdx.x]; - KeyType xjkey = keys[j]; - - ValueType xivalue = data[threadIdx.x]; - ValueType xjvalue = data[j]; - - // does xj sort before xi? 
- if(comp(xjkey, xikey)) - { - keys[threadIdx.x] = xjkey; - keys[j] = xikey; - - data[threadIdx.x] = xjvalue; - data[j] = xivalue; - } // end if - } // end if - - d = q - p; - q >>= 1; - r = p; - - __syncthreads(); - } // end while - } // end for p -} // end odd_even_sort() - -template - __device__ void stable_odd_even_sort(KeyType *keys, - ValueType *data, - const unsigned int n, - StrictWeakOrdering comp) -{ - for(unsigned int i = 0; - i < blockDim.x>>1; - ++i) - { - bool thread_is_odd = threadIdx.x & 0x1; - - // do odds first - if(thread_is_odd && threadIdx.x + 1 < n) - { - KeyType xikey = keys[threadIdx.x]; - KeyType xjkey = keys[threadIdx.x + 1]; - - ValueType xivalue = data[threadIdx.x]; - ValueType xjvalue = data[threadIdx.x + 1]; - - // does xj sort before xi? - if(comp(xjkey, xikey)) - { - keys[threadIdx.x] = xjkey; - keys[threadIdx.x + 1] = xikey; - - data[threadIdx.x] = xjvalue; - data[threadIdx.x + 1] = xivalue; - } // end if - } // end if - - __syncthreads(); - - // do evens second - if(!thread_is_odd && threadIdx.x + 1 < n) - { - KeyType xikey = keys[threadIdx.x]; - KeyType xjkey = keys[threadIdx.x + 1]; - - ValueType xivalue = data[threadIdx.x]; - ValueType xjvalue = data[threadIdx.x + 1]; - - // does xj sort before xi? 
- if(comp(xjkey, xikey)) - { - keys[threadIdx.x] = xjkey; - keys[threadIdx.x + 1] = xikey; - - data[threadIdx.x] = xjvalue; - data[threadIdx.x + 1] = xivalue; - } // end if - } // end if - - __syncthreads(); - } // end for i -} // end stable_odd_even_sort() - - -} // end namespace block -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/block/reduce.h b/compat/thrust/system/cuda/detail/block/reduce.h deleted file mode 100644 index e0a1901b28..0000000000 --- a/compat/thrust/system/cuda/detail/block/reduce.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace block -{ - -/* Reduces [data, data + n) using binary_op and stores the result in data[0] - * - * Upon return the elements in [data + 1, data + n) have unspecified values. 
- */ -template -__device__ __thrust_forceinline__ -void reduce_n(Context context, ValueIterator data, unsigned int n, BinaryFunction binary_op) -{ - if (context.block_dimension() < n) - { - for (unsigned int i = context.block_dimension() + context.thread_index(); i < n; i += context.block_dimension()) - data[context.thread_index()] = binary_op(data[context.thread_index()], data[i]); - - context.barrier(); - } - - while (n > 1) - { - unsigned int half = n / 2; - - if (context.thread_index() < half) - data[context.thread_index()] = binary_op(data[context.thread_index()], data[n - context.thread_index() - 1]); - - context.barrier(); - - n = n - half; - } -} - -} // end namespace block -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/copy.h b/compat/thrust/system/cuda/detail/copy.h deleted file mode 100644 index 8f7ee97c24..0000000000 --- a/compat/thrust/system/cuda/detail/copy.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - - -template - OutputIterator copy(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result); - - -template - OutputIterator copy(cross_system exec, - InputIterator first, - InputIterator last, - OutputIterator result); - - -template - OutputIterator copy_n(execution_policy &exec, - InputIterator first, - Size n, - OutputIterator result); - - -template - OutputIterator copy_n(cross_system exec, - InputIterator first, - Size n, - OutputIterator result); - - -} // end detail -} // end cuda -} // end system -} // end thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/copy.inl b/compat/thrust/system/cuda/detail/copy.inl deleted file mode 100644 index 125eebdaa5..0000000000 --- a/compat/thrust/system/cuda/detail/copy.inl +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - - -template - OutputIterator copy(execution_policy &system, - InputIterator first, - InputIterator last, - OutputIterator result) -{ - return thrust::system::cuda::detail::copy_device_to_device(system,first,last,result); -} // end copy() - - -template - OutputIterator copy(cross_system systems, - InputIterator first, - InputIterator last, - OutputIterator result) -{ - return thrust::system::cuda::detail::copy_cross_system(systems,first,last,result); -} // end copy() - - -template - OutputIterator copy_n(execution_policy &system, - InputIterator first, - Size n, - OutputIterator result) -{ - return thrust::system::cuda::detail::copy_device_to_device(system,first,first+n,result); -} // end copy_n() - - -template - OutputIterator copy_n(cross_system systems, - InputIterator first, - Size n, - OutputIterator result) -{ - return thrust::system::cuda::detail::copy_cross_system_n(systems,first,n,result); -} // end copy_n() - - -} // end detail -} // end cuda -} // end system -} // end thrust - diff --git a/compat/thrust/system/cuda/detail/copy_cross_system.h b/compat/thrust/system/cuda/detail/copy_cross_system.h deleted file mode 100644 index f68ea3c88a..0000000000 --- a/compat/thrust/system/cuda/detail/copy_cross_system.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - - -template - OutputIterator copy_cross_system(cross_system systems, - InputIterator begin, - InputIterator end, - OutputIterator result); - - -template - OutputIterator copy_cross_system_n(cross_system systems, - InputIterator begin, - Size n, - OutputIterator result); - - -} // end detail -} // end cuda -} // end system -} // end thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/copy_cross_system.inl b/compat/thrust/system/cuda/detail/copy_cross_system.inl deleted file mode 100644 index 861cb2c2cd..0000000000 --- a/compat/thrust/system/cuda/detail/copy_cross_system.inl +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - -// XXX WAR circular #inclusion problem -template class temporary_array; - -} // end detail - -namespace system -{ -namespace cuda -{ -namespace detail -{ - - -// general input to random access case -template - RandomAccessIterator copy_cross_system(cross_system systems, - InputIterator begin, - InputIterator end, - RandomAccessIterator result, - thrust::incrementable_traversal_tag, - thrust::random_access_traversal_tag) -{ - //std::cerr << std::endl; - //std::cerr << "general copy_host_to_device(): InputIterator: " << typeid(InputIterator).name() << std::endl; - //std::cerr << "general copy_host_to_device(): OutputIterator: " << typeid(OutputIterator).name() << std::endl; - - typedef typename thrust::iterator_value::type InputType; - - // allocate temporary storage in System1 - thrust::detail::temporary_array temp(systems.system1,begin,end); - return thrust::copy(systems, temp.begin(), temp.end(), result); -} - -template - RandomAccessIterator copy_cross_system_n(cross_system systems, - InputIterator first, - Size n, - RandomAccessIterator result, - thrust::incrementable_traversal_tag, - thrust::random_access_traversal_tag) -{ - typedef typename thrust::iterator_value::type InputType; - - // allocate and copy to temporary storage System1 - thrust::detail::temporary_array temp(systems.system1, first, n); - - // recurse - return copy_cross_system(systems, temp.begin(), temp.end(), result); -} - - -// random access to general output case -template - OutputIterator copy_cross_system(cross_system systems, - RandomAccessIterator begin, - RandomAccessIterator end, - OutputIterator result, - thrust::random_access_traversal_tag, - thrust::incrementable_traversal_tag) -{ - typedef typename thrust::iterator_value::type InputType; - - // copy to temporary storage in System2 - thrust::detail::temporary_array temp(systems.system2, systems.system1, begin, 
end); - - return thrust::copy(systems.system2, temp.begin(), temp.end(), result); -} - -template - OutputIterator copy_cross_system_n(cross_system systems, - RandomAccessIterator first, - Size n, - OutputIterator result, - thrust::random_access_traversal_tag, - thrust::incrementable_traversal_tag) -{ - typedef typename thrust::iterator_value::type InputType; - - // copy to temporary storage in System2 - thrust::detail::temporary_array temp(systems.system2, systems.system1, first, n); - - // copy temp to result - return thrust::copy(systems.system2, temp.begin(), temp.end(), result); -} - - -// trivial copy -template - RandomAccessIterator2 copy_cross_system(cross_system systems, - RandomAccessIterator1 begin, - RandomAccessIterator1 end, - RandomAccessIterator2 result, - thrust::random_access_traversal_tag, - thrust::random_access_traversal_tag, - thrust::detail::true_type) // trivial copy -{ -// std::cerr << std::endl; -// std::cerr << "random access copy_device_to_host(): trivial" << std::endl; -// std::cerr << "general copy_device_to_host(): RandomAccessIterator1: " << typeid(RandomAccessIterator1).name() << std::endl; -// std::cerr << "general copy_device_to_host(): RandomAccessIterator2: " << typeid(RandomAccessIterator2).name() << std::endl; - - // how many elements to copy? 
- typename thrust::iterator_traits::difference_type n = end - begin; - - thrust::system::cuda::detail::trivial_copy_n(systems, begin, n, result); - - return result + n; -} - - -namespace detail -{ - -// random access non-trivial iterator to random access iterator -template - RandomAccessIterator2 non_trivial_random_access_copy_cross_system(cross_system systems, - RandomAccessIterator1 begin, - RandomAccessIterator1 end, - RandomAccessIterator2 result, - thrust::detail::false_type) // InputIterator is non-trivial -{ - // copy the input to a temporary input system buffer of OutputType - typedef typename thrust::iterator_value::type OutputType; - - // allocate temporary storage in System1 - thrust::detail::temporary_array temp(systems.system1, begin, end); - - // recurse - return copy_cross_system(systems, temp.begin(), temp.end(), result); -} - -template - RandomAccessIterator2 non_trivial_random_access_copy_cross_system(cross_system systems, - RandomAccessIterator1 begin, - RandomAccessIterator1 end, - RandomAccessIterator2 result, - thrust::detail::true_type) // InputIterator is trivial -{ - typename thrust::iterator_difference::type n = thrust::distance(begin, end); - - // allocate temporary storage in System2 - // retain the input's type for the intermediate storage - // do not initialize the storage (the 0 does this) - typedef typename thrust::iterator_value::type InputType; - thrust::detail::temporary_array temp(0, systems.system2, n); - - // force a trivial (memcpy) copy of the input to the temporary - // note that this will not correctly account for copy constructors - // but there's nothing we can do about that - // XXX one thing we might try is to use pinned memory for the temporary storage - // this might allow us to correctly account for copy constructors - thrust::system::cuda::detail::trivial_copy_n(systems, begin, n, temp.begin()); - - // finally, copy to the result - return thrust::copy(systems.system2, temp.begin(), temp.end(), result); -} - -} // 
end detail - - -// random access iterator to random access host iterator with non-trivial copy -template - RandomAccessIterator2 copy_cross_system(cross_system systems, - RandomAccessIterator1 begin, - RandomAccessIterator1 end, - RandomAccessIterator2 result, - thrust::random_access_traversal_tag, - thrust::random_access_traversal_tag, - thrust::detail::false_type) // is_trivial_copy -{ - // dispatch a non-trivial random access cross system copy based on whether or not the InputIterator is trivial - return detail::non_trivial_random_access_copy_cross_system(systems, begin, end, result, - typename thrust::detail::is_trivial_iterator::type()); -} - -// random access iterator to random access iterator -template - RandomAccessIterator2 copy_cross_system(cross_system systems, - RandomAccessIterator1 begin, - RandomAccessIterator1 end, - RandomAccessIterator2 result, - thrust::random_access_traversal_tag input_traversal, - thrust::random_access_traversal_tag output_traversal) -{ - // dispatch on whether this is a trivial copy - return copy_cross_system(systems, begin, end, result, input_traversal, output_traversal, - typename thrust::detail::dispatch::is_trivial_copy::type()); -} - -template - RandomAccessIterator2 copy_cross_system_n(cross_system systems, - RandomAccessIterator1 first, - Size n, - RandomAccessIterator2 result, - thrust::random_access_traversal_tag input_traversal, - thrust::random_access_traversal_tag output_traversal) -{ - // implement with copy_cross_system - return copy_cross_system(systems, first, first + n, result, input_traversal, output_traversal); -} - -///////////////// -// Entry Point // -///////////////// - -template - OutputIterator copy_cross_system(cross_system systems, - InputIterator begin, - InputIterator end, - OutputIterator result) -{ - return copy_cross_system(systems, begin, end, result, - typename thrust::iterator_traversal::type(), - typename thrust::iterator_traversal::type()); -} - -template - OutputIterator 
copy_cross_system_n(cross_system systems, - InputIterator begin, - Size n, - OutputIterator result) -{ - return copy_cross_system_n(systems, begin, n, result, - typename thrust::iterator_traversal::type(), - typename thrust::iterator_traversal::type()); -} - -} // end detail -} // end cuda -} // end system -} // end thrust - diff --git a/compat/thrust/system/cuda/detail/copy_device_to_device.h b/compat/thrust/system/cuda/detail/copy_device_to_device.h deleted file mode 100644 index a7d8df8613..0000000000 --- a/compat/thrust/system/cuda/detail/copy_device_to_device.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file copy_device_to_device.h - * \brief Device implementations for copying on the device. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - - -template - OutputIterator copy_device_to_device(execution_policy &exec, - InputIterator begin, - InputIterator end, - OutputIterator result); - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/copy_device_to_device.inl b/compat/thrust/system/cuda/detail/copy_device_to_device.inl deleted file mode 100644 index c8263c5d55..0000000000 --- a/compat/thrust/system/cuda/detail/copy_device_to_device.inl +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ - -template - OutputIterator copy_device_to_device(execution_policy &exec, - InputIterator begin, - InputIterator end, - OutputIterator result, - thrust::detail::false_type) -{ - // general case (mixed types) - typedef typename thrust::iterator_traits::value_type InputType; - -#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC - return thrust::transform(exec, begin, end, result, thrust::identity()); -#else - // we're not compiling with nvcc: copy [begin, end) to temp host memory - typename thrust::iterator_traits::difference_type n = thrust::distance(begin, end); - - thrust::host_system_tag temp_exec; - thrust::detail::temporary_array temp1(temp_exec, begin, end); - - // transform temp1 to OutputType in host memory - typedef typename thrust::iterator_traits::value_type OutputType; - - thrust::detail::temporary_array temp2(temp_exec, temp1.begin(), temp1.end()); - - // copy temp2 to device - result = thrust::system::cuda::detail::copy_cross_system(temp2.begin(), temp2.end(), result); - - return result; -#endif // THRUST_DEVICE_COMPILER_NVCC -} - - -template - OutputIterator copy_device_to_device(execution_policy &exec, - InputIterator begin, - InputIterator end, - OutputIterator result, - thrust::detail::true_type) -{ - // specialization for device to device when the value_types match, operator= is not overloaded, - // and the iterators are pointers - - // how many elements to copy? 
- typename thrust::iterator_traits::difference_type n = end - begin; - - thrust::system::cuda::detail::trivial_copy_n(exec, begin, n, result); - - return result + n; -} - -} // end namespace detail - -///////////////// -// Entry Point // -///////////////// - -template - OutputIterator copy_device_to_device(execution_policy &exec, - InputIterator begin, - InputIterator end, - OutputIterator result) -{ - typedef typename thrust::iterator_traits::value_type InputType; - typedef typename thrust::iterator_traits::value_type OutputType; - - const bool use_trivial_copy = - thrust::detail::is_same::value - && thrust::detail::is_trivial_iterator::value - && thrust::detail::is_trivial_iterator::value; - - // XXX WAR unused variable warning - (void) use_trivial_copy; - - return detail::copy_device_to_device(exec, begin, end, result, - thrust::detail::integral_constant()); - -} - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/copy_if.h b/compat/thrust/system/cuda/detail/copy_if.h deleted file mode 100644 index 5ed0f6c9c4..0000000000 --- a/compat/thrust/system/cuda/detail/copy_if.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -template - OutputIterator copy_if(execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred); - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/copy_if.inl b/compat/thrust/system/cuda/detail/copy_if.inl deleted file mode 100644 index 15ea7faa82..0000000000 --- a/compat/thrust/system/cuda/detail/copy_if.inl +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -template -struct copy_if_intervals_closure -{ - InputIterator1 input; - InputIterator2 stencil; - InputIterator3 offsets; - Decomposition decomp; - OutputIterator output; - - typedef Context context_type; - context_type context; - - copy_if_intervals_closure(InputIterator1 input, - InputIterator2 stencil, - InputIterator3 offsets, - Decomposition decomp, - OutputIterator output, - Context context = Context()) - : input(input), stencil(stencil), offsets(offsets), decomp(decomp), output(output), context(context) {} - - __device__ __thrust_forceinline__ - void operator()(void) - { - typedef typename thrust::iterator_value::type OutputType; - - typedef unsigned int PredicateType; - - const unsigned int CTA_SIZE = context_type::ThreadsPerBlock::value; - - thrust::plus binary_op; - - __shared__ PredicateType sdata[CTA_SIZE]; context.barrier(); - - typedef typename Decomposition::index_type IndexType; - - // this block processes results in [range.begin(), range.end()) - thrust::system::detail::internal::index_range range = decomp[context.block_index()]; - - IndexType base = range.begin(); - - PredicateType predicate = 0; - - // advance input iterators to this thread's starting position - input += base + context.thread_index(); - stencil += base + context.thread_index(); - - // advance output to this interval's starting position - if (context.block_index() != 0) - { - InputIterator3 temp = offsets + (context.block_index() - 1); - output += *temp; - } - - // process full blocks - while(base + CTA_SIZE <= range.end()) - { - // read data - sdata[context.thread_index()] = predicate = *stencil; - - context.barrier(); - - // scan block - block::inclusive_scan(context, sdata, binary_op); 
- - // write data - if (predicate) - { - OutputIterator temp2 = output + (sdata[context.thread_index()] - 1); - *temp2 = *input; - } - - // advance inputs by CTA_SIZE - base += CTA_SIZE; - input += CTA_SIZE; - stencil += CTA_SIZE; - - // advance output by number of true predicates - output += sdata[CTA_SIZE - 1]; - - context.barrier(); - } - - // process partially full block at end of input (if necessary) - if (base < range.end()) - { - // read data - if (base + context.thread_index() < range.end()) - sdata[context.thread_index()] = predicate = *stencil; - else - sdata[context.thread_index()] = predicate = 0; - - context.barrier(); - - // scan block - block::inclusive_scan(context, sdata, binary_op); - - // write data - if (predicate) // expects predicate=false for >= interval_end - { - OutputIterator temp2 = output + (sdata[context.thread_index()] - 1); - *temp2 = *input; - } - } - } -}; // copy_if_intervals_closure - - -template - OutputIterator copy_if(execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator output, - Predicate pred) -{ - typedef typename thrust::iterator_difference::type IndexType; - typedef typename thrust::iterator_value::type OutputType; - - if (first == last) - return output; - - typedef thrust::system::detail::internal::uniform_decomposition Decomposition; - typedef thrust::detail::temporary_array IndexArray; - - Decomposition decomp = default_decomposition(last - first); - - // storage for per-block predicate counts - IndexArray block_results(exec, decomp.size()); - - // convert stencil into an iterator that produces integral values in {0,1} - typedef typename thrust::detail::predicate_to_integral PredicateToIndexTransform; - typedef thrust::transform_iterator PredicateToIndexIterator; - - PredicateToIndexIterator predicate_stencil(stencil, PredicateToIndexTransform(pred)); - - // compute number of true values in each interval - thrust::system::cuda::detail::reduce_intervals(exec, 
predicate_stencil, block_results.begin(), thrust::plus(), decomp); - - // scan the partial sums - thrust::inclusive_scan(exec, block_results.begin(), block_results.end(), block_results.begin(), thrust::plus()); - - // copy values to output - const unsigned int ThreadsPerBlock = 256; - typedef typename IndexArray::iterator InputIterator3; - typedef detail::statically_blocked_thread_array Context; - typedef copy_if_intervals_closure Closure; - Closure closure(first, predicate_stencil, block_results.begin(), decomp, output); - detail::launch_closure(closure, decomp.size(), ThreadsPerBlock); - - return output + block_results[decomp.size() - 1]; -} // end copy_if() - - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END - diff --git a/compat/thrust/system/cuda/detail/count.h b/compat/thrust/system/cuda/detail/count.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/count.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/cuda_launch_config.h b/compat/thrust/system/cuda/detail/cuda_launch_config.h deleted file mode 100644 index b7f0ca2409..0000000000 --- a/compat/thrust/system/cuda/detail/cuda_launch_config.h +++ /dev/null @@ -1,384 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - - -// XXX define our own device_properties_t to avoid errors when #including -// this file in the absence of a CUDA installation -struct device_properties_t -{ - // mirror the type and spelling of cudaDeviceProp's members - // keep these alphabetized - int major; - int maxGridSize[3]; - int maxThreadsPerBlock; - int maxThreadsPerMultiProcessor; - int minor; - int multiProcessorCount; - int regsPerBlock; - size_t sharedMemPerBlock; - int warpSize; -}; - - -// XXX define our own device_properties_t to avoid errors when #including -// this file in the absence of a CUDA installation -struct function_attributes_t -{ - // mirror the type and spelling of cudaFuncAttributes' members - // keep these alphabetized - size_t constSizeBytes; - size_t localSizeBytes; - int maxThreadsPerBlock; - int numRegs; - size_t sharedSizeBytes; -}; - - -/*! 
Computes a block size in number of threads for a CUDA kernel using a occupancy-promoting heuristic. - * \param attributes The cudaFuncAttributes corresponding to a __global__ function of interest on a GPU of interest. - * \param properties The cudaDeviceProp corresponding to a GPU on which to launch the __global__ function of interest. - * \return A CUDA block size, in number of threads, which the resources of the GPU's streaming multiprocessor can - * accomodate and which is intended to promote occupancy. The result is equivalent to the one performed by - * the "CUDA Occupancy Calculator". - * \note The __global__ function of interest is presumed to use 0 bytes of dynamically-allocated __shared__ memory. - */ -inline __host__ __device__ -std::size_t block_size_with_maximum_potential_occupancy(const function_attributes_t &attributes, - const device_properties_t &properties); - -/*! Computes a block size in number of threads for a CUDA kernel using a occupancy-promoting heuristic. - * Use this version of the function when a CUDA block's dynamically-allocated __shared__ memory requirements - * vary with the size of the block. - * \param attributes The cudaFuncAttributes corresponding to a __global__ function of interest on a GPU of interest. - * \param properties The cudaDeviceProp corresponding to a GPU on which to launch the __global__ function of interest. - * \param block_size_to_dynamic_smem_bytes A unary function which maps an integer CUDA block size to the number of bytes - * of dynamically-allocated __shared__ memory required by a CUDA block of that size. - * \return A CUDA block size, in number of threads, which the resources of the GPU's streaming multiprocessor can - * accomodate and which is intended to promote occupancy. The result is equivalent to the one performed by - * the "CUDA Occupancy Calculator". 
- */ -template -inline __host__ __device__ -std::size_t block_size_with_maximum_potential_occupancy(const function_attributes_t &attributes, - const device_properties_t &properties, - UnaryFunction block_size_to_dynamic_smem_size); - - -/*! Returns the maximum amount of dynamic shared memory each block - * can utilize without reducing thread occupancy. - * - * \param properties CUDA device properties - * \param attributes CUDA function attributes - * \param blocks_per_processor Number of blocks per streaming multiprocessor - */ -inline __host__ __device__ -size_t proportional_smem_allocation(const device_properties_t &properties, - const function_attributes_t &attributes, - size_t blocks_per_processor); - - -template -inline __host__ __device__ -size_t max_blocksize_subject_to_smem_usage(const device_properties_t &properties, - const function_attributes_t &attributes, - UnaryFunction blocksize_to_dynamic_smem_usage); - - - -namespace cuda_launch_config_detail -{ - -using std::size_t; - -namespace util -{ - - -template -inline __host__ __device__ -T min_(const T &lhs, const T &rhs) -{ - return rhs < lhs ? rhs : lhs; -} - - -template -struct zero_function -{ - inline __host__ __device__ - T operator()(T) - { - return 0; - } -}; - - -// x/y rounding towards +infinity for integers, used to determine # of blocks/warps etc. -template - inline __host__ __device__ L divide_ri(const L x, const R y) -{ - return (x + (y - 1)) / y; -} - -// x/y rounding towards zero for integers, used to determine # of blocks/warps etc. 
-template - inline __host__ __device__ L divide_rz(const L x, const R y) -{ - return x / y; -} - -// round x towards infinity to the next multiple of y -template - inline __host__ __device__ L round_i(const L x, const R y){ return y * divide_ri(x, y); } - -// round x towards zero to the next multiple of y -template - inline __host__ __device__ L round_z(const L x, const R y){ return y * divide_rz(x, y); } - -} // end namespace util - - - -// granularity of shared memory allocation -inline __host__ __device__ -size_t smem_allocation_unit(const device_properties_t &properties) -{ - switch(properties.major) - { - case 1: return 512; - case 2: return 128; - case 3: return 256; - default: return 256; // unknown GPU; have to guess - } -} - - -// granularity of register allocation -inline __host__ __device__ -size_t reg_allocation_unit(const device_properties_t &properties, const size_t regsPerThread) -{ - switch(properties.major) - { - case 1: return (properties.minor <= 1) ? 256 : 512; - case 2: switch(regsPerThread) - { - case 21: - case 22: - case 29: - case 30: - case 37: - case 38: - case 45: - case 46: - return 128; - default: - return 64; - } - case 3: return 256; - default: return 256; // unknown GPU; have to guess - } -} - - -// granularity of warp allocation -inline __host__ __device__ -size_t warp_allocation_multiple(const device_properties_t &properties) -{ - return (properties.major <= 1) ? 2 : 1; -} - -// number of "sides" into which the multiprocessor is partitioned -inline __host__ __device__ -size_t num_sides_per_multiprocessor(const device_properties_t &properties) -{ - switch(properties.major) - { - case 1: return 1; - case 2: return 2; - case 3: return 4; - default: return 4; // unknown GPU; have to guess - } -} - - -inline __host__ __device__ -size_t max_blocks_per_multiprocessor(const device_properties_t &properties) -{ - return (properties.major <= 2) ? 
8 : 16; -} - - -inline __host__ __device__ -size_t max_active_blocks_per_multiprocessor(const device_properties_t &properties, - const function_attributes_t &attributes, - int CTA_SIZE, - size_t dynamic_smem_bytes) -{ - // Determine the maximum number of CTAs that can be run simultaneously per SM - // This is equivalent to the calculation done in the CUDA Occupancy Calculator spreadsheet - - ////////////////////////////////////////// - // Limits due to threads/SM or blocks/SM - ////////////////////////////////////////// - const size_t maxThreadsPerSM = properties.maxThreadsPerMultiProcessor; // 768, 1024, 1536, etc. - const size_t maxBlocksPerSM = max_blocks_per_multiprocessor(properties); - - // Calc limits - const size_t ctaLimitThreads = (CTA_SIZE <= properties.maxThreadsPerBlock) ? maxThreadsPerSM / CTA_SIZE : 0; - const size_t ctaLimitBlocks = maxBlocksPerSM; - - ////////////////////////////////////////// - // Limits due to shared memory/SM - ////////////////////////////////////////// - const size_t smemAllocationUnit = smem_allocation_unit(properties); - const size_t smemBytes = attributes.sharedSizeBytes + dynamic_smem_bytes; - const size_t smemPerCTA = util::round_i(smemBytes, smemAllocationUnit); - - // Calc limit - const size_t ctaLimitSMem = smemPerCTA > 0 ? 
properties.sharedMemPerBlock / smemPerCTA : maxBlocksPerSM; - - ////////////////////////////////////////// - // Limits due to registers/SM - ////////////////////////////////////////// - const size_t regAllocationUnit = reg_allocation_unit(properties, attributes.numRegs); - const size_t warpAllocationMultiple = warp_allocation_multiple(properties); - const size_t numWarps = util::round_i(util::divide_ri(CTA_SIZE, properties.warpSize), warpAllocationMultiple); - - // Calc limit - size_t ctaLimitRegs; - if(properties.major <= 1) - { - // GPUs of compute capability 1.x allocate registers to CTAs - // Number of regs per block is regs per thread times number of warps times warp size, rounded up to allocation unit - const size_t regsPerCTA = util::round_i(attributes.numRegs * properties.warpSize * numWarps, regAllocationUnit); - ctaLimitRegs = regsPerCTA > 0 ? properties.regsPerBlock / regsPerCTA : maxBlocksPerSM; - } - else - { - // GPUs of compute capability 2.x and higher allocate registers to warps - // Number of regs per warp is regs per thread times times warp size, rounded up to allocation unit - const size_t regsPerWarp = util::round_i(attributes.numRegs * properties.warpSize, regAllocationUnit); - const size_t numSides = num_sides_per_multiprocessor(properties); - const size_t numRegsPerSide = properties.regsPerBlock / numSides; - ctaLimitRegs = regsPerWarp > 0 ? 
((numRegsPerSide / regsPerWarp) * numSides) / numWarps : maxBlocksPerSM; - } - - ////////////////////////////////////////// - // Overall limit is min() of limits due to above reasons - ////////////////////////////////////////// - return util::min_(ctaLimitRegs, util::min_(ctaLimitSMem, util::min_(ctaLimitThreads, ctaLimitBlocks))); -} - - -} // end namespace cuda_launch_config_detail - - -template -inline __host__ __device__ -std::size_t block_size_with_maximum_potential_occupancy(const function_attributes_t &attributes, - const device_properties_t &properties, - UnaryFunction block_size_to_dynamic_smem_size) -{ - size_t max_occupancy = properties.maxThreadsPerMultiProcessor; - size_t largest_blocksize = cuda_launch_config_detail::util::min_(properties.maxThreadsPerBlock, attributes.maxThreadsPerBlock); - size_t granularity = properties.warpSize; - size_t max_blocksize = 0; - size_t highest_occupancy = 0; - - for(size_t blocksize = largest_blocksize; blocksize != 0; blocksize -= granularity) - { - size_t occupancy = blocksize * cuda_launch_config_detail::max_active_blocks_per_multiprocessor(properties, attributes, blocksize, block_size_to_dynamic_smem_size(blocksize)); - - if(occupancy > highest_occupancy) - { - max_blocksize = blocksize; - highest_occupancy = occupancy; - } - - // early out, can't do better - if(highest_occupancy == max_occupancy) - break; - } - - return max_blocksize; -} - - -inline __host__ __device__ -std::size_t block_size_with_maximum_potential_occupancy(const function_attributes_t &attributes, - const device_properties_t &properties) -{ - return block_size_with_maximum_potential_occupancy(attributes, properties, cuda_launch_config_detail::util::zero_function()); -} - - -inline __host__ __device__ -size_t proportional_smem_allocation(const device_properties_t &properties, - const function_attributes_t &attributes, - size_t blocks_per_processor) -{ - size_t smem_per_processor = properties.sharedMemPerBlock; - size_t smem_allocation_unit = 
cuda_launch_config_detail::smem_allocation_unit(properties); - - size_t total_smem_per_block = cuda_launch_config_detail::util::round_z(smem_per_processor / blocks_per_processor, smem_allocation_unit); - size_t static_smem_per_block = attributes.sharedSizeBytes; - - return total_smem_per_block - static_smem_per_block; -} - - -template -inline __host__ __device__ -size_t max_blocksize_subject_to_smem_usage(const device_properties_t &properties, - const function_attributes_t &attributes, - UnaryFunction blocksize_to_dynamic_smem_usage) -{ - size_t largest_blocksize = (thrust::min)(properties.maxThreadsPerBlock, attributes.maxThreadsPerBlock); - size_t granularity = properties.warpSize; - - for(int blocksize = largest_blocksize; blocksize > 0; blocksize -= granularity) - { - size_t total_smem_usage = blocksize_to_dynamic_smem_usage(blocksize) + attributes.sharedSizeBytes; - - if(total_smem_usage <= properties.sharedMemPerBlock) - { - return blocksize; - } - } - - return 0; -} - - -} // end detail -} // end cuda -} // end system -} // end thrust - diff --git a/compat/thrust/system/cuda/detail/default_decomposition.h b/compat/thrust/system/cuda/detail/default_decomposition.h deleted file mode 100644 index 1ed6bcfe20..0000000000 --- a/compat/thrust/system/cuda/detail/default_decomposition.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file default_decomposition.h - * \brief Return a decomposition that is appropriate for the CUDA backend. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -template -thrust::system::detail::internal::uniform_decomposition default_decomposition(IndexType n); - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/default_decomposition.inl b/compat/thrust/system/cuda/detail/default_decomposition.inl deleted file mode 100644 index 3f0879ac93..0000000000 --- a/compat/thrust/system/cuda/detail/default_decomposition.inl +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -template -thrust::system::detail::internal::uniform_decomposition default_decomposition(IndexType n) -{ - // TODO eliminate magical constant - device_properties_t properties = device_properties(); - return thrust::system::detail::internal::uniform_decomposition(n, properties.maxThreadsPerBlock, 10 * properties.multiProcessorCount); -} - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/detail/alignment.h b/compat/thrust/system/cuda/detail/detail/alignment.h deleted file mode 100644 index 31fdaaf422..0000000000 --- a/compat/thrust/system/cuda/detail/detail/alignment.h +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ -namespace alignment_of_detail -{ - - -template class alignment_of_impl; - -template - struct helper -{ - static const std::size_t value = size_diff; -}; - -template - class helper -{ - public: - static const std::size_t value = alignment_of_impl::value; -}; - -template - class alignment_of_impl -{ - private: - struct big { T x; char c; }; - - public: - static const std::size_t value = helper::value; -}; - - -} // end alignment_of_detail - - -template - struct alignment_of - : alignment_of_detail::alignment_of_impl -{}; - - -template struct aligned_type; - -// __align__ is CUDA-specific, so guard it -#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC - -// implementing aligned_type portably is tricky: - -# if THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC -// implement aligned_type with specialization because MSVC -// requires literals as arguments to declspec(align(n)) -template<> struct aligned_type<1> -{ - struct __align__(1) type { }; -}; - -template<> struct aligned_type<2> -{ - struct __align__(2) type { }; -}; - -template<> struct aligned_type<4> -{ - struct __align__(4) type { }; -}; - -template<> struct aligned_type<8> -{ - struct __align__(8) type { }; -}; - -template<> struct aligned_type<16> -{ - struct __align__(16) type { }; -}; - -template<> struct aligned_type<32> -{ - struct __align__(32) type { }; -}; - -template<> struct aligned_type<64> -{ - struct __align__(64) type { }; -}; - -template<> struct aligned_type<128> -{ - struct __align__(128) type { }; -}; - -template<> struct aligned_type<256> -{ - struct __align__(256) type { }; -}; - -template<> struct aligned_type<512> -{ - struct __align__(512) type { }; -}; - -template<> struct aligned_type<1024> -{ - struct __align__(1024) type { }; -}; - -template<> struct aligned_type<2048> -{ - struct __align__(2048) type { }; -}; - -template<> struct aligned_type<4096> 
-{ - struct __align__(4096) type { }; -}; - -template<> struct aligned_type<8192> -{ - struct __align__(8192) type { }; -}; -# elif (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_GCC) && (THRUST_GCC_VERSION < 40300) -// implement aligned_type with specialization because gcc 4.2 -// requires literals as arguments to __attribute__(aligned(n)) -template<> struct aligned_type<1> -{ - struct __align__(1) type { }; -}; - -template<> struct aligned_type<2> -{ - struct __align__(2) type { }; -}; - -template<> struct aligned_type<4> -{ - struct __align__(4) type { }; -}; - -template<> struct aligned_type<8> -{ - struct __align__(8) type { }; -}; - -template<> struct aligned_type<16> -{ - struct __align__(16) type { }; -}; - -template<> struct aligned_type<32> -{ - struct __align__(32) type { }; -}; - -template<> struct aligned_type<64> -{ - struct __align__(64) type { }; -}; - -template<> struct aligned_type<128> -{ - struct __align__(128) type { }; -}; - -# else -// assume the compiler allows template parameters as -// arguments to __align__ -template struct aligned_type -{ - struct __align__(Align) type { }; -}; -# endif // THRUST_HOST_COMPILER -#else -template struct aligned_type -{ - struct type { }; -}; -#endif // THRUST_DEVICE_COMPILER - - -template - struct aligned_storage -{ - union type - { - unsigned char data[Len]; - - typename aligned_type::type align; - }; -}; - - -} // end detail -} // end detail -} // end cuda -} // end system -} // end thrust - diff --git a/compat/thrust/system/cuda/detail/detail/b40c/kernel_utils.h b/compat/thrust/system/cuda/detail/detail/b40c/kernel_utils.h deleted file mode 100644 index e2c5a44941..0000000000 --- a/compat/thrust/system/cuda/detail/detail/b40c/kernel_utils.h +++ /dev/null @@ -1,284 +0,0 @@ -/** - * Copyright 2010 Duane Merrill - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * For more information, see our Google Code project site: - * http://code.google.com/p/back40computing/ - * - * Thanks! - */ - - -//------------------------------------------------------------------------------ -// Common B40C Defines, Properties, and Routines -//------------------------------------------------------------------------------ - - -#pragma once - -#include -#include - -namespace thrust { -namespace system { -namespace cuda { -namespace detail { -namespace detail { -namespace b40c_thrust { - -//------------------------------------------------------------------------------ -// Device properties -//------------------------------------------------------------------------------ - - -#ifndef __CUDA_ARCH__ - #define __CUDA_ARCH__ 0 -#endif - -#define B40C_FERMI(version) (version >= 200) -#define B40C_LOG_WARP_THREADS 5 // 32 threads in a warp -#define B40C_WARP_THREADS (1 << B40C_LOG_WARP_THREADS) -#define B40C_LOG_MEM_BANKS(version) ((version >= 200) ? 
5 : 4) // 32 banks on fermi, 16 on tesla -#define B40C_MEM_BANKS(version) (1 << B40C_LOG_MEM_BANKS(version)) - -// TODO refactor these -#if __CUDA_ARCH__ >= 200 - #define FastMul(a, b) (a * b) -#else - #define FastMul(a, b) (__umul24(a, b)) -#endif - -#if __CUDA_ARCH__ >= 120 - #define WarpVoteAll(active_threads, predicate) (__all(predicate)) -#else - #define WarpVoteAll(active_threads, predicate) (EmulatedWarpVoteAll(predicate)) -#endif - -#if __CUDA_ARCH__ >= 200 - #define TallyWarpVote(active_threads, predicate, storage) (__popc(__ballot(predicate))) -#else - #define TallyWarpVote(active_threads, predicate, storage) (TallyWarpVoteSm10(predicate, storage)) -#endif - -#ifdef __LP64__ - #define _B40C_LP64_ true -#else - #define _B40C_LP64_ false -#endif - -#define _B40C_REG_MISER_QUALIFIER_ __shared__ - - -//------------------------------------------------------------------------------ -// Handy routines -//------------------------------------------------------------------------------ - - -/** - * Select maximum - */ -#define B40C_MAX(a, b) ((a > b) ? a : b) - - -/** - * MagnitudeShift(). Allows you to shift left for positive magnitude values, - * right for negative. - * - * N.B. This code is a little strange; we are using this meta-programming - * pattern of partial template specialization for structures in order to - * decide whether to shift left or right. Normally we would just use a - * conditional to decide if something was negative or not and then shift - * accordingly, knowing that the compiler will elide the untaken branch, - * i.e., the out-of-bounds shift during dead code elimination. However, - * the pass for bounds-checking shifts seems to happen before the DCE - * phase, which results in a an unsightly number of compiler warnings, so - * we force the issue earlier using structural template specialization. 
- */ - -template struct MagnitudeShiftOp; - -template -struct MagnitudeShiftOp { - __device__ __forceinline__ static K Shift(K key) { - return key << magnitude; - } -}; - -template -struct MagnitudeShiftOp { - __device__ __forceinline__ static K Shift(K key) { - return key >> magnitude; - } -}; - -template -__device__ __forceinline__ K MagnitudeShift(K key) { - return MagnitudeShiftOp 0) ? magnitude : magnitude * -1, (magnitude > 0)>::Shift(key); -} - - -/** - * Supress warnings for unused constants - */ -template -__device__ __forceinline__ void SuppressUnusedConstantWarning(const T) {} - - - - -//------------------------------------------------------------------------------ -// Common device routines -//------------------------------------------------------------------------------ - - -/** - * Perform a warp-synchrounous prefix scan. Allows for diverting a warp's - * threads into separate scan problems (multi-scan). - */ -template -__device__ __forceinline__ int WarpScan( - volatile int warpscan[][NUM_ELEMENTS], - int partial_reduction, - int copy_section) { - - int warpscan_idx; - if (MULTI_SCAN) { - warpscan_idx = threadIdx.x & (NUM_ELEMENTS - 1); - } else { - warpscan_idx = threadIdx.x; - } - - warpscan[1][warpscan_idx] = partial_reduction; - - if (NUM_ELEMENTS > 1) warpscan[1][warpscan_idx] = partial_reduction = - partial_reduction + warpscan[1][warpscan_idx - 1]; - if (NUM_ELEMENTS > 2) warpscan[1][warpscan_idx] = partial_reduction = - partial_reduction + warpscan[1][warpscan_idx - 2]; - if (NUM_ELEMENTS > 4) warpscan[1][warpscan_idx] = partial_reduction = - partial_reduction + warpscan[1][warpscan_idx - 4]; - if (NUM_ELEMENTS > 8) warpscan[1][warpscan_idx] = partial_reduction = - partial_reduction + warpscan[1][warpscan_idx - 8]; - if (NUM_ELEMENTS > 16) warpscan[1][warpscan_idx] = partial_reduction = - partial_reduction + warpscan[1][warpscan_idx - 16]; - - if (copy_section > 0) { - warpscan[1 + copy_section][warpscan_idx] = partial_reduction; - } - - 
return warpscan[1][warpscan_idx - 1]; -} - -/** - * Perform a warp-synchronous reduction - */ -template -__device__ __forceinline__ void WarpReduce( - int idx, - volatile int *storage, - int partial_reduction) -{ - storage[idx] = partial_reduction; - - if (NUM_ELEMENTS > 16) storage[idx] = partial_reduction = partial_reduction + storage[idx + 16]; - if (NUM_ELEMENTS > 8) storage[idx] = partial_reduction = partial_reduction + storage[idx + 8]; - if (NUM_ELEMENTS > 4) storage[idx] = partial_reduction = partial_reduction + storage[idx + 4]; - if (NUM_ELEMENTS > 2) storage[idx] = partial_reduction = partial_reduction + storage[idx + 2]; - if (NUM_ELEMENTS > 1) storage[idx] = partial_reduction = partial_reduction + storage[idx + 1]; -} - - -/** - * Tally a warp-vote regarding the given predicate using the supplied storage - */ -template -__device__ __forceinline__ int TallyWarpVoteSm10(int predicate, int storage[]) { - WarpReduce(threadIdx.x, storage, predicate); - return storage[0]; -} - - -/** - * Tally a warp-vote regarding the given predicate - */ -template -__device__ __forceinline__ int TallyWarpVoteSm10(int predicate) { - __shared__ int vote_reduction[B40C_WARP_THREADS]; - return TallyWarpVoteSm10(predicate, vote_reduction); -} - -/** - * Emulate the __all() warp vote instruction - */ -template -__device__ __forceinline__ int EmulatedWarpVoteAll(int predicate) { - return (TallyWarpVoteSm10(predicate) == ACTIVE_THREADS); -} - - -/** - * Have each thread concurrently perform a serial reduction over its specified segment - */ -template -__device__ __forceinline__ int -SerialReduce(int segment[]) { - - int reduce = segment[0]; - - #pragma unroll - for (int i = 1; i < (int) LENGTH; i++) { - reduce += segment[i]; - } - - return reduce; -} - - -/** - * Have each thread concurrently perform a serial scan over its specified segment - */ -template -__device__ __forceinline__ -void SerialScan(int segment[], int seed0) { - - int seed1; - - #pragma unroll - for (int i = 0; i 
< (int) LENGTH; i += 2) { - seed1 = segment[i] + seed0; - segment[i] = seed0; - seed0 = seed1 + segment[i + 1]; - segment[i + 1] = seed1; - } -} - - - - -//------------------------------------------------------------------------------ -// Empty Kernels -//------------------------------------------------------------------------------ - -template -__global__ void FlushKernel(void) -{ -} - - -} // end namespace b40c_thrust -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_api.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_api.h deleted file mode 100644 index 2b199bb08a..0000000000 --- a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_api.h +++ /dev/null @@ -1,807 +0,0 @@ -/****************************************************************************** - * Copyright 2010 Duane Merrill - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * - * - * - * AUTHORS' REQUEST: - * - * If you use|reference|benchmark this code, please cite our Technical - * Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf): - * - * @TechReport{ Merrill:Sorting:2010, - * author = "Duane Merrill and Andrew Grimshaw", - * title = "Revisiting Sorting for GPGPU Stream Architectures", - * year = "2010", - * institution = "University of Virginia, Department of Computer Science", - * address = "Charlottesville, VA, USA", - * number = "CS2010-03" - * } - * - * For more information, see our Google Code project site: - * http://code.google.com/p/back40computing/ - * - * Thanks! - ******************************************************************************/ - - - -/****************************************************************************** - * Radix Sorting API - * - * USAGE: - * - * Using the B40C radix sorting implementation is easy. Just #include this API - * file and its kernel include dependencies within your source. Below are two - * examples for using: - * - * (1) A keys-only example for sorting floats: - * - * // Create storage-management structure - * RadixSortStorage device_storage(d_float_keys); - * - * // Create and enact sorter - * RadixSortingEnactor sorter(d_float_keys_len); - * sorter.EnactSort(device_storage); - * - * // Re-acquire pointer to sorted keys, free unused/temp storage - * d_float_keys = device_storage.d_keys; - * device_storage.CleanupTempStorage(); - * - * (2) And a key-value example for sorting ints paired with doubles: - * - * // Create storage-management structure - * RadixSortStorage device_storage(d_int_keys, d_double_values); - * - * // Create and enact sorter - * RadixSortingEnactor sorter(d_int_keys_len); - * sorter.EnactSort(device_storage); - * - * // Re-acquire pointer to sorted keys and values, free unused/temp storage - * d_int_keys = device_storage.d_keys; - * d_double_values = device_storage.d_values; - * device_storage.CleanupTempStorage(); - * - * - 
******************************************************************************/ - -#pragma once - -#include -#include -#include -#include -#include - -#include "radixsort_reduction_kernel.h" -#include "radixsort_spine_kernel.h" -#include "radixsort_scanscatter_kernel.h" - -#include - -namespace thrust { -namespace system { -namespace cuda { -namespace detail { -namespace detail { -namespace b40c_thrust { - - -/****************************************************************************** - * Debugging options - ******************************************************************************/ - -static bool RADIXSORT_DEBUG = false; - - - -/****************************************************************************** - * Structures for mananging device-side sorting state - ******************************************************************************/ - -/** - * Sorting storage-management structure for device vectors - */ -template -struct RadixSortStorage { - - // Device vector of keys to sort - K* d_keys; - - // Device vector of values to sort - V* d_values; - - // Ancillary device vector for key storage - K* d_alt_keys; - - // Ancillary device vector for value storage - V* d_alt_values; - - // Temporary device storage needed for radix sorting histograms - int *d_spine; - - // Flip-flopping temporary device storage denoting which digit place - // pass should read from which input source (i.e., false if reading from - // keys, true if reading from alternate_keys - bool *d_from_alt_storage; - - // Host-side boolean whether or not an odd number of sorting passes left the - // results in alternate storage. If so, the d_keys (and d_values) pointers - // will have been swapped with the d_alt_keys (and d_alt_values) pointers in order to - // point to the final results. 
- bool using_alternate_storage; - - // Constructor - RadixSortStorage(K* keys = NULL, V* values = NULL) - { - d_keys = keys; - d_values = values; - d_alt_keys = NULL; - d_alt_values = NULL; - d_spine = NULL; - d_from_alt_storage = NULL; - - using_alternate_storage = false; - } - - // Clean up non-results storage (may include freeing original storage if - // primary pointers were swizzled as per using_alternate_storage) - cudaError_t CleanupTempStorage() - { - if (d_alt_keys) cudaFree(d_alt_keys); - if (d_alt_values) cudaFree(d_alt_values); - if (d_spine) cudaFree(d_spine); - if (d_from_alt_storage) cudaFree(d_from_alt_storage); - - return cudaSuccess; - } -}; - - - -/****************************************************************************** - * Base class for sorting enactors - ******************************************************************************/ - - -/** - * Base class for SRTS radix sorting enactors. - */ -template -class BaseRadixSortingEnactor -{ -public: - - // Unsigned integer type suitable for radix sorting of keys - typedef typename KeyConversion::UnsignedBits ConvertedKeyType; - -protected: - - // - // Information about our problem configuration - // - - bool _keys_only; - unsigned int _num_elements; - int _cycle_elements; - int _spine_elements; - int _grid_size; - CtaDecomposition _work_decomposition; - int _passes; - bool _swizzle_pointers_for_odd_passes; - - // Information about our target device - cudaDeviceProp _device_props; - int _device_sm_version; - - // Information about our kernel assembly - int _kernel_ptx_version; - cudaFuncAttributes _spine_scan_kernel_attrs; - -protected: - - /** - * Constructor. - */ - BaseRadixSortingEnactor(int passes, int radix_bits, unsigned int num_elements, int max_grid_size, bool swizzle_pointers_for_odd_passes = true); - - /** - * Heuristic for determining the number of CTAs to launch. - * - * @param[in] max_grid_size - * Maximum allowable number of CTAs to launch. 
A value of 0 indicates - * that the default value should be used. - * - * @return The actual number of CTAs that should be launched - */ - int GridSize(int max_grid_size); - - /** - * Performs a distribution sorting pass over a single digit place - */ - template - cudaError_t DigitPlacePass(const RadixSortStorage &converted_storage); - - /** - * Enacts a sorting operation by performing the the appropriate - * digit-place passes. To be overloaded by specialized subclasses. - */ - virtual cudaError_t EnactDigitPlacePasses(const RadixSortStorage &converted_storage) = 0; - -public: - - /** - * Returns the length (in unsigned ints) of the device vector needed for - * temporary storage of the reduction spine. Useful if pre-allocating - * your own device storage (as opposed to letting EnactSort() allocate it - * for you). - */ - int SpineElements() { return _spine_elements; } - - /** - * Returns whether or not the problem will fit on the device. - */ - bool CanFit(); - - /** - * Enacts a radix sorting operation on the specified device data. - * - * IMPORTANT NOTES: The device storage backing the specified input vectors of - * keys (and data) will be modified. (I.e., treat this as an in-place sort.) - * - * Additionally, the pointers in the problem_storage structure may be updated - * (a) depending upon the number of digit-place sorting passes needed, and (b) - * whether or not the caller has already allocated temporary storage. - * - * The sorted results will always be referenced by problem_storage.d_keys (and - * problem_storage.d_values). However, for an odd number of sorting passes (uncommon) - * these results will actually be backed by the storage initially allocated for - * by problem_storage.d_alt_keys (and problem_storage.d_alt_values). If so, - * problem_storage.d_alt_keys and problem_storage.d_alt_keys will be updated to - * reference the original problem_storage.d_keys and problem_storage.d_values in order - * to facilitate cleanup. 
- * - * This means it is important to avoid keeping stale copies of device pointers - * to keys/data; you will want to re-reference the pointers in problem_storage. - * - * @param[in/out] problem_storage - * Device vectors of keys and values to sort, and ancillary storage - * needed by the sorting kernels. See the IMPORTANT NOTES above. - * - * The problem_storage.[alternate_keys|alternate_values|d_spine] fields are - * temporary storage needed by the sorting kernels. To facilitate - * speed, callers are welcome to re-use this storage for same-sized - * (or smaller) sortign problems. If NULL, these storage vectors will be - * allocated by this routine (and must be subsequently cuda-freed by - * the caller). - * - * @return cudaSuccess on success, error enumeration otherwise - */ - cudaError_t EnactSort(RadixSortStorage &problem_storage); - - /* - * Destructor - */ - virtual ~BaseRadixSortingEnactor() {} -}; - - - -template -BaseRadixSortingEnactor::BaseRadixSortingEnactor( - int passes, - int max_radix_bits, - unsigned int num_elements, - int max_grid_size, - bool swizzle_pointers_for_odd_passes) -{ - // - // Get current device properties - // - - int current_device; - cudaGetDevice(¤t_device); - cudaGetDeviceProperties(&_device_props, current_device); - _device_sm_version = _device_props.major * 100 + _device_props.minor * 10; - - - // - // Get SM version of compiled kernel assembly - // - cudaFuncGetAttributes(&_spine_scan_kernel_attrs, SrtsScanSpine); - _kernel_ptx_version = _spine_scan_kernel_attrs.ptxVersion * 10; - - - // - // Determine number of CTAs to launch, shared memory, cycle elements, etc. 
- // - - _passes = passes; - _num_elements = num_elements; - _keys_only = IsKeysOnly(); - _cycle_elements = B40C_RADIXSORT_CYCLE_ELEMENTS(_kernel_ptx_version , ConvertedKeyType, V); - _grid_size = GridSize(max_grid_size); - _swizzle_pointers_for_odd_passes = swizzle_pointers_for_odd_passes; - - int total_cycles = _num_elements / _cycle_elements; - unsigned int cycles_per_block = total_cycles / _grid_size; - unsigned int extra_cycles = total_cycles - (cycles_per_block * _grid_size); - - CtaDecomposition work_decomposition = { - extra_cycles, // num_big_blocks - (cycles_per_block + 1) * _cycle_elements, // big_block_elements - cycles_per_block * _cycle_elements, // normal_block_elements - _num_elements - (total_cycles * _cycle_elements), // extra_elements_last_block - _num_elements}; // num_elements - - _work_decomposition = work_decomposition; - - int spine_cycles = ((_grid_size * (1 << max_radix_bits)) + B40C_RADIXSORT_SPINE_CYCLE_ELEMENTS - 1) / B40C_RADIXSORT_SPINE_CYCLE_ELEMENTS; - _spine_elements = spine_cycles * B40C_RADIXSORT_SPINE_CYCLE_ELEMENTS; -} - - - -template -int BaseRadixSortingEnactor::GridSize(int max_grid_size) -{ - const int SINGLE_CTA_CUTOFF = 0; // right now zero; we have no single-cta sorting - - // find maximum number of threadblocks if "use-default" - if (max_grid_size == 0) { - - if (_num_elements <= static_cast(SINGLE_CTA_CUTOFF)) { - - // The problem size is too small to warrant a two-level reduction: - // use only one stream-processor - max_grid_size = 1; - - } else { - - if (_device_sm_version <= 120) { - - // G80/G90 - max_grid_size = _device_props.multiProcessorCount * 4; - - } else if (_device_sm_version < 200) { - - // GT200 (has some kind of TLB or icache drama) - int orig_max_grid_size = _device_props.multiProcessorCount * B40C_RADIXSORT_SCAN_SCATTER_CTA_OCCUPANCY(_kernel_ptx_version); - if (_keys_only) { - orig_max_grid_size *= (_num_elements + (1024 * 1024 * 96) - 1) / (1024 * 1024 * 96); - } else { - orig_max_grid_size *= 
(_num_elements + (1024 * 1024 * 64) - 1) / (1024 * 1024 * 64); - } - max_grid_size = orig_max_grid_size; - - if (_num_elements / _cycle_elements > static_cast(max_grid_size)) { - - double multiplier1 = 4.0; - double multiplier2 = 16.0; - - double delta1 = 0.068; - double delta2 = 0.127; - - int dividend = (_num_elements + _cycle_elements - 1) / _cycle_elements; - - while(true) { - - double quotient = ((double) dividend) / (multiplier1 * max_grid_size); - quotient -= (int) quotient; - - if ((quotient > delta1) && (quotient < 1 - delta1)) { - - quotient = ((double) dividend) / (multiplier2 * max_grid_size / 3.0); - quotient -= (int) quotient; - - if ((quotient > delta2) && (quotient < 1 - delta2)) { - break; - } - } - - if (max_grid_size == orig_max_grid_size - 2) { - max_grid_size = orig_max_grid_size - 30; - } else { - max_grid_size -= 1; - } - } - } - } else { - - // GF100 - max_grid_size = 418; - } - } - } - - // Calculate the actual number of threadblocks to launch. Initially - // assume that each threadblock will do only one cycle_elements worth - // of work, but then clamp it by the "max" restriction derived above - // in order to accomodate the "single-sp" and "saturated" cases. 
- - int grid_size = _num_elements / _cycle_elements; - if (grid_size == 0) { - grid_size = 1; - } - if (grid_size > max_grid_size) { - grid_size = max_grid_size; - } - - return grid_size; -} - - - -template -bool BaseRadixSortingEnactor:: -CanFit() -{ - long long bytes = (_num_elements * sizeof(K) * 2) + (_spine_elements * sizeof(int)); - if (!_keys_only) bytes += _num_elements * sizeof(V) * 2; - - if (_device_props.totalGlobalMem < 1024 * 1024 * 513) { - return (bytes < ((double) _device_props.totalGlobalMem) * 0.81); // allow up to 81% capacity for 512MB - } - - return (bytes < ((double) _device_props.totalGlobalMem) * 0.89); // allow up to 90% capacity -} - - - -template -template -cudaError_t BaseRadixSortingEnactor:: -DigitPlacePass(const RadixSortStorage &converted_storage) -{ - int threads = B40C_RADIXSORT_THREADS; - int dynamic_smem; - - cudaFuncAttributes reduce_kernel_attrs, scan_scatter_attrs; - cudaFuncGetAttributes(&reduce_kernel_attrs, RakingReduction); - cudaFuncGetAttributes(&scan_scatter_attrs, ScanScatterDigits); - - // - // Counting Reduction - // - - // Run tesla flush kernel if we have two or more threadblocks for each of the SMs - if ((_device_sm_version == 130) && (_work_decomposition.num_elements > static_cast(_device_props.multiProcessorCount * _cycle_elements * 2))) { - FlushKernel<<<_grid_size, B40C_RADIXSORT_THREADS, scan_scatter_attrs.sharedSizeBytes>>>(); - synchronize_if_enabled("FlushKernel"); - } - - // GF100 and GT200 get the same smem allocation for every kernel launch (pad the reduction/top-level-scan kernels) - dynamic_smem = (_kernel_ptx_version >= 130) ? 
scan_scatter_attrs.sharedSizeBytes - reduce_kernel_attrs.sharedSizeBytes : 0; - - RakingReduction <<<_grid_size, threads, dynamic_smem>>>( - converted_storage.d_from_alt_storage, - converted_storage.d_spine, - converted_storage.d_keys, - converted_storage.d_alt_keys, - _work_decomposition); - synchronize_if_enabled("RakingReduction"); - - - // - // Spine - // - - // GF100 and GT200 get the same smem allocation for every kernel launch (pad the reduction/top-level-scan kernels) - dynamic_smem = (_kernel_ptx_version >= 130) ? scan_scatter_attrs.sharedSizeBytes - _spine_scan_kernel_attrs.sharedSizeBytes : 0; - - SrtsScanSpine<<<_grid_size, B40C_RADIXSORT_SPINE_THREADS, dynamic_smem>>>( - converted_storage.d_spine, - converted_storage.d_spine, - _spine_elements); - synchronize_if_enabled("SrtsScanSpine"); - - - // - // Scanning Scatter - // - - // Run tesla flush kernel if we have two or more threadblocks for each of the SMs - if ((_device_sm_version == 130) && (_work_decomposition.num_elements > static_cast(_device_props.multiProcessorCount * _cycle_elements * 2))) { - FlushKernel<<<_grid_size, B40C_RADIXSORT_THREADS, scan_scatter_attrs.sharedSizeBytes>>>(); - synchronize_if_enabled("FlushKernel"); - } - - ScanScatterDigits <<<_grid_size, threads, 0>>>( - converted_storage.d_from_alt_storage, - converted_storage.d_spine, - converted_storage.d_keys, - converted_storage.d_alt_keys, - converted_storage.d_values, - converted_storage.d_alt_values, - _work_decomposition); - synchronize_if_enabled("ScanScatterDigits"); - - return cudaSuccess; -} - - - -template -cudaError_t BaseRadixSortingEnactor:: -EnactSort(RadixSortStorage &problem_storage) -{ - // - // Allocate device memory for temporary storage (if necessary) - // - - if (problem_storage.d_alt_keys == NULL) { - cudaMalloc((void**) &problem_storage.d_alt_keys, _num_elements * sizeof(K)); - } - if (!_keys_only && (problem_storage.d_alt_values == NULL)) { - cudaMalloc((void**) &problem_storage.d_alt_values, _num_elements 
* sizeof(V)); - } - if (problem_storage.d_spine == NULL) { - cudaMalloc((void**) &problem_storage.d_spine, _spine_elements * sizeof(int)); - } - if (problem_storage.d_from_alt_storage == NULL) { - cudaMalloc((void**) &problem_storage.d_from_alt_storage, 2 * sizeof(bool)); - } - - // Determine suitable type of unsigned byte storage to use for keys - typedef typename KeyConversion::UnsignedBits ConvertedKeyType; - - // Copy storage pointers to an appropriately typed stucture - RadixSortStorage converted_storage; - memcpy(&converted_storage, &problem_storage, sizeof(RadixSortStorage)); - - // - // Enact the sorting operation - // - - if (RADIXSORT_DEBUG) { - - printf("_device_sm_version: %d, _kernel_ptx_version: %d\n", _device_sm_version, _kernel_ptx_version); - printf("Bottom-level reduction & scan kernels:\n\tgrid_size: %d, \n\tthreads: %d, \n\tcycle_elements: %d, \n\tnum_big_blocks: %d, \n\tbig_block_elements: %d, \n\tnormal_block_elements: %d\n\textra_elements_last_block: %d\n\n", - _grid_size, B40C_RADIXSORT_THREADS, _cycle_elements, _work_decomposition.num_big_blocks, _work_decomposition.big_block_elements, _work_decomposition.normal_block_elements, _work_decomposition.extra_elements_last_block); - printf("Top-level spine scan:\n\tgrid_size: %d, \n\tthreads: %d, \n\tspine_block_elements: %d\n\n", - _grid_size, B40C_RADIXSORT_SPINE_THREADS, _spine_elements); - } - - cudaError_t retval = EnactDigitPlacePasses(converted_storage); - - - // - // Swizzle pointers if we left our sorted output in temp storage - // - - if (_swizzle_pointers_for_odd_passes) { - - cudaMemcpy( - &problem_storage.using_alternate_storage, - &problem_storage.d_from_alt_storage[_passes & 0x1], - sizeof(bool), - cudaMemcpyDeviceToHost); - - if (problem_storage.using_alternate_storage) { - thrust::swap(problem_storage.d_keys, problem_storage.d_alt_keys); - if (!_keys_only) { - thrust::swap(problem_storage.d_values, problem_storage.d_alt_values); - } - } - } - - return retval; -} - - - - - 
-/****************************************************************************** - * Sorting enactor classes - ******************************************************************************/ - -/** - * Generic sorting enactor class. Simply create an instance of this class - * with your key-type K (and optionally value-type V if sorting with satellite - * values). - * - * Template specialization provides the appropriate enactor instance to handle - * the specified data types. - * - * @template-param K - * Type of keys to be sorted - * - * @template-param V - * Type of values to be sorted. - * - * @template-param ConvertedKeyType - * Leave as default to effect necessary enactor specialization. - */ -template ::UnsignedBits> -class RadixSortingEnactor; - - - -/** - * Sorting enactor that is specialized for for 8-bit key types - */ -template -class RadixSortingEnactor : public BaseRadixSortingEnactor -{ -protected: - - typedef BaseRadixSortingEnactor Base; - typedef typename Base::ConvertedKeyType ConvertedKeyType; - - cudaError_t EnactDigitPlacePasses(const RadixSortStorage &converted_storage) - { - Base::template DigitPlacePass<0, 4, 0, PreprocessKeyFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<1, 4, 4, NopFunctor, PostprocessKeyFunctor > (converted_storage); - - return cudaSuccess; - } - -public: - - /** - * Constructor. - * - * @param[in] num_elements - * Length (in elements) of the input to a sorting operation - * - * @param[in] max_grid_size - * Maximum allowable number of CTAs to launch. The default value of 0 indicates - * that the dispatch logic should select an appropriate value for the target device. 
- */ - RadixSortingEnactor(unsigned int num_elements, int max_grid_size = 0) : Base::BaseRadixSortingEnactor(2, 4, num_elements, max_grid_size) {} - -}; - - - -/** - * Sorting enactor that is specialized for for 16-bit key types - */ -template -class RadixSortingEnactor : public BaseRadixSortingEnactor -{ -protected: - - typedef BaseRadixSortingEnactor Base; - typedef typename Base::ConvertedKeyType ConvertedKeyType; - - cudaError_t EnactDigitPlacePasses(const RadixSortStorage &converted_storage) - { - Base::template DigitPlacePass<0, 4, 0, PreprocessKeyFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<1, 4, 4, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<2, 4, 8, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<3, 4, 12, NopFunctor, PostprocessKeyFunctor > (converted_storage); - - return cudaSuccess; - } - -public: - - /** - * Constructor. - * - * @param[in] num_elements - * Length (in elements) of the input to a sorting operation - * - * @param[in] max_grid_size - * Maximum allowable number of CTAs to launch. The default value of 0 indicates - * that the dispatch logic should select an appropriate value for the target device. 
- */ - RadixSortingEnactor(unsigned int num_elements, int max_grid_size = 0) : Base::BaseRadixSortingEnactor(4, 4, num_elements, max_grid_size) {} - -}; - - -/** - * Sorting enactor that is specialized for for 32-bit key types - */ -template -class RadixSortingEnactor : public BaseRadixSortingEnactor -{ -protected: - - typedef BaseRadixSortingEnactor Base; - typedef typename Base::ConvertedKeyType ConvertedKeyType; - - cudaError_t EnactDigitPlacePasses(const RadixSortStorage &converted_storage) - { - Base::template DigitPlacePass<0, 4, 0, PreprocessKeyFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<1, 4, 4, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<2, 4, 8, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<3, 4, 12, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<4, 4, 16, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<5, 4, 20, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<6, 4, 24, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<7, 4, 28, NopFunctor, PostprocessKeyFunctor > (converted_storage); - - return cudaSuccess; - } - -public: - - /** - * Constructor. - * - * @param[in] num_elements - * Length (in elements) of the input to a sorting operation - * - * @param[in] max_grid_size - * Maximum allowable number of CTAs to launch. The default value of 0 indicates - * that the dispatch logic should select an appropriate value for the target device. 
- */ - RadixSortingEnactor(unsigned int num_elements, int max_grid_size = 0) : Base::BaseRadixSortingEnactor(8, 4, num_elements, max_grid_size) {} - -}; - - - -/** - * Sorting enactor that is specialized for for 64-bit key types - */ -template -class RadixSortingEnactor : public BaseRadixSortingEnactor -{ -protected: - - typedef BaseRadixSortingEnactor Base; - typedef typename Base::ConvertedKeyType ConvertedKeyType; - - cudaError_t EnactDigitPlacePasses(const RadixSortStorage &converted_storage) - { - Base::template DigitPlacePass<0, 4, 0, PreprocessKeyFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<1, 4, 4, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<2, 4, 8, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<3, 4, 12, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<4, 4, 16, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<5, 4, 20, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<6, 4, 24, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<7, 4, 28, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<8, 4, 32, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<9, 4, 36, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<10, 4, 40, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<11, 4, 44, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<12, 4, 48, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<13, 4, 52, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<14, 4, 56, NopFunctor, NopFunctor >(converted_storage); - Base::template DigitPlacePass<15, 4, 60, NopFunctor, PostprocessKeyFunctor > (converted_storage); - - return cudaSuccess; - } - -public: - - /** - * Constructor. 
- * - * @param[in] num_elements - * Length (in elements) of the input to a sorting operation - * - * @param[in] max_grid_size - * Maximum allowable number of CTAs to launch. The default value of 0 indicates - * that the dispatch logic should select an appropriate value for the target device. - */ - RadixSortingEnactor(unsigned int num_elements, int max_grid_size = 0) : Base::BaseRadixSortingEnactor(16, 4, num_elements, max_grid_size) {} - -}; - - -} // end namespace b40c_thrust -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_kernel_common.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_kernel_common.h deleted file mode 100644 index 7899dc3c0e..0000000000 --- a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_kernel_common.h +++ /dev/null @@ -1,173 +0,0 @@ -/****************************************************************************** - * - * Copyright 2010 Duane Merrill - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * - * - * - * AUTHORS' REQUEST: - * - * If you use|reference|benchmark this code, please cite our Technical - * Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf): - * - * @TechReport{ Merrill:Sorting:2010, - * author = "Duane Merrill and Andrew Grimshaw", - * title = "Revisiting Sorting for GPGPU Stream Architectures", - * year = "2010", - * institution = "University of Virginia, Department of Computer Science", - * address = "Charlottesville, VA, USA", - * number = "CS2010-03" - * } - * - * For more information, see our Google Code project site: - * http://code.google.com/p/back40computing/ - * - * Thanks! - * - ******************************************************************************/ - - -/****************************************************************************** - * Configuration management for B40C radix sorting kernels - ******************************************************************************/ - -#pragma once - -#include "kernel_utils.h" -#include "vector_types.h" -#include "radixsort_key_conversion.h" - -namespace thrust { -namespace system { -namespace cuda { -namespace detail { -namespace detail { -namespace b40c_thrust { - - -/****************************************************************************** - * Radix sorting configuration - ******************************************************************************/ - -// 128 threads -#define B40C_RADIXSORT_LOG_THREADS 7 -#define B40C_RADIXSORT_THREADS (1 << B40C_RADIXSORT_LOG_THREADS) - -// Target threadblock occupancy for counting/reduction kernel -#define B40C_SM20_REDUCE_CTA_OCCUPANCY() (8) // 8 threadblocks on GF100 -#define B40C_SM12_REDUCE_CTA_OCCUPANCY() (5) // 5 threadblocks on GT200 -#define B40C_SM10_REDUCE_CTA_OCCUPANCY() (3) // 4 threadblocks on G80 -#define B40C_RADIXSORT_REDUCE_CTA_OCCUPANCY(version) ((version >= 200) ? B40C_SM20_REDUCE_CTA_OCCUPANCY() : \ - (version >= 120) ? 
B40C_SM12_REDUCE_CTA_OCCUPANCY() : \ - B40C_SM10_REDUCE_CTA_OCCUPANCY()) - -// Target threadblock occupancy for bulk scan/scatter kernel -#define B40C_SM20_SCAN_SCATTER_CTA_OCCUPANCY() (7) // 7 threadblocks on GF100 -#define B40C_SM12_SCAN_SCATTER_CTA_OCCUPANCY() (5) // 5 threadblocks on GT200 -#define B40C_SM10_SCAN_SCATTER_CTA_OCCUPANCY() (2) // 2 threadblocks on G80 -#define B40C_RADIXSORT_SCAN_SCATTER_CTA_OCCUPANCY(version) ((version >= 200) ? B40C_SM20_SCAN_SCATTER_CTA_OCCUPANCY() : \ - (version >= 120) ? B40C_SM12_SCAN_SCATTER_CTA_OCCUPANCY() : \ - B40C_SM10_SCAN_SCATTER_CTA_OCCUPANCY()) - -// Number of 256-element sets to rake per raking pass -#define B40C_SM20_LOG_SETS_PER_PASS() (1) // 2 sets on GF100 -#define B40C_SM12_LOG_SETS_PER_PASS() (0) // 1 set on GT200 -#define B40C_SM10_LOG_SETS_PER_PASS() (1) // 2 sets on G80 -#define B40C_RADIXSORT_LOG_SETS_PER_PASS(version) ((version >= 200) ? B40C_SM20_LOG_SETS_PER_PASS() : \ - (version >= 120) ? B40C_SM12_LOG_SETS_PER_PASS() : \ - B40C_SM10_LOG_SETS_PER_PASS()) - -// Number of raking passes per cycle -#define B40C_SM20_LOG_PASSES_PER_CYCLE(K, V) (((B40C_MAX(sizeof(K), sizeof(V)) > 4) || _B40C_LP64_) ? 0 : 1) // 2 passes on GF100 (only one for large keys/values, or for 64-bit device pointers) -#define B40C_SM12_LOG_PASSES_PER_CYCLE(K, V) (B40C_MAX(sizeof(K), sizeof(V)) > 4 ? 0 : 1) // 2 passes on GT200 (only for large keys/values) -#define B40C_SM10_LOG_PASSES_PER_CYCLE(K, V) (0) // 1 pass on G80 -#define B40C_RADIXSORT_LOG_PASSES_PER_CYCLE(version, K, V) ((version >= 200) ? B40C_SM20_LOG_PASSES_PER_CYCLE(K, V) : \ - (version >= 120) ? 
B40C_SM12_LOG_PASSES_PER_CYCLE(K, V) : \ - B40C_SM10_LOG_PASSES_PER_CYCLE(K, V)) - - -// Number of raking threads per raking pass -#define B40C_SM20_LOG_RAKING_THREADS_PER_PASS() (B40C_LOG_WARP_THREADS + 1) // 2 raking warps on GF100 -#define B40C_SM12_LOG_RAKING_THREADS_PER_PASS() (B40C_LOG_WARP_THREADS) // 1 raking warp on GT200 -#define B40C_SM10_LOG_RAKING_THREADS_PER_PASS() (B40C_LOG_WARP_THREADS + 2) // 4 raking warps on G80 -#define B40C_RADIXSORT_LOG_RAKING_THREADS_PER_PASS(version) ((version >= 200) ? B40C_SM20_LOG_RAKING_THREADS_PER_PASS() : \ - (version >= 120) ? B40C_SM12_LOG_RAKING_THREADS_PER_PASS() : \ - B40C_SM10_LOG_RAKING_THREADS_PER_PASS()) - - -// Number of elements per cycle -#define B40C_RADIXSORT_LOG_CYCLE_ELEMENTS(version, K, V) (B40C_RADIXSORT_LOG_SETS_PER_PASS(version) + B40C_RADIXSORT_LOG_PASSES_PER_CYCLE(version, K, V) + B40C_RADIXSORT_LOG_THREADS + 1) -#define B40C_RADIXSORT_CYCLE_ELEMENTS(version, K, V) (1 << B40C_RADIXSORT_LOG_CYCLE_ELEMENTS(version, K, V)) - -// Number of warps per CTA -#define B40C_RADIXSORT_LOG_WARPS (B40C_RADIXSORT_LOG_THREADS - B40C_LOG_WARP_THREADS) -#define B40C_RADIXSORT_WARPS (1 << B40C_RADIXSORT_LOG_WARPS) - -// Number of threads for spine-scanning kernel -#define B40C_RADIXSORT_LOG_SPINE_THREADS 7 // 128 threads -#define B40C_RADIXSORT_SPINE_THREADS (1 << B40C_RADIXSORT_LOG_SPINE_THREADS) - -// Number of elements per spine-scanning cycle -#define B40C_RADIXSORT_LOG_SPINE_CYCLE_ELEMENTS 9 // 512 elements -#define B40C_RADIXSORT_SPINE_CYCLE_ELEMENTS (1 << B40C_RADIXSORT_LOG_SPINE_CYCLE_ELEMENTS) - - - -/****************************************************************************** - * SRTS Control Structures - ******************************************************************************/ - - -/** - * Value-type structure denoting keys-only sorting - */ -struct KeysOnlyType {}; - -/** - * Returns whether or not the templated type indicates keys-only sorting - */ -template -inline __host__ __device__ bool 
IsKeysOnly() {return false;} - - -/** - * Returns whether or not the templated type indicates keys-only sorting - */ -template <> -inline __host__ __device__ bool IsKeysOnly() {return true;} - - -/** - * A given threadblock may receive one of three different amounts of - * work: "big", "normal", and "last". The big workloads are one - * cycle_elements greater than the normal, and the last workload - * does the extra (problem-size % cycle_elements) work. - */ -struct CtaDecomposition { - unsigned int num_big_blocks; - unsigned int big_block_elements; - unsigned int normal_block_elements; - unsigned int extra_elements_last_block; - unsigned int num_elements; -}; - - -} // end namespace b40c_thrust -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_key_conversion.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_key_conversion.h deleted file mode 100644 index a170f95e6c..0000000000 --- a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_key_conversion.h +++ /dev/null @@ -1,352 +0,0 @@ -/****************************************************************************** - * - * Copyright 2010 Duane Merrill - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * - * - * - * AUTHORS' REQUEST: - * - * If you use|reference|benchmark this code, please cite our Technical - * Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf): - * - * @TechReport{ Merrill:Sorting:2010, - * author = "Duane Merrill and Andrew Grimshaw", - * title = "Revisiting Sorting for GPGPU Stream Architectures", - * year = "2010", - * institution = "University of Virginia, Department of Computer Science", - * address = "Charlottesville, VA, USA", - * number = "CS2010-03" - * } - * - * For more information, see our Google Code project site: - * http://code.google.com/p/back40computing/ - * - * Thanks! - * - ******************************************************************************/ - - -/****************************************************************************** - * Functors for converting signed and floating point types to unsigned types - * suitable for radix sorting - ******************************************************************************/ - -#pragma once - -namespace thrust { -namespace system { -namespace cuda { -namespace detail { -namespace detail { -namespace b40c_thrust { - - -// -// Do-nothing functors -// - -template -struct NopFunctor{ - template - __device__ __host__ __forceinline__ void operator()(ConvertedKeyType &converted_key) {} - __device__ __host__ __forceinline__ static bool MustApply(){ return false;} -}; - -// -// Do-nothing functors that indicate a mandatory pass -// - -template -struct MandatoryPassNopFunctor{ - template - __device__ __host__ __forceinline__ void operator()(ConvertedKeyType &converted_key) {} - __device__ __host__ __forceinline__ static bool MustApply(){ return false;} -}; - - -// -// Conversion for generic unsigned types -// - -template struct KeyConversion { - typedef T UnsignedBits; -}; - -template -struct PreprocessKeyFunctor{ - template - __device__ __host__ __forceinline__ void operator()(ConvertedKeyType &converted_key) {} - __device__ __host__ __forceinline__ static bool 
MustApply(){ return false;} -}; - -template -struct PostprocessKeyFunctor { - template - __device__ __host__ __forceinline__ void operator()(ConvertedKeyType &converted_key) {} - __device__ __host__ __forceinline__ static bool MustApply(){ return false;} -}; - - - -// -// Conversion for floats -// - -template <> struct KeyConversion { - typedef unsigned int UnsignedBits; -}; - -template <> -struct PreprocessKeyFunctor { - __device__ __host__ __forceinline__ void operator()(unsigned int &converted_key) { - - unsigned int mask = (converted_key & 0x80000000) ? 0xffffffff : 0x80000000; - converted_key ^= mask; - } - __device__ __host__ __forceinline__ static bool MustApply(){ return true;} -}; - -template <> -struct PostprocessKeyFunctor { - __device__ __host__ __forceinline__ void operator()(unsigned int &converted_key) { - - unsigned int mask = (converted_key & 0x80000000) ? 0x80000000 : 0xffffffff; - converted_key ^= mask; - } - __device__ __host__ __forceinline__ static bool MustApply(){ return true;} -}; - - - -// -// Conversion for doubles -// - -template <> struct KeyConversion { - typedef unsigned long long UnsignedBits; -}; - -template <> -struct PreprocessKeyFunctor { - __device__ __host__ __forceinline__ void operator()(unsigned long long &converted_key) { - - unsigned long long mask = (converted_key & 0x8000000000000000) ? 0xffffffffffffffff : 0x8000000000000000; - converted_key ^= mask; - } - __device__ __host__ __forceinline__ static bool MustApply(){ return true;} -}; - -template <> -struct PostprocessKeyFunctor { - __device__ __host__ __forceinline__ void operator()(unsigned long long &converted_key) { - unsigned long long mask = (converted_key & 0x8000000000000000) ? 
0x8000000000000000 : 0xffffffffffffffff; - converted_key ^= mask; - } - __device__ __host__ __forceinline__ static bool MustApply(){ return true;} -}; - - -// -// Conversion for signed chars -// - -template <> struct KeyConversion { - typedef unsigned char UnsignedBits; -}; - -template <> -struct PreprocessKeyFunctor { - __device__ __host__ __forceinline__ void operator()(unsigned char &converted_key) { - // char is unsigned on some platforms, so we have to check - if(std::numeric_limits::is_signed) - { - const unsigned int SIGN_MASK = 1u << ((sizeof(char) * 8) - 1); - converted_key ^= SIGN_MASK; - } - } - __device__ __host__ __forceinline__ static bool MustApply(){ return std::numeric_limits::is_signed;} -}; - -template <> -struct PostprocessKeyFunctor { - __device__ __host__ __forceinline__ void operator()(unsigned char &converted_key) { - // char is unsigned on some platforms, so we have to check - if(std::numeric_limits::is_signed) - { - const unsigned int SIGN_MASK = 1u << ((sizeof(char) * 8) - 1); - converted_key ^= SIGN_MASK; - } - } - __device__ __host__ __forceinline__ static bool MustApply(){ return std::numeric_limits::is_signed;} -}; - - -// TODO handle this more gracefully -template <> struct KeyConversion { - typedef unsigned char UnsignedBits; -}; - -template <> -struct PreprocessKeyFunctor { - __device__ __host__ __forceinline__ void operator()(unsigned char &converted_key) { - const unsigned int SIGN_MASK = 1u << ((sizeof(char) * 8) - 1); - converted_key ^= SIGN_MASK; - } - __device__ __host__ __forceinline__ static bool MustApply(){ return true;} -}; - -template <> -struct PostprocessKeyFunctor { - __device__ __host__ __forceinline__ void operator()(unsigned char &converted_key) { - const unsigned int SIGN_MASK = 1u << ((sizeof(char) * 8) - 1); - converted_key ^= SIGN_MASK; - } - __device__ __host__ __forceinline__ static bool MustApply(){ return true;} -}; - - -// -// Conversion for signed shorts -// - -template <> struct KeyConversion { - 
typedef unsigned short UnsignedBits; -}; - -template <> -struct PreprocessKeyFunctor { - __device__ __host__ __forceinline__ void operator()(unsigned short &converted_key) { - const unsigned int SIGN_MASK = 1u << ((sizeof(short) * 8) - 1); - converted_key ^= SIGN_MASK; - } - __device__ __host__ __forceinline__ static bool MustApply(){ return true;} -}; - -template <> -struct PostprocessKeyFunctor { - __device__ __host__ __forceinline__ void operator()(unsigned short &converted_key) { - const unsigned int SIGN_MASK = 1u << ((sizeof(short) * 8) - 1); - converted_key ^= SIGN_MASK; - } - __device__ __host__ __forceinline__ static bool MustApply(){ return true;} -}; - - - -// -// Conversion for signed ints -// - -template <> struct KeyConversion { - typedef unsigned int UnsignedBits; -}; - -template <> -struct PreprocessKeyFunctor { - __device__ __host__ __forceinline__ void operator()(unsigned int &converted_key) { - const unsigned int SIGN_MASK = 1u << ((sizeof(int) * 8) - 1); - converted_key ^= SIGN_MASK; - } - __device__ __host__ __forceinline__ static bool MustApply(){ return true;} -}; - -template <> -struct PostprocessKeyFunctor { - __device__ __host__ __forceinline__ void operator()(unsigned int &converted_key) { - const unsigned int SIGN_MASK = 1u << ((sizeof(int) * 8) - 1); - converted_key ^= SIGN_MASK; - } - __device__ __host__ __forceinline__ static bool MustApply(){ return true;} -}; - - - -// -// Conversion for signed longs -// - -// TODO rework this with metaprogramming -template <> struct KeyConversion { -#if ULONG_MAX == UINT_MAX - typedef unsigned int UnsignedBits; -#else - typedef unsigned long long UnsignedBits; -#endif -}; - -// TODO rework this with metaprogramming -template <> struct KeyConversion { -#if ULONG_MAX == UINT_MAX - typedef unsigned int UnsignedBits; -#else - typedef unsigned long long UnsignedBits; -#endif -}; - -template <> -struct PreprocessKeyFunctor { - __device__ __host__ __forceinline__ void operator()(typename 
KeyConversion::UnsignedBits& converted_key) { - const typename KeyConversion::UnsignedBits SIGN_MASK = 1ul << ((sizeof(long) * 8) - 1); - converted_key ^= SIGN_MASK; - } - __device__ __host__ __forceinline__ static bool MustApply(){ return true;} -}; - -template <> -struct PostprocessKeyFunctor { - __device__ __host__ __forceinline__ void operator()(typename KeyConversion::UnsignedBits& converted_key) { - const typename KeyConversion::UnsignedBits SIGN_MASK = 1ul << ((sizeof(long) * 8) - 1); - converted_key ^= SIGN_MASK; - } - __device__ __host__ __forceinline__ static bool MustApply(){ return true;} -}; - - - -// -// Conversion for signed long longs -// - -template <> struct KeyConversion { - typedef unsigned long long UnsignedBits; -}; - -template <> -struct PreprocessKeyFunctor { - __device__ __host__ __forceinline__ void operator()(unsigned long long &converted_key) { - const unsigned long long SIGN_MASK = 1ull << ((sizeof(long long) * 8) - 1); - converted_key ^= SIGN_MASK; - } - __device__ __host__ __forceinline__ static bool MustApply(){ return true;} -}; - -template <> -struct PostprocessKeyFunctor { - __device__ __host__ __forceinline__ void operator()(unsigned long long &converted_key) { - const unsigned long long SIGN_MASK = 1ull << ((sizeof(long long) * 8) - 1); - converted_key ^= SIGN_MASK; - } - __device__ __host__ __forceinline__ static bool MustApply(){ return true;} -}; - - -} // end namespace b40c_thrust -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_reduction_kernel.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_reduction_kernel.h deleted file mode 100644 index a8f91d3d24..0000000000 --- a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_reduction_kernel.h +++ /dev/null @@ -1,439 +0,0 @@ -/****************************************************************************** - * - * 
Copyright 2010 Duane Merrill - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * - * - * - * AUTHORS' REQUEST: - * - * If you use|reference|benchmark this code, please cite our Technical - * Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf): - * - * @TechReport{ Merrill:Sorting:2010, - * author = "Duane Merrill and Andrew Grimshaw", - * title = "Revisiting Sorting for GPGPU Stream Architectures", - * year = "2010", - * institution = "University of Virginia, Department of Computer Science", - * address = "Charlottesville, VA, USA", - * number = "CS2010-03" - * } - * - * For more information, see our Google Code project site: - * http://code.google.com/p/back40computing/ - * - * Thanks! 
- * - ******************************************************************************/ - - -/****************************************************************************** - * Bottom-level digit-reduction/counting kernel - ******************************************************************************/ - -#pragma once - -#include "radixsort_kernel_common.h" - -namespace thrust { -namespace system { -namespace cuda { -namespace detail { -namespace detail { -namespace b40c_thrust { - -/****************************************************************************** - * Defines - ******************************************************************************/ - -const int BYTE_ENCODE_SHIFT = 0x3; - - -/****************************************************************************** - * Cycle-processing Routines - ******************************************************************************/ - -__device__ __forceinline__ int DecodeInt(int encoded, int quad_byte){ - return (encoded >> quad_byte) & 0xff; // shift right 8 bits per digit and return rightmost 8 bits -} - - -__device__ __forceinline__ int EncodeInt(int count, int quad_byte) { - return count << quad_byte; // shift left 8 bits per digit -} - - -template -__device__ __forceinline__ void DecodeDigit( - K key, - int &lane, - int &quad_shift) -{ - const K DIGIT_MASK = RADIX_DIGITS - 1; - lane = (key & (DIGIT_MASK << BIT)) >> (BIT + 2); - - const K QUAD_MASK = (RADIX_DIGITS < 4) ? 0x1 : 0x3; - if (BIT == 32) { - // N.B.: This takes one more instruction than the code below it, but - // otherwise the compiler goes nuts and shoves hundreds of bytes - // to lmem when bit = 32 on 64-bit keys. 
- quad_shift = ((key >> BIT) & QUAD_MASK) << BYTE_ENCODE_SHIFT; - } else { - quad_shift = MagnitudeShift(key & (QUAD_MASK << BIT)); - } -} - - -template -__device__ __forceinline__ void ReduceEncodedCounts( - int local_counts[LANES_PER_WARP][4], - int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) -{ - const int LOG_PARTIALS_PER_THREAD = B40C_RADIXSORT_LOG_THREADS - B40C_LOG_WARP_THREADS; - const int PARTIALS_PER_THREAD = 1 << LOG_PARTIALS_PER_THREAD; - - int encoded; - int idx = threadIdx.x & (B40C_WARP_THREADS - 1); - - - __syncthreads(); - - #pragma unroll - for (int j = 0; j < (int) LANES_PER_WARP; j++) { - - int warp_id = (threadIdx.x >> B40C_LOG_WARP_THREADS) + (j * B40C_RADIXSORT_WARPS); - if (warp_id < SCAN_LANES) { - - // rest of my elements - #pragma unroll - for (int i = 0; i < (int) PARTIALS_PER_THREAD; i++) { - encoded = encoded_carry[warp_id][idx + (i * B40C_WARP_THREADS)]; - local_counts[j][0] += DecodeInt(encoded, 0u << BYTE_ENCODE_SHIFT); - local_counts[j][1] += DecodeInt(encoded, 1u << BYTE_ENCODE_SHIFT); - local_counts[j][2] += DecodeInt(encoded, 2u << BYTE_ENCODE_SHIFT); - local_counts[j][3] += DecodeInt(encoded, 3u << BYTE_ENCODE_SHIFT); - } - - if (FINAL_REDUCE) { - // reduce all four packed fields, leaving them in the first four elements of our row - WarpReduce(idx, &encoded_carry[warp_id][0], local_counts[j][0]); - WarpReduce(idx, &encoded_carry[warp_id][1], local_counts[j][1]); - WarpReduce(idx, &encoded_carry[warp_id][2], local_counts[j][2]); - WarpReduce(idx, &encoded_carry[warp_id][3], local_counts[j][3]); - } - } - } - - __syncthreads(); - -} - - -template -__device__ __forceinline__ void Bucket( - K input, - int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS], - PreprocessFunctor preprocess = PreprocessFunctor()) -{ - int lane, quad_shift; - preprocess(input); - DecodeDigit(input, lane, quad_shift); - encoded_carry[lane][threadIdx.x] += EncodeInt(1, quad_shift); -} - - -template -struct LoadOp; - -template -struct LoadOp -{ - 
static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) - { - K key = d_in_keys[offset + threadIdx.x]; - Bucket(key, encoded_carry); - } -}; - -template -struct LoadOp -{ - static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) - { - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry); - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 1), encoded_carry); - } -}; - -template -struct LoadOp -{ - static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) - { - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry); - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 2), encoded_carry); - } -}; - -template -struct LoadOp -{ - static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) - { - K keys[8]; - - keys[0] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 0) + threadIdx.x]; - keys[1] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 1) + threadIdx.x]; - keys[2] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 2) + threadIdx.x]; - keys[3] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 3) + threadIdx.x]; - - if (B40C_FERMI(__CUDA_ARCH__)) __syncthreads(); - - keys[4] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 4) + threadIdx.x]; - keys[5] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 5) + threadIdx.x]; - keys[6] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 6) + threadIdx.x]; - keys[7] = d_in_keys[offset + (B40C_RADIXSORT_THREADS * 7) + threadIdx.x]; - - Bucket(keys[0], encoded_carry); - Bucket(keys[1], encoded_carry); - Bucket(keys[2], encoded_carry); - Bucket(keys[3], encoded_carry); - Bucket(keys[4], encoded_carry); - Bucket(keys[5], encoded_carry); - Bucket(keys[6], 
encoded_carry); - Bucket(keys[7], encoded_carry); - } -}; - -template -struct LoadOp { - - static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) - { - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry); - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 8), encoded_carry); - } -}; - -template -struct LoadOp { - - static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) - { - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry); - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 16), encoded_carry); - } -}; - -template -struct LoadOp { - - static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) - { - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry); - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 32), encoded_carry); - } -}; - -template -struct LoadOp { - - static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) - { - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry); - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 64), encoded_carry); - } -}; - -template -struct LoadOp { - - static __device__ __forceinline__ void BlockOfLoads(K *d_in_keys, int offset, int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) - { - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 0), encoded_carry); - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 128), encoded_carry); - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 192), encoded_carry); - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 
224), encoded_carry); - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 240), encoded_carry); - LoadOp::BlockOfLoads(d_in_keys, offset + (B40C_RADIXSORT_THREADS * 248), encoded_carry); - } -}; - - -template -__device__ __forceinline__ void ResetEncodedCarry( - int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]) -{ - #pragma unroll - for (int SCAN_LANE = 0; SCAN_LANE < (int) SCAN_LANES; SCAN_LANE++) { - encoded_carry[SCAN_LANE][threadIdx.x] = 0; - } -} - - -template -__device__ __forceinline__ int ProcessLoads( - K *d_in_keys, - int loads, - int &offset, - int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS], - int local_counts[LANES_PER_WARP][4]) -{ - // Unroll batches of loads with occasional reduction to avoid overflow - while (loads >= 32) { - - LoadOp::BlockOfLoads(d_in_keys, offset, encoded_carry); - offset += B40C_RADIXSORT_THREADS * 32; - loads -= 32; - - // Reduce int local count registers to prevent overflow - ReduceEncodedCounts( - local_counts, - encoded_carry); - - // Reset encoded counters - ResetEncodedCarry(encoded_carry); - } - - int retval = loads; - - // Wind down loads in decreasing batch sizes - - while (loads >= 4) { - LoadOp::BlockOfLoads(d_in_keys, offset, encoded_carry); - offset += B40C_RADIXSORT_THREADS * 4; - loads -= 4; - } - - while (loads) { - LoadOp::BlockOfLoads(d_in_keys, offset, encoded_carry); - offset += B40C_RADIXSORT_THREADS * 1; - loads--; - } - - return retval; -} - - -/****************************************************************************** - * Reduction/counting Kernel Entry Point - ******************************************************************************/ - -template -__launch_bounds__ (B40C_RADIXSORT_THREADS, B40C_RADIXSORT_REDUCE_CTA_OCCUPANCY(__CUDA_ARCH__)) -__global__ -void RakingReduction( - bool *d_from_alt_storage, - int *d_spine, - K *d_in_keys, - K *d_out_keys, - CtaDecomposition work_decomposition) -{ - const int RADIX_DIGITS = 1 << RADIX_BITS; - - const int LOG_SCAN_LANES = 
(RADIX_BITS >= 2) ? RADIX_BITS - 2 : 0; // Always at least one fours group - const int SCAN_LANES = 1 << LOG_SCAN_LANES; - - const int LOG_LANES_PER_WARP = (SCAN_LANES > B40C_RADIXSORT_WARPS) ? LOG_SCAN_LANES - B40C_RADIXSORT_LOG_WARPS : 0; // Always at least one fours group per warp - const int LANES_PER_WARP = 1 << LOG_LANES_PER_WARP; - - - // Each thread gets its own column of fours-groups (for conflict-free updates) - __shared__ int encoded_carry[SCAN_LANES][B40C_RADIXSORT_THREADS]; - - // Each thread is also responsible for aggregating an unencoded segment of a fours-group - int local_counts[LANES_PER_WARP][4]; - - // Determine where to read our input - bool from_alt_storage = (PASS == 0) ? false : d_from_alt_storage[PASS & 0x1]; - if (from_alt_storage) d_in_keys = d_out_keys; - - // Calculate our threadblock's range - int offset, block_elements; - if (blockIdx.x < work_decomposition.num_big_blocks) { - offset = work_decomposition.big_block_elements * blockIdx.x; - block_elements = work_decomposition.big_block_elements; - } else { - offset = (work_decomposition.normal_block_elements * blockIdx.x) + (work_decomposition.num_big_blocks * B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V)); - block_elements = work_decomposition.normal_block_elements; - } - - // Initialize local counts - #pragma unroll - for (int LANE = 0; LANE < (int) LANES_PER_WARP; LANE++) { - local_counts[LANE][0] = 0; - local_counts[LANE][1] = 0; - local_counts[LANE][2] = 0; - local_counts[LANE][3] = 0; - } - - // Reset encoded counters - ResetEncodedCarry(encoded_carry); - - // Process loads - int loads = block_elements >> B40C_RADIXSORT_LOG_THREADS; - int unreduced_loads = ProcessLoads( - d_in_keys, - loads, - offset, - encoded_carry, - local_counts); - - // Cleanup if we're the last block - if ((blockIdx.x == gridDim.x - 1) && (work_decomposition.extra_elements_last_block)) { - - const int LOADS_PER_CYCLE = B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V) / B40C_RADIXSORT_THREADS; - - // 
If extra guarded loads may cause overflow, reduce now and reset counters - if (unreduced_loads + LOADS_PER_CYCLE > 255) { - - ReduceEncodedCounts( - local_counts, - encoded_carry); - - ResetEncodedCarry(encoded_carry); - } - - // perform up to LOADS_PER_CYCLE extra guarded loads - #pragma unroll - for (int EXTRA_LOAD = 0; EXTRA_LOAD < (int) LOADS_PER_CYCLE; EXTRA_LOAD++) { - if (threadIdx.x + (B40C_RADIXSORT_THREADS * EXTRA_LOAD) < work_decomposition.extra_elements_last_block) { - K key = d_in_keys[offset + (B40C_RADIXSORT_THREADS * EXTRA_LOAD) + threadIdx.x]; - Bucket(key, encoded_carry); - } - } - } - - // Aggregate - ReduceEncodedCounts( - local_counts, - encoded_carry); - - // Write carry in parallel (carries per row are in the first four bytes of each row) - if (threadIdx.x < RADIX_DIGITS) { - - int row = threadIdx.x >> 2; - int col = threadIdx.x & 3; - d_spine[(gridDim.x * threadIdx.x) + blockIdx.x] = encoded_carry[row][col]; - } -} - -} // end namespace b40c_thrust -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_scanscatter_kernel.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_scanscatter_kernel.h deleted file mode 100644 index 1377999c76..0000000000 --- a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_scanscatter_kernel.h +++ /dev/null @@ -1,1207 +0,0 @@ -/****************************************************************************** - * - * Copyright 2010 Duane Merrill - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * - * - * - * AUTHORS' REQUEST: - * - * If you use|reference|benchmark this code, please cite our Technical - * Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf): - * - * @TechReport{ Merrill:Sorting:2010, - * author = "Duane Merrill and Andrew Grimshaw", - * title = "Revisiting Sorting for GPGPU Stream Architectures", - * year = "2010", - * institution = "University of Virginia, Department of Computer Science", - * address = "Charlottesville, VA, USA", - * number = "CS2010-03" - * } - * - * For more information, see our Google Code project site: - * http://code.google.com/p/back40computing/ - * - * Thanks! 
- * - ******************************************************************************/ - - -/****************************************************************************** -// Bottom-level digit scanning/scattering kernel - ******************************************************************************/ - -#pragma once - -#include "radixsort_kernel_common.h" - -namespace thrust { -namespace system { -namespace cuda { -namespace detail { -namespace detail { -namespace b40c_thrust { - -/****************************************************************************** - * Appropriate substitutes to use for out-of-bounds key (and value) offsets - ******************************************************************************/ - -template -__device__ __forceinline__ T DefaultextraValue() { - return T(); -} - -template <> -__device__ __forceinline__ unsigned char DefaultextraValue() { - return (unsigned char) -1; -} - -template <> -__device__ __forceinline__ unsigned short DefaultextraValue() { - return (unsigned short) -1; -} - -template <> -__device__ __forceinline__ unsigned int DefaultextraValue() { - return (unsigned int) -1u; -} - -template <> -__device__ __forceinline__ unsigned long DefaultextraValue() { - return (unsigned long) -1ul; -} - -template <> -__device__ __forceinline__ unsigned long long DefaultextraValue() { - return (unsigned long long) -1ull; -} - - -/****************************************************************************** - * Cycle-processing Routines - ******************************************************************************/ - -template -__device__ __forceinline__ int DecodeDigit(K key) -{ - const K DIGIT_MASK = RADIX_DIGITS - 1; - return (key >> BIT) & DIGIT_MASK; -} - - -template -__device__ __forceinline__ void DecodeDigit( - K key, - int &digit, - int &flag_offset, // in bytes - const int SET_OFFSET) -{ - const int PADDED_BYTES_PER_LANE = PADDED_PARTIALS_PER_LANE * 4; - const int SET_OFFSET_BYTES = SET_OFFSET * 4; - const K QUAD_MASK = 
(RADIX_DIGITS < 4) ? 0x1 : 0x3; - - digit = DecodeDigit(key); - int lane = digit >> 2; - int quad_byte = digit & QUAD_MASK; - - flag_offset = SET_OFFSET_BYTES + FastMul(lane, PADDED_BYTES_PER_LANE) + quad_byte; -} - - -template -__device__ __forceinline__ void DecodeDigits( - typename VecType::Type keypairs[SETS_PER_PASS], - int2 digits[SETS_PER_PASS], - int2 flag_offsets[SETS_PER_PASS]) // in bytes -{ - - #pragma unroll - for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { - - const int SET_OFFSET = SET * SCAN_LANES_PER_SET * PADDED_PARTIALS_PER_LANE; - - DecodeDigit( - keypairs[SET].x, digits[SET].x, flag_offsets[SET].x, SET_OFFSET); - - DecodeDigit( - keypairs[SET].y, digits[SET].y, flag_offsets[SET].y, SET_OFFSET); - } -} - - -template -__device__ __forceinline__ void GuardedReadSet( - T *in, - typename VecType::Type &pair, - int offset, - int extra[1], - PreprocessFunctor preprocess = PreprocessFunctor()) -{ - if (offset - extra[0] < 0) { - pair.x = in[offset]; - preprocess(pair.x); - } else { - pair.x = DefaultextraValue(); - } - - if (offset + 1 - extra[0] < 0) { - pair.y = in[offset + 1]; - preprocess(pair.y); - } else { - pair.y = DefaultextraValue(); - } -} - - -template -__device__ __forceinline__ void ReadSets( - typename VecType::Type *d_in, - typename VecType::Type pairs[SETS_PER_PASS], - const int BASE2, - int extra[1], - PreprocessFunctor preprocess = PreprocessFunctor()) -{ - if (UNGUARDED_IO) { - - // N.B. 
-- I wish we could do some pragma unrolling here too, but the compiler makes it 1% slower - if (SETS_PER_PASS > 0) pairs[0] = d_in[threadIdx.x + BASE2 + (B40C_RADIXSORT_THREADS * 0)]; - if (SETS_PER_PASS > 1) pairs[1] = d_in[threadIdx.x + BASE2 + (B40C_RADIXSORT_THREADS * 1)]; - if (SETS_PER_PASS > 2) pairs[2] = d_in[threadIdx.x + BASE2 + (B40C_RADIXSORT_THREADS * 2)]; - if (SETS_PER_PASS > 3) pairs[3] = d_in[threadIdx.x + BASE2 + (B40C_RADIXSORT_THREADS * 3)]; - - #pragma unroll - for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { - preprocess(pairs[SET].x); - preprocess(pairs[SET].y); - } - - } else { - - T* in = (T*) d_in; - - // N.B. -- I wish we could do some pragma unrolling here, but the compiler won't let - // us with user-defined value types (e.g., Fribbitz): "Advisory: Loop was not unrolled, cannot deduce loop trip count" - - if (SETS_PER_PASS > 0) GuardedReadSet(in, pairs[0], (threadIdx.x << 1) + (BASE2 << 1) + (B40C_RADIXSORT_THREADS * 2 * 0), extra); - if (SETS_PER_PASS > 1) GuardedReadSet(in, pairs[1], (threadIdx.x << 1) + (BASE2 << 1) + (B40C_RADIXSORT_THREADS * 2 * 1), extra); - if (SETS_PER_PASS > 2) GuardedReadSet(in, pairs[2], (threadIdx.x << 1) + (BASE2 << 1) + (B40C_RADIXSORT_THREADS * 2 * 2), extra); - if (SETS_PER_PASS > 3) GuardedReadSet(in, pairs[3], (threadIdx.x << 1) + (BASE2 << 1) + (B40C_RADIXSORT_THREADS * 2 * 3), extra); - } -} - - -template -__device__ __forceinline__ void PlacePartials( - unsigned char * base_partial, - int2 digits[SETS_PER_PASS], - int2 flag_offsets[SETS_PER_PASS]) -{ - #pragma unroll - for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { - base_partial[flag_offsets[SET].x] = 1; - base_partial[flag_offsets[SET].y] = 1 + (digits[SET].x == digits[SET].y); - } -} - - -template -__device__ __forceinline__ void ExtractRanks( - unsigned char * base_partial, - int2 digits[SETS_PER_PASS], - int2 flag_offsets[SETS_PER_PASS], - int2 ranks[SETS_PER_PASS]) -{ - #pragma unroll - for (int SET = 0; SET < (int) SETS_PER_PASS; 
SET++) { - ranks[SET].x = base_partial[flag_offsets[SET].x]; - ranks[SET].y = base_partial[flag_offsets[SET].y] + (digits[SET].x == digits[SET].y); - } -} - - -template -__device__ __forceinline__ void UpdateRanks( - int2 digits[SETS_PER_PASS], - int2 ranks[SETS_PER_PASS], - int digit_counts[SETS_PER_PASS][RADIX_DIGITS]) -{ - // N.B.: I wish we could pragma unroll here, but doing so currently - // results in the 3.1 compilier on 64-bit platforms generating bad - // code for SM1.3, resulting in incorrect sorting (e.g., problem size 16) - - if (SETS_PER_PASS > 0) { - ranks[0].x += digit_counts[0][digits[0].x]; - ranks[0].y += digit_counts[0][digits[0].y]; - } - if (SETS_PER_PASS > 1) { - ranks[1].x += digit_counts[1][digits[1].x]; - ranks[1].y += digit_counts[1][digits[1].y]; - } - if (SETS_PER_PASS > 2) { - ranks[2].x += digit_counts[2][digits[2].x]; - ranks[2].y += digit_counts[2][digits[2].y]; - } - if (SETS_PER_PASS > 3) { - ranks[3].x += digit_counts[3][digits[3].x]; - ranks[3].y += digit_counts[3][digits[3].y]; - } -} - -template -__device__ __forceinline__ void UpdateRanks( - int2 digits[PASSES_PER_CYCLE][SETS_PER_PASS], - int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS], - int digit_counts[PASSES_PER_CYCLE][SETS_PER_PASS][RADIX_DIGITS]) -{ - // N.B.: I wish we could pragma unroll here, but doing so currently - // results in the 3.1 compilier on 64-bit platforms generating bad - // code for SM1.3, resulting in incorrect sorting (e.g., problem size 16) - - if (PASSES_PER_CYCLE > 0) UpdateRanks(digits[0], ranks[0], digit_counts[0]); - if (PASSES_PER_CYCLE > 1) UpdateRanks(digits[1], ranks[1], digit_counts[1]); - if (PASSES_PER_CYCLE > 2) UpdateRanks(digits[2], ranks[2], digit_counts[2]); - if (PASSES_PER_CYCLE > 3) UpdateRanks(digits[3], ranks[3], digit_counts[3]); -} - - - -template -__device__ __forceinline__ void PrefixScanOverLanes( - int raking_segment[], - int warpscan[SCAN_LANES_PER_PASS][3][RAKING_THREADS_PER_LANE], - int copy_section) -{ - // Upsweep rake - 
int partial_reduction = SerialReduce(raking_segment); - - // Warpscan reduction in digit warpscan_lane - int warpscan_lane = threadIdx.x >> LOG_RAKING_THREADS_PER_LANE; - int group_prefix = WarpScan( - warpscan[warpscan_lane], - partial_reduction, - copy_section); - - // Downsweep rake - SerialScan(raking_segment, group_prefix); - -} - - -template -__device__ __forceinline__ void RecoverDigitCounts( - int warpscan[SCAN_LANES_PER_PASS][3][RAKING_THREADS_PER_LANE], - int counts[SETS_PER_PASS], - int copy_section) -{ - int my_lane = threadIdx.x >> 2; - int my_quad_byte = threadIdx.x & 3; - - #pragma unroll - for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { - unsigned char *warpscan_count = (unsigned char *) &warpscan[my_lane + (SCAN_LANES_PER_SET * SET)][1 + copy_section][RAKING_THREADS_PER_LANE - 1]; - counts[SET] = warpscan_count[my_quad_byte]; - } -} - -template -__device__ __forceinline__ void CorrectUnguardedSetOverflow( - int2 set_digits, - int &set_count) -{ - if (WarpVoteAll(RADIX_DIGITS, set_count <= 1)) { - // All first-pass, first set keys have same digit. - set_count = (threadIdx.x == set_digits.x) ? 256 : 0; - } -} - -template -__device__ __forceinline__ void CorrectUnguardedPassOverflow( - int2 pass_digits[SETS_PER_PASS], - int pass_counts[SETS_PER_PASS]) -{ - // N.B. 
-- I wish we could do some pragma unrolling here too, but the compiler won't comply, - // telling me "Advisory: Loop was not unrolled, unexpected call OPs" - - if (SETS_PER_PASS > 0) CorrectUnguardedSetOverflow(pass_digits[0], pass_counts[0]); - if (SETS_PER_PASS > 1) CorrectUnguardedSetOverflow(pass_digits[1], pass_counts[1]); - if (SETS_PER_PASS > 2) CorrectUnguardedSetOverflow(pass_digits[2], pass_counts[2]); - if (SETS_PER_PASS > 3) CorrectUnguardedSetOverflow(pass_digits[3], pass_counts[3]); -} - - -template -__device__ __forceinline__ void CorrectUnguardedCycleOverflow( - int2 cycle_digits[PASSES_PER_CYCLE][SETS_PER_PASS], - int cycle_counts[PASSES_PER_CYCLE][SETS_PER_PASS]) -{ - // N.B. -- I wish we could do some pragma unrolling here too, but the compiler won't comply, - // telling me "Advisory: Loop was not unrolled, unexpected call OPs" - - if (PASSES_PER_CYCLE > 0) CorrectUnguardedPassOverflow(cycle_digits[0], cycle_counts[0]); - if (PASSES_PER_CYCLE > 1) CorrectUnguardedPassOverflow(cycle_digits[1], cycle_counts[1]); -} - - -template -__device__ __forceinline__ void CorrectLastLaneOverflow(int &count, int extra[1]) -{ - if (WarpVoteAll(RADIX_DIGITS, count == 0) && (threadIdx.x == RADIX_DIGITS - 1)) { - // We're 'f' and we overflowed b/c of invalid 'f' placemarkers; the number of valid items in this set is the count of valid f's - count = extra[0] & 255; - } -} - - -template -__device__ __forceinline__ void CorrectForOverflows( - int2 digits[PASSES_PER_CYCLE][SETS_PER_PASS], - int counts[PASSES_PER_CYCLE][SETS_PER_PASS], - int extra[1]) -{ - if (!UNGUARDED_IO) { - - // Correct any overflow in the partially-filled last lane - int *linear_counts = (int *) counts; - CorrectLastLaneOverflow(linear_counts[SETS_PER_CYCLE - 1], extra); - } - - CorrectUnguardedCycleOverflow(digits, counts); -} - - -template < - typename K, - int BIT, - int RADIX_DIGITS, - int SCAN_LANES_PER_SET, - int SETS_PER_PASS, - int RAKING_THREADS_PER_PASS, - int SCAN_LANES_PER_PASS, - int 
LOG_RAKING_THREADS_PER_LANE, - int RAKING_THREADS_PER_LANE, - int PARTIALS_PER_SEG, - int PADDED_PARTIALS_PER_LANE, - int PASSES_PER_CYCLE> -__device__ __forceinline__ void ScanPass( - int *base_partial, - int *raking_partial, - int warpscan[SCAN_LANES_PER_PASS][3][RAKING_THREADS_PER_LANE], - typename VecType::Type keypairs[SETS_PER_PASS], - int2 digits[SETS_PER_PASS], - int2 flag_offsets[SETS_PER_PASS], - int2 ranks[SETS_PER_PASS], - int copy_section) -{ - // Reset smem - #pragma unroll - for (int SCAN_LANE = 0; SCAN_LANE < (int) SCAN_LANES_PER_PASS; SCAN_LANE++) { - base_partial[SCAN_LANE * PADDED_PARTIALS_PER_LANE] = 0; - } - - // Decode digits for first pass - DecodeDigits( - keypairs, digits, flag_offsets); - - // Encode counts into smem for first pass - PlacePartials( - (unsigned char *) base_partial, - digits, - flag_offsets); - - __syncthreads(); - - // Intra-group prefix scans for first pass - if (threadIdx.x < RAKING_THREADS_PER_PASS) { - - PrefixScanOverLanes( // first pass is offset right by one - raking_partial, - warpscan, - copy_section); - } - - __syncthreads(); - - // Extract ranks - ExtractRanks( - (unsigned char *) base_partial, - digits, - flag_offsets, - ranks); -} - - -/****************************************************************************** - * SM1.3 Local Exchange Routines - * - * Routines for exchanging keys (and values) in shared memory (i.e., local - * scattering) in order to to facilitate coalesced global scattering - ******************************************************************************/ - -template -__device__ __forceinline__ void ScatterSets( - T *d_out, - typename VecType::Type pairs[SETS_PER_PASS], - int2 offsets[SETS_PER_PASS], - const int BASE4, - int extra[1], - PostprocessFunctor postprocess = PostprocessFunctor()) -{ - #pragma unroll - for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { - postprocess(pairs[SET].x); - postprocess(pairs[SET].y); - } - - // N.B. 
-- I wish we could do some pragma unrolling here too, but the compiler makes it 1% slower - - if (SETS_PER_PASS > 0) { - if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 0) < extra[0])) - d_out[offsets[0].x] = pairs[0].x; - if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 1) < extra[0])) - d_out[offsets[0].y] = pairs[0].y; - } - - if (SETS_PER_PASS > 1) { - if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 2) < extra[0])) - d_out[offsets[1].x] = pairs[1].x; - if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 3) < extra[0])) - d_out[offsets[1].y] = pairs[1].y; - } - - if (SETS_PER_PASS > 2) { - if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 4) < extra[0])) - d_out[offsets[2].x] = pairs[2].x; - if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 5) < extra[0])) - d_out[offsets[2].y] = pairs[2].y; - } - - if (SETS_PER_PASS > 3) { - if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 6) < extra[0])) - d_out[offsets[3].x] = pairs[3].x; - if (UNGUARDED_IO || (threadIdx.x + BASE4 + (B40C_RADIXSORT_THREADS * 7) < extra[0])) - d_out[offsets[3].y] = pairs[3].y; - } -} - -template -__device__ __forceinline__ void PushPairs( - T *swap, - typename VecType::Type pairs[PASSES_PER_CYCLE][SETS_PER_PASS], - int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS]) -{ - #pragma unroll - for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) { - - #pragma unroll - for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { - swap[ranks[PASS][SET].x] = pairs[PASS][SET].x; - swap[ranks[PASS][SET].y] = pairs[PASS][SET].y; - } - } -} - -template -__device__ __forceinline__ void ExchangePairs( - T *swap, - typename VecType::Type pairs[PASSES_PER_CYCLE][SETS_PER_PASS], - int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS]) -{ - // Push in Pairs - PushPairs(swap, pairs, ranks); - - __syncthreads(); - - // Extract pairs - #pragma unroll - for (int PASS = 0; PASS < (int) 
PASSES_PER_CYCLE; PASS++) { - - #pragma unroll - for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { - const int BLOCK = ((PASS * SETS_PER_PASS) + SET) * 2; - pairs[PASS][SET].x = swap[threadIdx.x + (B40C_RADIXSORT_THREADS * (BLOCK + 0))]; - pairs[PASS][SET].y = swap[threadIdx.x + (B40C_RADIXSORT_THREADS * (BLOCK + 1))]; - } - } -} - - -template < - typename K, - typename V, - int RADIX_DIGITS, - int BIT, - int PASSES_PER_CYCLE, - int SETS_PER_PASS, - bool UNGUARDED_IO, - typename PostprocessFunctor> -__device__ __forceinline__ void SwapAndScatterSm13( - typename VecType::Type keypairs[PASSES_PER_CYCLE][SETS_PER_PASS], - int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS], - int4 *exchange, - typename VecType::Type *d_in_values, - K *d_out_keys, - V *d_out_values, - int carry[RADIX_DIGITS], - int extra[1]) -{ - int2 offsets[PASSES_PER_CYCLE][SETS_PER_PASS]; - - // Swap keys according to ranks - ExchangePairs((K*) exchange, keypairs, ranks); - - // Calculate scatter offsets (re-decode digits from keys: it's less work than making a second exchange of digits) - #pragma unroll - for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) { - - #pragma unroll - for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { - const int BLOCK = ((PASS * SETS_PER_PASS) + SET) * 2; - offsets[PASS][SET].x = threadIdx.x + (B40C_RADIXSORT_THREADS * (BLOCK + 0)) + carry[DecodeDigit(keypairs[PASS][SET].x)]; - offsets[PASS][SET].y = threadIdx.x + (B40C_RADIXSORT_THREADS * (BLOCK + 1)) + carry[DecodeDigit(keypairs[PASS][SET].y)]; - } - } - - // Scatter keys - #pragma unroll - for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) { - const int BLOCK = PASS * SETS_PER_PASS * 2; - ScatterSets(d_out_keys, keypairs[PASS], offsets[PASS], B40C_RADIXSORT_THREADS * BLOCK, extra); - } - - if (!IsKeysOnly()) { - - __syncthreads(); - - // Read input data - typename VecType::Type datapairs[PASSES_PER_CYCLE][SETS_PER_PASS]; - - // N.B. 
-- I wish we could do some pragma unrolling here too, but the compiler won't comply, - // telling me "Advisory: Loop was not unrolled, unexpected control flow" - - if (PASSES_PER_CYCLE > 0) ReadSets >(d_in_values, datapairs[0], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 0, extra); - if (PASSES_PER_CYCLE > 1) ReadSets >(d_in_values, datapairs[1], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 1, extra); - - // Swap data according to ranks - ExchangePairs((V*) exchange, datapairs, ranks); - - // Scatter data - #pragma unroll - for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) { - const int BLOCK = PASS * SETS_PER_PASS * 2; - ScatterSets >(d_out_values, datapairs[PASS], offsets[PASS], B40C_RADIXSORT_THREADS * BLOCK, extra); - } - } -} - - -/****************************************************************************** - * SM1.0 Local Exchange Routines - * - * Routines for exchanging keys (and values) in shared memory (i.e., local - * scattering) in order to to facilitate coalesced global scattering - ******************************************************************************/ - -template < - typename T, - int RADIX_DIGITS, - bool UNGUARDED_IO, - typename PostprocessFunctor> -__device__ __forceinline__ void ScatterPass( - T *swapmem, - T *d_out, - int digit_scan[2][RADIX_DIGITS], - int carry[RADIX_DIGITS], - int extra[1], - int base_digit, - PostprocessFunctor postprocess = PostprocessFunctor()) -{ - const int LOG_STORE_TXN_THREADS = B40C_LOG_MEM_BANKS(__CUDA_ARCH__); - const int STORE_TXN_THREADS = 1 << LOG_STORE_TXN_THREADS; - - int store_txn_idx = threadIdx.x & (STORE_TXN_THREADS - 1); - int store_txn_digit = threadIdx.x >> LOG_STORE_TXN_THREADS; - - int my_digit = base_digit + store_txn_digit; - if (my_digit < RADIX_DIGITS) { - - int my_exclusive_scan = digit_scan[1][my_digit - 1]; - int my_inclusive_scan = digit_scan[1][my_digit]; - int my_digit_count = my_inclusive_scan - my_exclusive_scan; - - int my_carry = carry[my_digit] + my_exclusive_scan; - int 
my_aligned_offset = store_txn_idx - (my_carry & (STORE_TXN_THREADS - 1)); - - while (my_aligned_offset < my_digit_count) { - - if ((my_aligned_offset >= 0) && (UNGUARDED_IO || (my_exclusive_scan + my_aligned_offset < extra[0]))) { - - T datum = swapmem[my_exclusive_scan + my_aligned_offset]; - postprocess(datum); - d_out[my_carry + my_aligned_offset] = datum; - } - my_aligned_offset += STORE_TXN_THREADS; - } - } -} - -template < - typename T, - int RADIX_DIGITS, - int PASSES_PER_CYCLE, - int SETS_PER_PASS, - bool UNGUARDED_IO, - typename PostprocessFunctor> -__device__ __forceinline__ void SwapAndScatterPairs( - typename VecType::Type pairs[PASSES_PER_CYCLE][SETS_PER_PASS], - int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS], - T *exchange, - T *d_out, - int carry[RADIX_DIGITS], - int digit_scan[2][RADIX_DIGITS], - int extra[1]) -{ - const int SCATTER_PASS_DIGITS = B40C_RADIXSORT_WARPS * (B40C_WARP_THREADS / B40C_MEM_BANKS(__CUDA_ARCH__)); - const int SCATTER_PASSES = RADIX_DIGITS / SCATTER_PASS_DIGITS; - - // Push in pairs - PushPairs(exchange, pairs, ranks); - - __syncthreads(); - - // N.B. 
-- I wish we could do some pragma unrolling here too, but the compiler won't comply, - // telling me "Advisory: Loop was not unrolled, not an innermost loop" - - if (SCATTER_PASSES > 0) ScatterPass(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 0); - if (SCATTER_PASSES > 1) ScatterPass(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 1); - if (SCATTER_PASSES > 2) ScatterPass(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 2); - if (SCATTER_PASSES > 3) ScatterPass(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 3); - if (SCATTER_PASSES > 4) ScatterPass(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 4); - if (SCATTER_PASSES > 5) ScatterPass(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 5); - if (SCATTER_PASSES > 6) ScatterPass(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 6); - if (SCATTER_PASSES > 7) ScatterPass(exchange, d_out, digit_scan, carry, extra, SCATTER_PASS_DIGITS * 7); -} - - -template < - typename K, - typename V, - int RADIX_DIGITS, - int PASSES_PER_CYCLE, - int SETS_PER_PASS, - bool UNGUARDED_IO, - typename PostprocessFunctor> -__device__ __forceinline__ void SwapAndScatterSm10( - typename VecType::Type keypairs[PASSES_PER_CYCLE][SETS_PER_PASS], - int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS], - int4 *exchange, - typename VecType::Type *d_in_values, - K *d_out_keys, - V *d_out_values, - int carry[RADIX_DIGITS], - int digit_scan[2][RADIX_DIGITS], - int extra[1]) -{ - // Swap and scatter keys - SwapAndScatterPairs( - keypairs, ranks, (K*) exchange, d_out_keys, carry, digit_scan, extra); - - if (!IsKeysOnly()) { - - __syncthreads(); - - // N.B. 
-- I wish we could do some pragma unrolling here too, but the compiler won't comply, - // telling me "Advisory: Loop was not unrolled, unexpected control flow" - - // Read input data - typename VecType::Type datapairs[PASSES_PER_CYCLE][SETS_PER_PASS]; - if (PASSES_PER_CYCLE > 0) ReadSets >(d_in_values, datapairs[0], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 0, extra); - if (PASSES_PER_CYCLE > 1) ReadSets >(d_in_values, datapairs[1], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 1, extra); - - // Swap and scatter data - SwapAndScatterPairs >( - datapairs, ranks, (V*) exchange, d_out_values, carry, digit_scan, extra); - } -} - - -/****************************************************************************** - * Cycle of RADIXSORT_CYCLE_ELEMENTS keys (and values) - ******************************************************************************/ - -template < - typename K, - typename V, - int BIT, - bool UNGUARDED_IO, - int RADIX_DIGITS, - int LOG_SCAN_LANES_PER_SET, - int SCAN_LANES_PER_SET, - int SETS_PER_PASS, - int PASSES_PER_CYCLE, - int LOG_SCAN_LANES_PER_PASS, - int SCAN_LANES_PER_PASS, - int LOG_PARTIALS_PER_LANE, - int LOG_PARTIALS_PER_PASS, - int LOG_RAKING_THREADS_PER_PASS, - int RAKING_THREADS_PER_PASS, - int LOG_RAKING_THREADS_PER_LANE, - int RAKING_THREADS_PER_LANE, - int LOG_PARTIALS_PER_SEG, - int PARTIALS_PER_SEG, - int LOG_PARTIALS_PER_ROW, - int PARTIALS_PER_ROW, - int LOG_SEGS_PER_ROW, - int SEGS_PER_ROW, - int LOG_ROWS_PER_SET, - int LOG_ROWS_PER_LANE, - int ROWS_PER_LANE, - int LOG_ROWS_PER_PASS, - int ROWS_PER_PASS, - int MAX_EXCHANGE_BYTES, - typename PreprocessFunctor, - typename PostprocessFunctor> - -__device__ __forceinline__ void SrtsScanDigitCycle( - typename VecType::Type *d_in_keys, - typename VecType::Type *d_in_values, - K *d_out_keys, - V *d_out_values, - int4 *exchange, - int warpscan[SCAN_LANES_PER_PASS][3][RAKING_THREADS_PER_LANE], - int carry[RADIX_DIGITS], - int digit_scan[2][RADIX_DIGITS], - int 
digit_counts[PASSES_PER_CYCLE][SETS_PER_PASS][RADIX_DIGITS], - int extra[1], - int *base_partial, - int *raking_partial) -{ - - const int PADDED_PARTIALS_PER_LANE = ROWS_PER_LANE * (PARTIALS_PER_ROW + 1); - const int SETS_PER_CYCLE = PASSES_PER_CYCLE * SETS_PER_PASS; - - // N.B.: We use the following voodoo incantations to elide the compiler's miserable - // "declared but never referenced" warnings for these (which are actually used for - // template instantiation) - SuppressUnusedConstantWarning(PADDED_PARTIALS_PER_LANE); - SuppressUnusedConstantWarning(SETS_PER_CYCLE); - - typename VecType::Type keypairs[PASSES_PER_CYCLE][SETS_PER_PASS]; - int2 digits[PASSES_PER_CYCLE][SETS_PER_PASS]; - int2 flag_offsets[PASSES_PER_CYCLE][SETS_PER_PASS]; // a byte offset - int2 ranks[PASSES_PER_CYCLE][SETS_PER_PASS]; - - - //------------------------------------------------------------------------- - // Read keys - //------------------------------------------------------------------------- - - // N.B. -- I wish we could do some pragma unrolling here too, but the compiler won't comply, - // telling me "Advisory: Loop was not unrolled, unexpected control flow construct" - - // Read Keys - if (PASSES_PER_CYCLE > 0) ReadSets(d_in_keys, keypairs[0], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 0, extra); - if (PASSES_PER_CYCLE > 1) ReadSets(d_in_keys, keypairs[1], B40C_RADIXSORT_THREADS * SETS_PER_PASS * 1, extra); - - //------------------------------------------------------------------------- - // Lane-scanning Passes - //------------------------------------------------------------------------- - - #pragma unroll - for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) { - - // First Pass - ScanPass( - base_partial, - raking_partial, - warpscan, - keypairs[PASS], - digits[PASS], - flag_offsets[PASS], - ranks[PASS], - PASSES_PER_CYCLE - PASS - 1); // lower passes get copied right - } - - //------------------------------------------------------------------------- - // Digit-scanning - 
//------------------------------------------------------------------------- - - // Recover second-half digit-counts, scan across all digit-counts - if (threadIdx.x < RADIX_DIGITS) { - - int counts[PASSES_PER_CYCLE][SETS_PER_PASS]; - - // Recover digit-counts - - #pragma unroll - for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) { - RecoverDigitCounts( // first pass, offset by 1 - warpscan, - counts[PASS], - PASSES_PER_CYCLE - PASS - 1); // lower passes get copied right - } - - // Check for overflows - CorrectForOverflows( - digits, counts, extra); - - // Scan across my digit counts for each set - int exclusive_total = 0; - int inclusive_total = 0; - - #pragma unroll - for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) { - - #pragma unroll - for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { - inclusive_total += counts[PASS][SET]; - counts[PASS][SET] = exclusive_total; - exclusive_total = inclusive_total; - } - } - - // second half of carry update - int my_carry = carry[threadIdx.x] + digit_scan[1][threadIdx.x]; - - // Perform overflow-free SIMD Kogge-Stone across digits - int digit_prefix = WarpScan( - digit_scan, - inclusive_total, - 0); - - // first-half of carry update - carry[threadIdx.x] = my_carry - digit_prefix; - - #pragma unroll - for (int PASS = 0; PASS < (int) PASSES_PER_CYCLE; PASS++) { - - #pragma unroll - for (int SET = 0; SET < (int) SETS_PER_PASS; SET++) { - digit_counts[PASS][SET][threadIdx.x] = counts[PASS][SET] + digit_prefix; - } - } - } - - __syncthreads(); - - //------------------------------------------------------------------------- - // Update Ranks - //------------------------------------------------------------------------- - - UpdateRanks(digits, ranks, digit_counts); - - - //------------------------------------------------------------------------- - // Scatter - //------------------------------------------------------------------------- - -#if ((__CUDA_ARCH__ < 130) || FERMI_ECC) - - SwapAndScatterSm10( - keypairs, - 
ranks, - exchange, - d_in_values, - d_out_keys, - d_out_values, - carry, - digit_scan, - extra); - -#else - - SwapAndScatterSm13( - keypairs, - ranks, - exchange, - d_in_values, - d_out_keys, - d_out_values, - carry, - extra); - -#endif - - __syncthreads(); - -} - - - -/****************************************************************************** - * Scan/Scatter Kernel Entry Point - ******************************************************************************/ - -template < - typename K, - typename V, - int PASS, - int RADIX_BITS, - int BIT, - typename PreprocessFunctor, - typename PostprocessFunctor> -__launch_bounds__ (B40C_RADIXSORT_THREADS, B40C_RADIXSORT_SCAN_SCATTER_CTA_OCCUPANCY(__CUDA_ARCH__)) -__global__ -void ScanScatterDigits( - bool *d_from_alt_storage, - int* d_spine, - K* d_in_keys, - K* d_out_keys, - V* d_in_values, - V* d_out_values, - CtaDecomposition work_decomposition) -{ - - const int RADIX_DIGITS = 1 << RADIX_BITS; - - const int LOG_SCAN_LANES_PER_SET = (RADIX_BITS > 2) ? 
RADIX_BITS - 2 : 0; // Always at one lane per set - const int SCAN_LANES_PER_SET = 1 << LOG_SCAN_LANES_PER_SET; // N.B.: we have "declared but never referenced" warnings for these, but they're actually used for template instantiation - - const int LOG_SETS_PER_PASS = B40C_RADIXSORT_LOG_SETS_PER_PASS(__CUDA_ARCH__); - const int SETS_PER_PASS = 1 << LOG_SETS_PER_PASS; - - const int LOG_PASSES_PER_CYCLE = B40C_RADIXSORT_LOG_PASSES_PER_CYCLE(__CUDA_ARCH__, K, V); - const int PASSES_PER_CYCLE = 1 << LOG_PASSES_PER_CYCLE; - - const int LOG_SCAN_LANES_PER_PASS = LOG_SETS_PER_PASS + LOG_SCAN_LANES_PER_SET; - const int SCAN_LANES_PER_PASS = 1 << LOG_SCAN_LANES_PER_PASS; - - const int LOG_PARTIALS_PER_LANE = B40C_RADIXSORT_LOG_THREADS; - - const int LOG_PARTIALS_PER_PASS = LOG_SCAN_LANES_PER_PASS + LOG_PARTIALS_PER_LANE; - - const int LOG_RAKING_THREADS_PER_PASS = B40C_RADIXSORT_LOG_RAKING_THREADS_PER_PASS(__CUDA_ARCH__); - const int RAKING_THREADS_PER_PASS = 1 << LOG_RAKING_THREADS_PER_PASS; - - const int LOG_RAKING_THREADS_PER_LANE = LOG_RAKING_THREADS_PER_PASS - LOG_SCAN_LANES_PER_PASS; - const int RAKING_THREADS_PER_LANE = 1 << LOG_RAKING_THREADS_PER_LANE; - - const int LOG_PARTIALS_PER_SEG = LOG_PARTIALS_PER_LANE - LOG_RAKING_THREADS_PER_LANE; - const int PARTIALS_PER_SEG = 1 << LOG_PARTIALS_PER_SEG; - - const int LOG_PARTIALS_PER_ROW = (LOG_PARTIALS_PER_SEG < B40C_LOG_MEM_BANKS(__CUDA_ARCH__)) ? 
B40C_LOG_MEM_BANKS(__CUDA_ARCH__) : LOG_PARTIALS_PER_SEG; // floor of MEM_BANKS partials per row - const int PARTIALS_PER_ROW = 1 << LOG_PARTIALS_PER_ROW; - const int PADDED_PARTIALS_PER_ROW = PARTIALS_PER_ROW + 1; - - const int LOG_SEGS_PER_ROW = LOG_PARTIALS_PER_ROW - LOG_PARTIALS_PER_SEG; - const int SEGS_PER_ROW = 1 << LOG_SEGS_PER_ROW; - - const int LOG_ROWS_PER_SET = LOG_PARTIALS_PER_PASS - LOG_PARTIALS_PER_ROW; - - const int LOG_ROWS_PER_LANE = LOG_PARTIALS_PER_LANE - LOG_PARTIALS_PER_ROW; - const int ROWS_PER_LANE = 1 << LOG_ROWS_PER_LANE; - - const int LOG_ROWS_PER_PASS = LOG_SCAN_LANES_PER_PASS + LOG_ROWS_PER_LANE; - const int ROWS_PER_PASS = 1 << LOG_ROWS_PER_PASS; - - const int SCAN_LANE_BYTES = ROWS_PER_PASS * PADDED_PARTIALS_PER_ROW * sizeof(int); - const int MAX_EXCHANGE_BYTES = (sizeof(K) > sizeof(V)) ? - B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V) * sizeof(K) : - B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V) * sizeof(V); - const int SCAN_LANE_INT4S = (B40C_MAX(MAX_EXCHANGE_BYTES, SCAN_LANE_BYTES) + sizeof(int4) - 1) / sizeof(int4); - - - // N.B.: We use the following voodoo incantations to elide the compiler's miserable - // "declared but never referenced" warnings for these (which are actually used for - // template instantiation) - SuppressUnusedConstantWarning(SCAN_LANES_PER_SET); - SuppressUnusedConstantWarning(PARTIALS_PER_SEG); - SuppressUnusedConstantWarning(LOG_ROWS_PER_SET); - SuppressUnusedConstantWarning(ROWS_PER_LANE); - - // scan_lanes is a int4[] to avoid alignment issues when casting to (K *) and/or (V *) - __shared__ int4 scan_lanes[SCAN_LANE_INT4S]; - __shared__ int warpscan[SCAN_LANES_PER_PASS][3][RAKING_THREADS_PER_LANE]; // One warpscan per fours-group - __shared__ int carry[RADIX_DIGITS]; - __shared__ int digit_scan[2][RADIX_DIGITS]; - __shared__ int digit_counts[PASSES_PER_CYCLE][SETS_PER_PASS][RADIX_DIGITS]; - __shared__ bool non_trivial_digit_pass; - __shared__ bool from_alt_storage; - - 
_B40C_REG_MISER_QUALIFIER_ int extra[1]; - _B40C_REG_MISER_QUALIFIER_ int oob[1]; - - extra[0] = (blockIdx.x == gridDim.x - 1) ? work_decomposition.extra_elements_last_block : 0; - - // calculate our threadblock's range - int block_elements, block_offset; - if (blockIdx.x < work_decomposition.num_big_blocks) { - block_offset = work_decomposition.big_block_elements * blockIdx.x; - block_elements = work_decomposition.big_block_elements; - } else { - block_offset = (work_decomposition.normal_block_elements * blockIdx.x) + (work_decomposition.num_big_blocks * B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V)); - block_elements = work_decomposition.normal_block_elements; - } - oob[0] = block_offset + block_elements; // out-of-bounds - - - // location for placing 2-element partial reductions in the first lane of a pass - int row = threadIdx.x >> LOG_PARTIALS_PER_ROW; - int col = threadIdx.x & (PARTIALS_PER_ROW - 1); - int *base_partial = reinterpret_cast(scan_lanes) + (row * PADDED_PARTIALS_PER_ROW) + col; - - // location for raking across all sets within a pass - int *raking_partial = 0; - - if (threadIdx.x < RAKING_THREADS_PER_PASS) { - - // initalize lane warpscans - if (threadIdx.x < RAKING_THREADS_PER_LANE) { - - #pragma unroll - for (int SCAN_LANE = 0; SCAN_LANE < (int) SCAN_LANES_PER_PASS; SCAN_LANE++) { - warpscan[SCAN_LANE][0][threadIdx.x] = 0; - } - } - - // initialize digit warpscans - if (threadIdx.x < RADIX_DIGITS) { - - // Initialize digit_scan - digit_scan[0][threadIdx.x] = 0; - digit_scan[1][threadIdx.x] = 0; - - // Determine where to read our input - from_alt_storage = (PASS == 0) ? false : d_from_alt_storage[PASS & 0x1]; - - // Read carry in parallel - int spine_digit_offset = FastMul(gridDim.x, threadIdx.x); - int my_digit_carry = d_spine[spine_digit_offset + blockIdx.x]; - carry[threadIdx.x] = my_digit_carry; - - // Determine whether or not we have work to do and setup the next round - // accordingly. 
Everybody but the first threadblock can determine this - // from the number of non-zero-and-non-oob digit carries. First block - // needs someone else's because he always writes the zero offset. - - int predicate; - if (PreprocessFunctor::MustApply() || PostprocessFunctor::MustApply()) { - - non_trivial_digit_pass = true; - - } else { - - if (blockIdx.x > 0) { - // Non-first CTA : use digit-carry from first block - my_digit_carry = d_spine[spine_digit_offset]; - } - - predicate = ((my_digit_carry > 0) && (my_digit_carry < work_decomposition.num_elements)); - non_trivial_digit_pass = (TallyWarpVote(RADIX_DIGITS, predicate, reinterpret_cast(scan_lanes)) > 0); - } - - // Let the next round know which set of buffers to use - if (blockIdx.x == 0) d_from_alt_storage[(PASS + 1) & 0x1] = from_alt_storage ^ non_trivial_digit_pass; - } - - // initialize raking segment - row = threadIdx.x >> LOG_SEGS_PER_ROW; - col = (threadIdx.x & (SEGS_PER_ROW - 1)) << LOG_PARTIALS_PER_SEG; - raking_partial = reinterpret_cast(scan_lanes) + (row * PADDED_PARTIALS_PER_ROW) + col; - } - - // Sync to acquire non_trivial_digit_pass and from_temp_storage - __syncthreads(); - - // Short-circuit this entire pass - if (!non_trivial_digit_pass) return; - - if (!from_alt_storage) { - - // Scan in tiles of cycle_elements - while (block_offset < oob[0]) { - - SrtsScanDigitCycle( - reinterpret_cast::Type *>((void *) &d_in_keys[block_offset]), - reinterpret_cast::Type *>((void *) &d_in_values[block_offset]), - d_out_keys, - d_out_values, - scan_lanes, - warpscan, - carry, - digit_scan, - digit_counts, - extra, - base_partial, - raking_partial); - - block_offset += B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V); - } - - if (extra[0]) { - - SrtsScanDigitCycle( - reinterpret_cast::Type *>((void *) &d_in_keys[block_offset]), - reinterpret_cast::Type *>((void *) &d_in_values[block_offset]), - d_out_keys, - d_out_values, - scan_lanes, - warpscan, - carry, - digit_scan, - digit_counts, - extra, - 
base_partial, - raking_partial); - } - - } else { - - // Scan in tiles of cycle_elements - while (block_offset < oob[0]) { - - SrtsScanDigitCycle( - reinterpret_cast::Type *>((void *) &d_out_keys[block_offset]), - reinterpret_cast::Type *>((void *) &d_out_values[block_offset]), - d_in_keys, - d_in_values, - scan_lanes, - warpscan, - carry, - digit_scan, - digit_counts, - extra, - base_partial, - raking_partial); - - block_offset += B40C_RADIXSORT_CYCLE_ELEMENTS(__CUDA_ARCH__, K, V); - } - - if (extra[0]) { - - SrtsScanDigitCycle( - reinterpret_cast::Type *>((void *) &d_out_keys[block_offset]), - reinterpret_cast::Type *>((void *) &d_out_values[block_offset]), - d_in_keys, - d_in_values, - scan_lanes, - warpscan, - carry, - digit_scan, - digit_counts, - extra, - base_partial, - raking_partial); - } - - } -} - -} // end namespace b40c_thrust -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_spine_kernel.h b/compat/thrust/system/cuda/detail/detail/b40c/radixsort_spine_kernel.h deleted file mode 100644 index 3d20f4aa79..0000000000 --- a/compat/thrust/system/cuda/detail/detail/b40c/radixsort_spine_kernel.h +++ /dev/null @@ -1,187 +0,0 @@ -/****************************************************************************** - * - * Copyright 2010 Duane Merrill - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * - * - * - * AUTHORS' REQUEST: - * - * If you use|reference|benchmark this code, please cite our Technical - * Report (http://www.cs.virginia.edu/~dgm4d/papers/RadixSortTR.pdf): - * - * @TechReport{ Merrill:Sorting:2010, - * author = "Duane Merrill and Andrew Grimshaw", - * title = "Revisiting Sorting for GPGPU Stream Architectures", - * year = "2010", - * institution = "University of Virginia, Department of Computer Science", - * address = "Charlottesville, VA, USA", - * number = "CS2010-03" - * } - * - * For more information, see our Google Code project site: - * http://code.google.com/p/back40computing/ - * - * Thanks! - * - ******************************************************************************/ - - -/****************************************************************************** - * Top-level histogram/spine scanning kernel - ******************************************************************************/ - -#pragma once - -#include "radixsort_kernel_common.h" - -namespace thrust { -namespace system { -namespace cuda { -namespace detail { -namespace detail { -namespace b40c_thrust { - -/****************************************************************************** - * Scans a cycle of RADIXSORT_CYCLE_ELEMENTS elements - ******************************************************************************/ - -template -__device__ __forceinline__ void SrtsScanCycle( - int *smem_offset, - int *smem_segment, - int warpscan[2][B40C_WARP_THREADS], - int4 *in, - int4 *out, - int &carry) -{ - int4 datum; - - // read input data - datum = in[threadIdx.x]; - - smem_offset[0] = datum.x + datum.y + datum.z + datum.w; - - __syncthreads(); - - if (threadIdx.x < B40C_WARP_THREADS) { - - int partial_reduction = SerialReduce(smem_segment); - - int seed = WarpScan(warpscan, partial_reduction, 0); - seed += carry; - - SerialScan(smem_segment, seed); - - carry += warpscan[1][B40C_WARP_THREADS - 1]; - } - - __syncthreads(); - - int part0 = smem_offset[0]; - int part1; - - 
part1 = datum.x + part0; - datum.x = part0; - part0 = part1 + datum.y; - datum.y = part1; - - part1 = datum.z + part0; - datum.z = part0; - part0 = part1 + datum.w; - datum.w = part1; - - out[threadIdx.x] = datum; -} - - -/****************************************************************************** - * Spine/histogram Scan Kernel Entry Point - ******************************************************************************/ - -template -__global__ void SrtsScanSpine( - int *d_ispine, - int *d_ospine, - int normal_block_elements) -{ - const int LOG_PARTIALS = B40C_RADIXSORT_LOG_THREADS; - const int PARTIALS = 1 << LOG_PARTIALS; - - const int LOG_PARTIALS_PER_SEG = LOG_PARTIALS - B40C_LOG_WARP_THREADS; - const int PARTIALS_PER_SEG = 1 << LOG_PARTIALS_PER_SEG; - - const int LOG_PARTIALS_PER_ROW = (LOG_PARTIALS_PER_SEG < B40C_LOG_MEM_BANKS(__CUDA_ARCH__)) ? B40C_LOG_MEM_BANKS(__CUDA_ARCH__) : LOG_PARTIALS_PER_SEG; // floor of 32 elts per row - const int PARTIALS_PER_ROW = 1 << LOG_PARTIALS_PER_ROW; - - const int LOG_SEGS_PER_ROW = LOG_PARTIALS_PER_ROW - LOG_PARTIALS_PER_SEG; - const int SEGS_PER_ROW = 1 << LOG_SEGS_PER_ROW; - - const int SMEM_ROWS = PARTIALS / PARTIALS_PER_ROW; - - __shared__ int smem[SMEM_ROWS][PARTIALS_PER_ROW + 1]; - __shared__ int warpscan[2][B40C_WARP_THREADS]; - - // WAR spurious unused constant warning - SuppressUnusedConstantWarning(PARTIALS_PER_SEG); - - int *smem_segment = 0; - int carry = 0; - - int row = threadIdx.x >> LOG_PARTIALS_PER_ROW; - int col = threadIdx.x & (PARTIALS_PER_ROW - 1); - int *smem_offset = &smem[row][col]; - - if (blockIdx.x > 0) { - return; - } - - if (threadIdx.x < B40C_WARP_THREADS) { - - // two segs per row, odd segs are offset by 8 - row = threadIdx.x >> LOG_SEGS_PER_ROW; - col = (threadIdx.x & (SEGS_PER_ROW - 1)) << LOG_PARTIALS_PER_SEG; - smem_segment = &smem[row][col]; - - if (threadIdx.x < B40C_WARP_THREADS) { - carry = 0; - warpscan[0][threadIdx.x] = 0; - } - } - - // scan the spine in blocks of cycle_elements 
- int block_offset = 0; - while (block_offset < normal_block_elements) { - - SrtsScanCycle( - smem_offset, - smem_segment, - warpscan, - reinterpret_cast((void *) &d_ispine[block_offset]), - reinterpret_cast((void *) &d_ospine[block_offset]), - carry); - - block_offset += B40C_RADIXSORT_SPINE_CYCLE_ELEMENTS; - } -} - - -} // end namespace b40c_thrust -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/detail/b40c/vector_types.h b/compat/thrust/system/cuda/detail/detail/b40c/vector_types.h deleted file mode 100644 index 6db7931078..0000000000 --- a/compat/thrust/system/cuda/detail/detail/b40c/vector_types.h +++ /dev/null @@ -1,96 +0,0 @@ -/** - * Copyright 2010 Duane Merrill - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * For more information, see our Google Code project site: - * http://code.google.com/p/back40computing/ - * - * Thanks! 
- */ - -#pragma once - -#include - -namespace thrust { -namespace system { -namespace cuda { -namespace detail { -namespace detail { -namespace b40c_thrust { - -//------------------------------------------------------------------------------ -// Vector types -//------------------------------------------------------------------------------ - -template struct VecType; - - -// -// Define general vector types -// - -template -struct VecType { - K x; - typedef K Type; -}; - -template -struct VecType { - K x; - K y; - typedef VecType Type; -}; - -template -struct VecType { - K x; - K y; - K z; - K w; - typedef VecType Type; -}; - -// -// Specialize certain built-in vector types -// - -#define B40C_DEFINE_VECTOR_TYPE(base_type,short_type) \ - template<> struct VecType { typedef short_type##1 Type; }; \ - template<> struct VecType { typedef short_type##2 Type; }; \ - template<> struct VecType { typedef short_type##4 Type; }; - -B40C_DEFINE_VECTOR_TYPE(char, char) -B40C_DEFINE_VECTOR_TYPE(short, short) -B40C_DEFINE_VECTOR_TYPE(int, int) -B40C_DEFINE_VECTOR_TYPE(long, long) -B40C_DEFINE_VECTOR_TYPE(long long, longlong) -B40C_DEFINE_VECTOR_TYPE(unsigned char, uchar) -B40C_DEFINE_VECTOR_TYPE(unsigned short, ushort) -B40C_DEFINE_VECTOR_TYPE(unsigned int, uint) -B40C_DEFINE_VECTOR_TYPE(unsigned long, ulong) -B40C_DEFINE_VECTOR_TYPE(unsigned long long, ulonglong) -B40C_DEFINE_VECTOR_TYPE(float, float) -B40C_DEFINE_VECTOR_TYPE(double, double) - -#undef B40C_DEFINE_VECTOR_TYPE - -} // end namespace b40c_thrust -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/detail/balanced_path.h b/compat/thrust/system/cuda/detail/detail/balanced_path.h deleted file mode 100644 index 51e4f5b767..0000000000 --- a/compat/thrust/system/cuda/detail/detail/balanced_path.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under 
the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ -namespace balanced_path_detail -{ - -template -__host__ __device__ void BinarySearchIteration(It data, int& begin, int& end, - T key, int shift, Comp comp) { - - IntT scale = (1<< shift) - 1; - int mid = (int)((begin + scale * end)>> shift); - - T key2 = data[mid]; - bool pred = UpperBound ? 
!comp(key, key2) : comp(key2, key); - if(pred) begin = (int)mid + 1; - else end = mid; -} - -template -__host__ __device__ int BinarySearch(It data, int count, T key, Comp comp) { - int begin = 0; - int end = count; - while(begin < end) - BinarySearchIteration(data, begin, end, key, 1, comp); - return begin; -} - -template -__host__ __device__ int BiasedBinarySearch(It data, int count, T key, - IntT levels, Comp comp) { - int begin = 0; - int end = count; - - if(levels >= 4 && begin < end) - BinarySearchIteration(data, begin, end, key, 9, comp); - if(levels >= 3 && begin < end) - BinarySearchIteration(data, begin, end, key, 7, comp); - if(levels >= 2 && begin < end) - BinarySearchIteration(data, begin, end, key, 5, comp); - if(levels >= 1 && begin < end) - BinarySearchIteration(data, begin, end, key, 4, comp); - - while(begin < end) - BinarySearchIteration(data, begin, end, key, 1, comp); - return begin; -} - -template -__host__ __device__ int MergePath(It1 a, int aCount, It2 b, int bCount, int diag, Comp comp) -{ - typedef typename thrust::iterator_traits::value_type T; - - int begin = thrust::max(0, diag - bCount); - int end = thrust::min(diag, aCount); - - while(begin < end) - { - int mid = (begin + end)>> 1; - T aKey = a[mid]; - T bKey = b[diag - 1 - mid]; - bool pred = UpperBound ? comp(aKey, bKey) : !comp(bKey, aKey); - if(pred) begin = mid + 1; - else end = mid; - } - return begin; -} - - -} // end namespace balanced_path_detail - - -template -__host__ __device__ -thrust::pair - balanced_path(RandomAccessIterator1 first1, Size1 n1, - RandomAccessIterator2 first2, Size1 n2, - Size1 diag, - Size2 levels, - Compare comp) -{ - typedef typename thrust::iterator_traits::value_type T; - - Size1 aIndex = balanced_path_detail::MergePath(first1, n1, first2, n2, diag, comp); - Size1 bIndex = diag - aIndex; - - bool star = false; - if(bIndex < n2) - { - T x = first2[bIndex]; - - // Search for the beginning of the duplicate run in both A and B. 
- Size1 aStart = balanced_path_detail::BiasedBinarySearch(first1, aIndex, x, levels, comp); - Size1 bStart = balanced_path_detail::BiasedBinarySearch(first2, bIndex, x, levels, comp); - - // The distance between x's merge path and its lower_bound is its rank. - // We add up the a and b ranks and evenly distribute them to - // get a stairstep path. - Size1 aRun = aIndex - aStart; - Size1 bRun = bIndex - bStart; - Size1 xCount = aRun + bRun; - - // Attempt to advance b and regress a. - Size1 bAdvance = thrust::max(xCount >> 1, xCount - aRun); - Size1 bEnd = thrust::min(n2, bStart + bAdvance + 1); - Size1 bRunEnd = balanced_path_detail::BinarySearch(first2 + bIndex, bEnd - bIndex, x, comp) + bIndex; - bRun = bRunEnd - bStart; - - bAdvance = thrust::min(bAdvance, bRun); - Size1 aAdvance = xCount - bAdvance; - - bool roundUp = (aAdvance == bAdvance + 1) && (bAdvance < bRun); - aIndex = aStart + aAdvance; - - if(roundUp) star = true; - } - - return thrust::make_pair(aIndex, (diag - aIndex) + star); -} - - -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/detail/cached_temporary_allocator.h b/compat/thrust/system/cuda/detail/detail/cached_temporary_allocator.h deleted file mode 100644 index 2bbd658456..0000000000 --- a/compat/thrust/system/cuda/detail/detail/cached_temporary_allocator.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ - - -template class BasePolicy> - class cached_temporary_allocator - : public BasePolicy > -{ - private: - typedef thrust::detail::temporary_allocator base_allocator_type; - typedef thrust::detail::allocator_traits traits; - typedef typename traits::pointer allocator_pointer; - typedef std::multimap free_blocks_type; - typedef std::map allocated_blocks_type; - - base_allocator_type m_base_allocator; - free_blocks_type free_blocks; - allocated_blocks_type allocated_blocks; - - void free_all() - { - // deallocate all outstanding blocks in both lists - for(free_blocks_type::iterator i = free_blocks.begin(); - i != free_blocks.end(); - ++i) - { - // transform the pointer to allocator_pointer before calling deallocate - traits::deallocate(m_base_allocator, allocator_pointer(reinterpret_cast(i->second)), i->first); - } - - for(allocated_blocks_type::iterator i = allocated_blocks.begin(); - i != allocated_blocks.end(); - ++i) - { - // transform the pointer to allocator_pointer before calling deallocate - traits::deallocate(m_base_allocator, allocator_pointer(reinterpret_cast(i->first)), i->second); - } - } - - public: - cached_temporary_allocator(thrust::execution_policy &system) - : m_base_allocator(system) - {} - - ~cached_temporary_allocator() - { - // free all allocations when cached_allocator goes out of scope - free_all(); - } - - void *allocate(std::ptrdiff_t num_bytes) - { - void *result = 0; - - // search the cache for a free block - free_blocks_type::iterator free_block = free_blocks.find(num_bytes); - - if(free_block != free_blocks.end()) - { - // get the pointer - result = free_block->second; - - // erase from the free_blocks map - free_blocks.erase(free_block); - } - 
else - { - // no allocation of the right size exists - // create a new one with m_base_allocator - // allocate memory and convert to raw pointer - result = thrust::raw_pointer_cast(traits::allocate(m_base_allocator, num_bytes)); - } - - // insert the allocated pointer into the allocated_blocks map - allocated_blocks.insert(std::make_pair(result, num_bytes)); - - return result; - } - - void deallocate(void *ptr) - { - // erase the allocated block from the allocated blocks map - allocated_blocks_type::iterator iter = allocated_blocks.find(ptr); - std::ptrdiff_t num_bytes = iter->second; - allocated_blocks.erase(iter); - - // insert the block into the free blocks map - free_blocks.insert(std::make_pair(num_bytes, ptr)); - } -}; - - -// overload get_temporary_buffer on cached_temporary_allocator -// note that we take a reference to cached_temporary_allocator -template class BasePolicy> - thrust::pair - get_temporary_buffer(cached_temporary_allocator &alloc, std::ptrdiff_t n) -{ - // ask the allocator for sizeof(T) * n bytes - T* result = reinterpret_cast(alloc.allocate(sizeof(T) * n)); - - // return the pointer and the number of elements allocated - return thrust::make_pair(result,n); -} - - -// overload return_temporary_buffer on cached_temporary_allocator -// an overloaded return_temporary_buffer should always accompany -// an overloaded get_temporary_buffer -template class BasePolicy> - void return_temporary_buffer(cached_temporary_allocator &alloc, Pointer p) -{ - // return the pointer to the allocator - alloc.deallocate(thrust::raw_pointer_cast(p)); -} - - -} // end detail -} // end detail -} // end cuda -} // end system -} // end thrust - diff --git a/compat/thrust/system/cuda/detail/detail/fast_scan.h b/compat/thrust/system/cuda/detail/detail/fast_scan.h deleted file mode 100644 index d095a4a2db..0000000000 --- a/compat/thrust/system/cuda/detail/detail/fast_scan.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed 
under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file fast_scan.h - * \brief A fast scan for primitive types. - */ - -#pragma once - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ -namespace fast_scan -{ - -template -OutputIterator inclusive_scan(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator output, - BinaryFunction binary_op); - -template -OutputIterator exclusive_scan(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator output, - const T init, - BinaryFunction binary_op); - -} // end namespace fast_scan -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include "fast_scan.inl" - diff --git a/compat/thrust/system/cuda/detail/detail/fast_scan.inl b/compat/thrust/system/cuda/detail/detail/fast_scan.inl deleted file mode 100644 index b02763d8a9..0000000000 --- a/compat/thrust/system/cuda/detail/detail/fast_scan.inl +++ /dev/null @@ -1,753 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN - - -namespace thrust -{ -namespace detail -{ - -// forward declaration of temporary_array -template class temporary_array; - -} // end detail - -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ -namespace fast_scan -{ -namespace fast_scan_detail -{ - - -// TODO tune this -template -struct inclusive_scan_block_size -{ - private: - static const unsigned int max_memory = 16384 - 256 - 2 * sizeof(ValueType); - static const unsigned int max_block_size = max_memory / sizeof(ValueType); - static const unsigned int default_block_size = 7 * 32; - static const unsigned int block_size = (max_block_size < default_block_size) ? max_block_size : default_block_size; - - public: - static const unsigned int pass1 = block_size; - static const unsigned int pass2 = block_size; - static const unsigned int pass3 = block_size; -}; - -// TODO tune this -template -struct exclusive_scan_block_size -{ - private: - static const unsigned int max_memory = 16384 - 256 - 2 * sizeof(ValueType); - static const unsigned int max_block_size = max_memory / sizeof(ValueType); - static const unsigned int default_block_size = 5 * 32; - static const unsigned int block_size = (max_block_size < default_block_size) ? 
max_block_size : default_block_size; - - public: - static const unsigned int pass1 = block_size; - static const unsigned int pass2 = block_size; - static const unsigned int pass3 = block_size; -}; - - -template -__device__ __thrust_forceinline__ -void scan_block(Context context, SharedArray array, BinaryFunction binary_op) -{ - typedef typename thrust::iterator_value::type T; - - T val = array[context.thread_index()]; - - if (CTA_SIZE > 1) { if(context.thread_index() >= 1) { T tmp = array[context.thread_index() - 1]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 2) { if(context.thread_index() >= 2) { T tmp = array[context.thread_index() - 2]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 4) { if(context.thread_index() >= 4) { T tmp = array[context.thread_index() - 4]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 8) { if(context.thread_index() >= 8) { T tmp = array[context.thread_index() - 8]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 16) { if(context.thread_index() >= 16) { T tmp = array[context.thread_index() - 16]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 32) { if(context.thread_index() >= 32) { T tmp = array[context.thread_index() - 32]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 64) { if(context.thread_index() >= 64) { T tmp = array[context.thread_index() - 64]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 128) { if(context.thread_index() >= 128) { T tmp = array[context.thread_index() - 128]; val = binary_op(tmp, val); 
} context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 256) { if(context.thread_index() >= 256) { T tmp = array[context.thread_index() - 256]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 512) { if(context.thread_index() >= 512) { T tmp = array[context.thread_index() - 512]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 1024) { if(context.thread_index() >= 1024) { T tmp = array[context.thread_index() - 1024]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } -} - -template -__device__ __thrust_forceinline__ -void scan_block_n(Context context, SharedArray array, const unsigned int n, BinaryFunction binary_op) -{ - typedef typename thrust::iterator_value::type T; - - T val = array[context.thread_index()]; - - if (CTA_SIZE > 1) { if(context.thread_index() < n && context.thread_index() >= 1) { T tmp = array[context.thread_index() - 1]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 2) { if(context.thread_index() < n && context.thread_index() >= 2) { T tmp = array[context.thread_index() - 2]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 4) { if(context.thread_index() < n && context.thread_index() >= 4) { T tmp = array[context.thread_index() - 4]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 8) { if(context.thread_index() < n && context.thread_index() >= 8) { T tmp = array[context.thread_index() - 8]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 16) { if(context.thread_index() < n && context.thread_index() >= 16) { T 
tmp = array[context.thread_index() - 16]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 32) { if(context.thread_index() < n && context.thread_index() >= 32) { T tmp = array[context.thread_index() - 32]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 64) { if(context.thread_index() < n && context.thread_index() >= 64) { T tmp = array[context.thread_index() - 64]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 128) { if(context.thread_index() < n && context.thread_index() >= 128) { T tmp = array[context.thread_index() - 128]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 256) { if(context.thread_index() < n && context.thread_index() >= 256) { T tmp = array[context.thread_index() - 256]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 512) { if(context.thread_index() < n && context.thread_index() >= 512) { T tmp = array[context.thread_index() - 512]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } - if (CTA_SIZE > 1024) { if(context.thread_index() < n && context.thread_index() >= 1024) { T tmp = array[context.thread_index() - 1024]; val = binary_op(tmp, val); } context.barrier(); array[context.thread_index()] = val; context.barrier(); } -} - -template -__device__ __thrust_forceinline__ -void load_block(Context context, - const unsigned int n, - InputIterator input, - ValueType (&sdata)[K][CTA_SIZE + 1]) -{ - for(unsigned int k = 0; k < K; k++) - { - const unsigned int offset = k*CTA_SIZE + context.thread_index(); - - if (FullBlock || offset < n) - { - InputIterator temp = input + offset; - sdata[offset % K][offset / K] = *temp; - } - } - 
- context.barrier(); -} - -template -__device__ __thrust_forceinline__ -void store_block(Context context, - const unsigned int n, - OutputIterator output, - ValueType (&sdata)[K][CTA_SIZE + 1], - ValueType& carry) -{ - if (Inclusive) - { - for(unsigned int k = 0; k < K; k++) - { - const unsigned int offset = k*CTA_SIZE + context.thread_index(); - - if (FullBlock || offset < n) - { - OutputIterator temp = output + offset; - *temp = sdata[offset % K][offset / K]; - } - } - } - else - { - for(unsigned int k = 0; k < K; k++) - { - const unsigned int offset = k*CTA_SIZE + context.thread_index(); - - if (FullBlock || offset < n) - { - OutputIterator temp = output + offset; - *temp = (offset == 0) ? carry : sdata[(offset - 1) % K][(offset - 1) / K]; - } - } - } -} - -template -__device__ __thrust_forceinline__ -void upsweep_body(Context context, - const unsigned int n, - const bool carry_in, - InputIterator input, - BinaryFunction binary_op, - ValueType (&sdata)[K][CTA_SIZE + 1], - ValueType& carry) -{ - // read data - load_block(context, n, input, sdata); - - // copy into local array - ValueType ldata[K]; - for (unsigned int k = 0; k < K; k++) - ldata[k] = sdata[k][context.thread_index()]; - - // carry in - if (context.thread_index() == 0 && carry_in) - { - // XXX WAR sm_10 issue - ValueType tmp = carry; - ldata[0] = binary_op(tmp, ldata[0]); - } - - // scan local values - for(unsigned int k = 1; k < K; k++) - { - const unsigned int offset = K * context.thread_index() + k; - - if (FullBlock || offset < n) - ldata[k] = binary_op(ldata[k-1],ldata[k]); - } - - sdata[K - 1][context.thread_index()] = ldata[K - 1]; - - context.barrier(); - - // second level scan - if (FullBlock && sizeof(ValueType) > 1) // TODO investigate why this WAR is necessary - scan_block(context, sdata[K - 1], binary_op); - else - scan_block_n(context, sdata[K - 1], n / K, binary_op); - - // store carry out - if (FullBlock) - { - if (context.thread_index() == CTA_SIZE - 1) - carry = sdata[K - 
1][context.thread_index()]; - } - else - { - if (context.thread_index() == (n - 1) / K) - { - ValueType sum; - - for (unsigned int k = 0; k < K; k++) - if ((n - 1) % K == k) - sum = ldata[k]; - - if (context.thread_index() > 0) - { - // WAR sm_10 issue - ValueType tmp = sdata[K - 1][context.thread_index() - 1]; - sum = binary_op(tmp, sum); - } - - carry = sum; - } - } - - context.barrier(); -} - -template -__device__ __thrust_forceinline__ -void scan_body(Context context, - const unsigned int n, - const bool carry_in, - InputIterator input, - OutputIterator output, - BinaryFunction binary_op, - ValueType (&sdata)[K][CTA_SIZE + 1], - ValueType& carry) -{ - // read data - load_block(context, n, input, sdata); - - // copy into local array - ValueType ldata[K]; - for (unsigned int k = 0; k < K; k++) - ldata[k] = sdata[k][context.thread_index()]; - - // carry in - if (context.thread_index() == 0 && carry_in) - { - // XXX WAR sm_10 issue - ValueType tmp = carry; - ldata[0] = binary_op(tmp, ldata[0]); - } - - // scan local values - for(unsigned int k = 1; k < K; k++) - { - const unsigned int offset = K * context.thread_index() + k; - - if (FullBlock || offset < n) - ldata[k] = binary_op(ldata[k-1],ldata[k]); - } - - sdata[K - 1][context.thread_index()] = ldata[K - 1]; - - context.barrier(); - - // second level scan - if (FullBlock) - scan_block(context, sdata[K - 1], binary_op); - else - scan_block_n(context, sdata[K - 1], n / K, binary_op); - - // update local values - if (context.thread_index() > 0) - { - ValueType left = sdata[K - 1][context.thread_index() - 1]; - - for(unsigned int k = 0; k < K; k++) - { - const unsigned int offset = K * context.thread_index() + k; - - if (FullBlock || offset < n) - ldata[k] = binary_op(left, ldata[k]); - } - } - - for (unsigned int k = 0; k < K; k++) - sdata[k][context.thread_index()] = ldata[k]; - - context.barrier(); - - // write data - store_block(context, n, output, sdata, carry); - - // store carry out - if 
(context.thread_index() == 0) - { - if (FullBlock) - carry = sdata[K - 1][CTA_SIZE - 1]; - else - carry = sdata[(n - 1) % K][(n - 1) / K]; // note: this must come after the local update - } - - context.barrier(); -} - -template -struct upsweep_intervals_closure -{ - InputIterator input; - ValueType * block_results; // TODO change this to ValueIterator - BinaryFunction binary_op; - Decomposition decomp; - Context context; - - typedef Context context_type; - - upsweep_intervals_closure(InputIterator input, - ValueType * block_results, - BinaryFunction binary_op, - Decomposition decomp, - Context context = Context()) - : input(input), block_results(block_results), binary_op(binary_op), decomp(decomp), context(context) {} - - __device__ __thrust_forceinline__ - void operator()(void) - { - typedef typename Decomposition::index_type IndexType; - - const unsigned int CTA_SIZE = context_type::ThreadsPerBlock::value; - -#if __CUDA_ARCH__ >= 200 - const unsigned int SMEM = (48 * 1024); -#else - const unsigned int SMEM = (16 * 1024) - 256; -#endif - const unsigned int MAX_K = ((SMEM - 1 * sizeof(ValueType)) / (sizeof(ValueType) * (CTA_SIZE + 1))); - const unsigned int K = (MAX_K < 6) ? 
MAX_K : 6; - - __shared__ uninitialized sdata; // padded to avoid bank conflicts - - __shared__ uninitialized carry; // storage for carry out - if(context.thread_index() == 0) carry.construct(); - - context.barrier(); - - thrust::system::detail::internal::index_range interval = decomp[context.block_index()]; - - IndexType base = interval.begin(); - - input += base; - - const unsigned int unit_size = K * CTA_SIZE; - - bool carry_in = false; - - // process full units - while (base + unit_size <= interval.end()) - { - const unsigned int n = unit_size; - upsweep_body(context, n, carry_in, input, binary_op, sdata.get(), carry.get()); - base += unit_size; - input += unit_size; - carry_in = true; - } - - // process partially full unit at end of input (if necessary) - if (base < interval.end()) - { - const unsigned int n = interval.end() - base; - upsweep_body(context, n, carry_in, input, binary_op, sdata.get(), carry.get()); - } - - // write interval sum - if (context.thread_index() == 0) - block_results[context.block_index()] = carry; - } -}; - - -template -struct downsweep_intervals_closure -{ - InputIterator input; - OutputIterator output; - ValueType * block_results; - BinaryFunction binary_op; - Decomposition decomp; - Context context; - - typedef Context context_type; - - downsweep_intervals_closure(InputIterator input, - OutputIterator output, - ValueType * block_results, - BinaryFunction binary_op, - Decomposition decomp, - Context context = Context()) - : input(input), output(output), block_results(block_results), binary_op(binary_op), decomp(decomp), context(context) {} - - __device__ __thrust_forceinline__ - void operator()(void) - { - typedef typename Decomposition::index_type IndexType; - - const unsigned int CTA_SIZE = context_type::ThreadsPerBlock::value; - -#if __CUDA_ARCH__ >= 200 - const unsigned int SMEM = (48 * 1024); -#else - const unsigned int SMEM = (16 * 1024) - 256; -#endif - const unsigned int MAX_K = ((SMEM - 1 * sizeof(ValueType))/ 
(sizeof(ValueType) * (CTA_SIZE + 1))); - const unsigned int K = (MAX_K < 6) ? MAX_K : 6; - - __shared__ uninitialized sdata; // padded to avoid bank conflicts - - __shared__ uninitialized carry; // storage for carry in and carry out - if(context.thread_index() == 0) carry.construct(); - - context.barrier(); - - thrust::system::detail::internal::index_range interval = decomp[context.block_index()]; - - IndexType base = interval.begin(); - - input += base; - output += base; - - const unsigned int unit_size = K * CTA_SIZE; - - bool carry_in = (Inclusive && context.block_index() == 0) ? false : true; - - if (carry_in) - { - if (context.thread_index() == 0) - carry = block_results[context.block_index()]; - context.barrier(); - } - - // process full units - while (base + unit_size <= interval.end()) - { - const unsigned int n = unit_size; - scan_body(context, n, carry_in, input, output, binary_op, sdata.get(), carry.get()); - base += K * CTA_SIZE; - input += K * CTA_SIZE; - output += K * CTA_SIZE; - carry_in = true; - } - - // process partially full unit at end of input (if necessary) - if (base < interval.end()) - { - const unsigned int n = interval.end() - base; - scan_body(context, n, carry_in, input, output, binary_op, sdata.get(), carry.get()); - } - } -}; - - -} // end namespace fast_scan_detail - - -template -OutputIterator inclusive_scan(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator output, - BinaryFunction binary_op) -{ - using namespace fast_scan_detail; - - // the pseudocode for deducing the type of the temporary used below: - // - // if BinaryFunction is AdaptableBinaryFunction - // TemporaryType = AdaptableBinaryFunction::result_type - // else if OutputIterator is a "pure" output iterator - // TemporaryType = InputIterator::value_type - // else - // TemporaryType = OutputIterator::value_type - // - // XXX upon c++0x, TemporaryType needs to be: - // result_of::type - - typedef typename thrust::detail::eval_if< - 
thrust::detail::has_result_type::value, - thrust::detail::result_type, - thrust::detail::eval_if< - thrust::detail::is_output_iterator::value, - thrust::iterator_value, - thrust::iterator_value - > - >::type ValueType; - - typedef unsigned int IndexType; - typedef thrust::system::detail::internal::uniform_decomposition Decomposition; - typedef thrust::detail::temporary_array ValueArray; - - if (first == last) - return output; - - Decomposition decomp = thrust::system::cuda::detail::default_decomposition(last - first); - - ValueArray block_results(exec, decomp.size()); - - // compute sum over each interval - if (thrust::detail::is_commutative::value) - { - // use reduce_intervals for commutative operators - thrust::system::cuda::detail::reduce_intervals(exec, first, block_results.begin(), binary_op, decomp); - } - else - { - const static unsigned int ThreadsPerBlock = inclusive_scan_block_size::pass1; - typedef detail::statically_blocked_thread_array Context; - - typedef upsweep_intervals_closure Closure; - Closure closure(first, - thrust::raw_pointer_cast(&block_results[0]), - binary_op, - decomp); - detail::launch_closure(closure, decomp.size(), ThreadsPerBlock); - } - - // second level inclusive scan of per-block results - { - const static unsigned int ThreadsPerBlock = inclusive_scan_block_size::pass2; - typedef detail::statically_blocked_thread_array Context; - - typedef downsweep_intervals_closure Closure; - Closure closure(thrust::raw_pointer_cast(&block_results[0]), - thrust::raw_pointer_cast(&block_results[0]), - thrust::raw_pointer_cast(&block_results[0]), // not used - binary_op, - Decomposition(decomp.size(), 1, 1)); - detail::launch_closure(closure, 1, ThreadsPerBlock); - } - - // update intervals with result of second level scan - { - const static unsigned int ThreadsPerBlock = inclusive_scan_block_size::pass3; - typedef detail::statically_blocked_thread_array Context; - - typedef downsweep_intervals_closure Closure; - Closure closure(first, - output, 
- thrust::raw_pointer_cast(&block_results[0]) - 1, // shift block results - binary_op, - decomp); - detail::launch_closure(closure, decomp.size(), ThreadsPerBlock); - } - - return output + (last - first); -} - - -template -OutputIterator exclusive_scan(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator output, - const T init, - BinaryFunction binary_op) -{ - using namespace fast_scan_detail; - - // the pseudocode for deducing the type of the temporary used below: - // - // if BinaryFunction is AdaptableBinaryFunction - // TemporaryType = AdaptableBinaryFunction::result_type - // else if OutputIterator is a "pure" output iterator - // TemporaryType = InputIterator::value_type - // else - // TemporaryType = OutputIterator::value_type - // - // XXX upon c++0x, TemporaryType needs to be: - // result_of::type - - typedef typename thrust::detail::eval_if< - thrust::detail::has_result_type::value, - thrust::detail::result_type, - thrust::detail::eval_if< - thrust::detail::is_output_iterator::value, - thrust::iterator_value, - thrust::iterator_value - > - >::type ValueType; - - typedef unsigned int IndexType; - typedef thrust::system::detail::internal::uniform_decomposition Decomposition; - typedef thrust::detail::temporary_array ValueArray; - - if (first == last) - return output; - - Decomposition decomp = thrust::system::cuda::detail::default_decomposition(last - first); - - ValueArray block_results(exec, decomp.size() + 1); - - // compute sum over each interval - if (thrust::detail::is_commutative::value) - { - // use reduce_intervals for commutative operators - thrust::system::cuda::detail::reduce_intervals(exec, first, block_results.begin() + 1, binary_op, decomp); - } - else - { - const static unsigned int ThreadsPerBlock = exclusive_scan_block_size::pass1; - typedef detail::statically_blocked_thread_array Context; - - typedef upsweep_intervals_closure Closure; - Closure closure(first, - thrust::raw_pointer_cast(&block_results[0]) + 
1, - binary_op, - decomp); - detail::launch_closure(closure, decomp.size(), ThreadsPerBlock); - } - - // place init before per-block results - block_results[0] = init; - - // second level inclusive scan of per-block results - { - const static unsigned int ThreadsPerBlock = exclusive_scan_block_size::pass2; - typedef detail::statically_blocked_thread_array Context; - - typedef downsweep_intervals_closure Closure; - Closure closure(thrust::raw_pointer_cast(&block_results[0]), - thrust::raw_pointer_cast(&block_results[0]), - thrust::raw_pointer_cast(&block_results[0]), // not used - binary_op, - Decomposition(decomp.size() + 1, 1, 1)); - detail::launch_closure(closure, 1, ThreadsPerBlock); - } - - // update intervals with result of second level scan - { - const static unsigned int ThreadsPerBlock = exclusive_scan_block_size::pass3; - typedef detail::statically_blocked_thread_array Context; - - typedef downsweep_intervals_closure Closure; - Closure closure(first, - output, - thrust::raw_pointer_cast(&block_results[0]), // shift block results - binary_op, - decomp); - detail::launch_closure(closure, decomp.size(), ThreadsPerBlock); - } - - return output + (last - first); -} - - -} // end namespace fast_scan -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END - diff --git a/compat/thrust/system/cuda/detail/detail/launch_calculator.h b/compat/thrust/system/cuda/detail/detail/launch_calculator.h deleted file mode 100644 index 5126aa6f1a..0000000000 --- a/compat/thrust/system/cuda/detail/detail/launch_calculator.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ - -template -class launch_calculator -{ - device_properties_t properties; - function_attributes_t attributes; - - public: - - launch_calculator(void); - - launch_calculator(const device_properties_t& properties, const function_attributes_t& attributes); - - thrust::tuple with_variable_block_size(void) const; - - template - thrust::tuple with_variable_block_size(UnaryFunction block_size_to_smem_size) const; - - thrust::tuple with_variable_block_size_available_smem(void) const; - - private: - - /*! Returns a pair (num_threads_per_block, num_blocks_per_multiprocessor) - * where num_threads_per_block is a valid block size for an instance of Closure - * chosen by a heuristic and num_blocks_per_multiprocessor is the maximum - * number of such blocks that can execute on a streaming multiprocessor at once. - */ - thrust::pair default_block_configuration() const; - - /*! Returns a pair (num_threads_per_block, num_blocks_per_multiprocessor) - * where num_threads_per_block is a valid block size for an instance of Closure - * chosen by a heuristic and num_blocks_per_multiprocessor is the maximum - * number of such blocks that can execute on a streaming multiprocessor at once. 
- * - * \param block_size_to_smem_size Mapping from num_threads_per_block to number of - * dynamically-allocated bytes of shared memory - */ - template - thrust::pair default_block_configuration(UnaryFunction block_size_to_smem_size) const; -}; - -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/detail/launch_calculator.inl b/compat/thrust/system/cuda/detail/detail/launch_calculator.inl deleted file mode 100644 index b851d5fe13..0000000000 --- a/compat/thrust/system/cuda/detail/detail/launch_calculator.inl +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -// do not attempt to compile this file with any other compiler -#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ - -template -launch_calculator::launch_calculator(void) - : properties(device_properties()), - attributes(closure_attributes()) -{} - -template -launch_calculator::launch_calculator(const device_properties_t& properties, const function_attributes_t& attributes) - : properties(properties), - attributes(attributes) -{} - -template - template -thrust::pair launch_calculator::default_block_configuration(UnaryFunction block_size_to_smem_size) const -{ - // choose a block size - std::size_t num_threads_per_block = block_size_with_maximum_potential_occupancy(attributes, properties, block_size_to_smem_size); - - // choose a subscription rate - std::size_t num_blocks_per_multiprocessor = properties.maxThreadsPerMultiProcessor / num_threads_per_block; - - return thrust::make_pair(num_threads_per_block, num_blocks_per_multiprocessor); -} - - -template -thrust::pair launch_calculator::default_block_configuration(void) const -{ - // choose a block size - std::size_t num_threads_per_block = block_size_with_maximum_potential_occupancy(attributes, properties); - - // choose a subscription rate - std::size_t num_blocks_per_multiprocessor = properties.maxThreadsPerMultiProcessor / num_threads_per_block; - - return thrust::make_pair(num_threads_per_block, num_blocks_per_multiprocessor); -} - -template -thrust::tuple launch_calculator::with_variable_block_size(void) const -{ - thrust::pair config = default_block_configuration(); - return thrust::tuple(config.second * properties.multiProcessorCount, config.first, 0); -} - -template - template -thrust::tuple launch_calculator::with_variable_block_size(UnaryFunction block_size_to_smem_size) const -{ - thrust::pair config = default_block_configuration(block_size_to_smem_size); 
- return thrust::tuple(config.second * properties.multiProcessorCount, config.first, block_size_to_smem_size(config.first)); -} - -template -thrust::tuple launch_calculator::with_variable_block_size_available_smem(void) const -{ - thrust::pair config = default_block_configuration(); - size_t smem_per_block = proportional_smem_allocation(properties, attributes, config.second); - return thrust::tuple(config.second * properties.multiProcessorCount, config.first, smem_per_block); -} - -} // end detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#endif // THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC - diff --git a/compat/thrust/system/cuda/detail/detail/launch_closure.h b/compat/thrust/system/cuda/detail/detail/launch_closure.h deleted file mode 100644 index c2e6c4344f..0000000000 --- a/compat/thrust/system/cuda/detail/detail/launch_closure.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ - -template -struct launch_bounds -{ - typedef thrust::detail::integral_constant ThreadsPerBlock; - typedef thrust::detail::integral_constant BlocksPerMultiprocessor; -}; - -struct thread_array : public launch_bounds<> -{ -// CUDA built-in variables require nvcc -#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC - __device__ __thrust_forceinline__ unsigned int thread_index(void) const { return threadIdx.x; } - __device__ __thrust_forceinline__ unsigned int thread_count(void) const { return blockDim.x * gridDim.x; } -#else - __device__ __thrust_forceinline__ unsigned int thread_index(void) const { return 0; } - __device__ __thrust_forceinline__ unsigned int thread_count(void) const { return 0; } -#endif // THRUST_DEVICE_COMPILER_NVCC -}; - -struct blocked_thread_array : public launch_bounds<> -{ -// CUDA built-in variables require nvcc -#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC - __device__ __thrust_forceinline__ unsigned int thread_index(void) const { return threadIdx.x; } - __device__ __thrust_forceinline__ unsigned int block_dimension(void) const { return blockDim.x; } - __device__ __thrust_forceinline__ unsigned int block_index(void) const { return blockIdx.x; } - __device__ __thrust_forceinline__ unsigned int grid_dimension(void) const { return gridDim.x; } - __device__ __thrust_forceinline__ unsigned int linear_index(void) const { return block_dimension() * block_index() + thread_index(); } - __device__ __thrust_forceinline__ void barrier(void) { __syncthreads(); } -#else - __device__ __thrust_forceinline__ unsigned int thread_index(void) const { return 0; } - __device__ __thrust_forceinline__ unsigned int block_dimension(void) const { return 0; } - __device__ __thrust_forceinline__ unsigned int block_index(void) const { return 0; } - __device__ __thrust_forceinline__ unsigned int 
grid_dimension(void) const { return 0; } - __device__ __thrust_forceinline__ unsigned int linear_index(void) const { return 0; } - __device__ __thrust_forceinline__ void barrier(void) { } -#endif // THRUST_DEVICE_COMPILER_NVCC -}; - -template -struct statically_blocked_thread_array : public launch_bounds<_ThreadsPerBlock,1> -{ -// CUDA built-in variables require nvcc -#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC - __device__ __thrust_forceinline__ unsigned int thread_index(void) const { return threadIdx.x; } - __device__ __thrust_forceinline__ unsigned int block_dimension(void) const { return _ThreadsPerBlock; } // minor optimization - __device__ __thrust_forceinline__ unsigned int block_index(void) const { return blockIdx.x; } - __device__ __thrust_forceinline__ unsigned int grid_dimension(void) const { return gridDim.x; } - __device__ __thrust_forceinline__ unsigned int linear_index(void) const { return block_dimension() * block_index() + thread_index(); } - __device__ __thrust_forceinline__ void barrier(void) { __syncthreads(); } -#else - __device__ __thrust_forceinline__ unsigned int thread_index(void) const { return 0; } - __device__ __thrust_forceinline__ unsigned int block_dimension(void) const { return 0; } - __device__ __thrust_forceinline__ unsigned int block_index(void) const { return 0; } - __device__ __thrust_forceinline__ unsigned int grid_dimension(void) const { return 0; } - __device__ __thrust_forceinline__ unsigned int linear_index(void) const { return 0; } - __device__ __thrust_forceinline__ void barrier(void) { } -#endif // THRUST_DEVICE_COMPILER_NVCC -}; - -template - void launch_closure(Closure f, Size1 num_blocks, Size2 block_size); - -template - void launch_closure(Closure f, Size1 num_blocks, Size2 block_size, Size3 smem_size); - -/*! 
Returns a copy of the cudaFuncAttributes structure - * that is associated with a given Closure - */ -template -function_attributes_t closure_attributes(void); - -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/detail/launch_closure.inl b/compat/thrust/system/cuda/detail/detail/launch_closure.inl deleted file mode 100644 index ce39cfc136..0000000000 --- a/compat/thrust/system/cuda/detail/detail/launch_closure.inl +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace detail -{ - -// XXX WAR circular inclusion problems with this forward declaration -template class temporary_array; - -} // end detail - -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ - -#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC -template -__global__ __launch_bounds__(Closure::context_type::ThreadsPerBlock::value, Closure::context_type::BlocksPerMultiprocessor::value) -void launch_closure_by_value(Closure f) -{ - f(); -} - -template -__global__ __launch_bounds__(Closure::context_type::ThreadsPerBlock::value, Closure::context_type::BlocksPerMultiprocessor::value) -void launch_closure_by_pointer(const Closure *f) -{ - // copy to registers - Closure f_reg = *f; - f_reg(); -} -#else -template -void launch_closure_by_value(Closure) {} - -template -void launch_closure_by_pointer(const Closure *) {} - -#endif // THRUST_DEVICE_COMPILER_NVCC - -template - struct closure_launcher_base -{ - typedef void (*launch_function_t)(Closure); - - static launch_function_t get_launch_function(void) - { - return launch_closure_by_value; - } - - template - static void launch(Closure f, Size1 num_blocks, Size2 block_size, Size3 smem_size) - { -#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC - if(num_blocks > 0) - { - launch_closure_by_value<<<(unsigned int) num_blocks, (unsigned int) block_size, (unsigned int) smem_size>>>(f); - synchronize_if_enabled("launch_closure_by_value"); - } -#endif // THRUST_DEVICE_COMPILER_NVCC - } -}; // end closure_launcher_base - - -template - struct closure_launcher_base -{ - typedef void (*launch_function_t)(const Closure *); - - static launch_function_t get_launch_function(void) - { - return launch_closure_by_pointer; - } - - template - static void launch(Closure f, Size1 num_blocks, Size2 block_size, Size3 smem_size) - { -#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC - 
if(num_blocks > 0) - { - // use temporary storage for the closure - // XXX use of cuda::tag is too specific here - thrust::cuda::tag cuda_tag; - thrust::host_system_tag host_tag; - thrust::detail::temporary_array closure_storage(cuda_tag, host_tag, &f, &f + 1); - - // launch - detail::launch_closure_by_pointer<<<(unsigned int) num_blocks, (unsigned int) block_size, (unsigned int) smem_size>>>((&closure_storage[0]).get()); - synchronize_if_enabled("launch_closure_by_pointer"); - } -#endif // THRUST_DEVICE_COMPILER_NVCC - } -}; - - -template - struct closure_launcher - : public closure_launcher_base -{ - typedef closure_launcher_base super_t; - - static inline const device_properties_t& device_properties(void) - { - return device_properties(); - } - - static inline function_attributes_t function_attributes(void) - { - return thrust::system::cuda::detail::function_attributes(super_t::get_launch_function()); - } - - template - static void launch(Closure f, Size1 num_blocks, Size2 block_size, Size3 smem_size) - { - super_t::launch(f,num_blocks,block_size,smem_size); - } -}; - -template - void launch_closure(Closure f, Size num_blocks) -{ - launch_calculator calculator; - launch_closure(f, num_blocks, thrust::get<1>(calculator.with_variable_block_size())); -} // end launch_closure() - -template - void launch_closure(Closure f, Size1 num_blocks, Size2 block_size) -{ - launch_closure(f, num_blocks, block_size, 0u); -} // end launch_closure() - -template - void launch_closure(Closure f, Size1 num_blocks, Size2 block_size, Size3 smem_size) -{ - closure_launcher::launch(f, num_blocks, block_size, smem_size); -} // end launch_closure() - - -template -function_attributes_t closure_attributes(void) -{ - typedef closure_launcher Launcher; - - // cache the result of function_attributes(), because it is slow - // only cache the first few devices - static const int max_num_devices = 16; - - static bool attributes_exist[max_num_devices] = {0}; - static function_attributes_t 
function_attributes[max_num_devices] = {}; - - // XXX device_id ought to be an argument to this function - int device_id = current_device(); - - if(device_id >= max_num_devices) - { - return thrust::system::cuda::detail::function_attributes(Launcher::get_launch_function()); - } - - if(!attributes_exist[device_id]) - { - function_attributes[device_id] = thrust::system::cuda::detail::function_attributes(Launcher::get_launch_function()); - - // disallow the compiler to move the write to attributes_exist[device_id] - // before the initialization of function_attributes[device_id] - __thrust_compiler_fence(); - - attributes_exist[device_id] = true; - } - - return function_attributes[device_id]; -} - -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/detail/set_operation.h b/compat/thrust/system/cuda/detail/detail/set_operation.h deleted file mode 100644 index 5475731edb..0000000000 --- a/compat/thrust/system/cuda/detail/detail/set_operation.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ - -template - RandomAccessIterator3 set_operation(execution_policy &exec, - RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - RandomAccessIterator2 last2, - RandomAccessIterator3 result, - Compare comp, - SetOperation set_op); - - -} // end detail -} // end detail -} // end cuda -} // end system -} // end thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/detail/set_operation.inl b/compat/thrust/system/cuda/detail/detail/set_operation.inl deleted file mode 100644 index 3f14379628..0000000000 --- a/compat/thrust/system/cuda/detail/detail/set_operation.inl +++ /dev/null @@ -1,639 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ -namespace set_operation_detail -{ - - -using thrust::system::cuda::detail::detail::statically_blocked_thread_array; -using thrust::detail::uint16_t; -using thrust::detail::uint32_t; - - -// empirically determined on sm_20 -// value_types larger than this will fail to launch if placed in smem -template - struct stage_through_smem -{ - static const bool value = sizeof(T) <= 6 * sizeof(uint32_t); -}; - - -// max_input_size <= 32 -template -inline __device__ - OutputIterator serial_bounded_copy_if(Size max_input_size, - InputIterator first, - uint32_t mask, - OutputIterator result) -{ - for(Size i = 0; i < max_input_size; ++i, ++first) - { - if((1< - struct find_partition_offsets_functor -{ - Size partition_size; - InputIterator1 first1; - InputIterator2 first2; - Size n1, n2; - Compare comp; - - find_partition_offsets_functor(Size partition_size, - InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, InputIterator2 last2, - Compare comp) - : partition_size(partition_size), - first1(first1), first2(first2), - n1(last1 - first1), n2(last2 - first2), - comp(comp) - {} - - inline __host__ __device__ - thrust::pair operator()(Size i) const - { - Size diag = thrust::min(n1 + n2, i * partition_size); - - // XXX the correctness of balanced_path depends critically on the ll suffix below - // why??? 
- return balanced_path(first1, n1, first2, n2, diag, 4ll, comp); - } -}; - - -template - OutputIterator find_partition_offsets(thrust::cuda::execution_policy &exec, - Size num_partitions, - Size partition_size, - InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, InputIterator2 last2, - OutputIterator result, - Compare comp) -{ - find_partition_offsets_functor f(partition_size, first1, last1, first2, last2, comp); - - return thrust::transform(exec, - thrust::counting_iterator(0), - thrust::counting_iterator(num_partitions), - result, - f); -} - - -namespace block -{ - - -template -inline __device__ -T right_neighbor(statically_blocked_thread_array &ctx, const T &x, const T &boundary) -{ - // stage this shift to conserve smem - const unsigned int storage_size = block_size / 2; - __shared__ uninitialized_array shared; - - T result = x; - - unsigned int tid = ctx.thread_index(); - - if(0 < tid && tid <= storage_size) - { - shared[tid - 1] = x; - } - - ctx.barrier(); - - if(tid < storage_size) - { - result = shared[tid]; - } - - ctx.barrier(); - - tid -= storage_size; - if(0 < tid && tid <= storage_size) - { - shared[tid - 1] = x; - } - else if(tid == 0) - { - shared[storage_size-1] = boundary; - } - - ctx.barrier(); - - if(tid < storage_size) - { - result = shared[tid]; - } - - ctx.barrier(); - - return result; -} - - -template -inline __device__ - unsigned int bounded_count_set_operation_n(statically_blocked_thread_array &ctx, - InputIterator1 first1, uint16_t n1, - InputIterator2 first2, uint16_t n2, - Compare comp, - SetOperation set_op) -{ - unsigned int thread_idx = ctx.thread_index(); - - // find partition offsets - uint16_t diag = thrust::min(n1 + n2, thread_idx * work_per_thread); - thrust::pair thread_input_begin = balanced_path(first1, n1, first2, n2, diag, 2, comp); - thrust::pair thread_input_end = block::right_neighbor(ctx, thread_input_begin, thrust::make_pair(n1,n2)); - - __shared__ uint16_t s_thread_output_size[block_size]; - - // 
work_per_thread + 1 to accomodate a "starred" partition returned from balanced_path above - s_thread_output_size[thread_idx] = - set_op.count(work_per_thread + 1, - first1 + thread_input_begin.first, first1 + thread_input_end.first, - first2 + thread_input_begin.second, first2 + thread_input_end.second, - comp); - - ctx.barrier(); - - // reduce per-thread counts - thrust::system::cuda::detail::block::inplace_inclusive_scan(ctx, s_thread_output_size); - return s_thread_output_size[ctx.block_dimension() - 1]; -} - - -inline __device__ int pop_count(unsigned int x) -{ -// guard use of __popc from other compilers -#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC - return __popc(x); -#else - return x; -#endif -} - - - -template -inline __device__ - OutputIterator bounded_set_operation_n(statically_blocked_thread_array &ctx, - InputIterator1 first1, uint16_t n1, - InputIterator2 first2, uint16_t n2, - OutputIterator result, - Compare comp, - SetOperation set_op) -{ - unsigned int thread_idx = ctx.thread_index(); - - // find partition offsets - uint16_t diag = thrust::min(n1 + n2, thread_idx * work_per_thread); - thrust::pair thread_input_begin = balanced_path(first1, n1, first2, n2, diag, 2, comp); - thrust::pair thread_input_end = block::right_neighbor(ctx, thread_input_begin, thrust::make_pair(n1,n2)); - - typedef typename thrust::iterator_value::type value_type; - // +1 to accomodate a "starred" partition returned from balanced_path above - uninitialized_array sparse_result; - uint32_t active_mask = - set_op(work_per_thread + 1, - first1 + thread_input_begin.first, first1 + thread_input_end.first, - first2 + thread_input_begin.second, first2 + thread_input_end.second, - sparse_result.begin(), - comp); - - __shared__ uint16_t s_thread_output_size[block_size]; - s_thread_output_size[thread_idx] = pop_count(active_mask); - - ctx.barrier(); - - // scan to turn per-thread counts into output indices - uint16_t block_output_size = 
thrust::system::cuda::detail::block::inplace_exclusive_scan(ctx, s_thread_output_size, 0u); - - serial_bounded_copy_if(work_per_thread + 1, sparse_result.begin(), active_mask, result + s_thread_output_size[thread_idx]); - - ctx.barrier(); - - return result + block_output_size; -} - - -template -inline __device__ - typename thrust::iterator_difference::type - count_set_operation(statically_blocked_thread_array &ctx, - InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, InputIterator2 last2, - Compare comp, - SetOperation set_op) -{ - typedef typename thrust::iterator_difference::type difference; - - difference result = 0; - - thrust::pair remaining_input_size = thrust::make_pair(last1 - first1, last2 - first2); - - // iterate until the input is consumed - while(remaining_input_size.first + remaining_input_size.second > 0) - { - // find the end of this subpartition's input - // -1 to accomodate "starred" partitions - uint16_t max_subpartition_size = block_size * work_per_thread - 1; - difference diag = thrust::min(remaining_input_size.first + remaining_input_size.second, max_subpartition_size); - thrust::pair subpartition_size = balanced_path(first1, remaining_input_size.first, first2, remaining_input_size.second, diag, 4ll, comp); - - typedef typename thrust::iterator_value::type value_type; - if(stage_through_smem::value) - { - // load the input into __shared__ storage - __shared__ uninitialized_array s_input; - - value_type *s_input_end1 = thrust::system::cuda::detail::block::copy_n(ctx, first1, subpartition_size.first, s_input.begin()); - value_type *s_input_end2 = thrust::system::cuda::detail::block::copy_n(ctx, first2, subpartition_size.second, s_input_end1); - - result += block::bounded_count_set_operation_n(ctx, - s_input.begin(), subpartition_size.first, - s_input_end1, subpartition_size.second, - comp, - set_op); - } - else - { - result += block::bounded_count_set_operation_n(ctx, - first1, subpartition_size.first, - first2, 
subpartition_size.second, - comp, - set_op); - } - - // advance input - first1 += subpartition_size.first; - first2 += subpartition_size.second; - - // decrement remaining size - remaining_input_size.first -= subpartition_size.first; - remaining_input_size.second -= subpartition_size.second; - } - - return result; -} - - -template -inline __device__ -OutputIterator set_operation(statically_blocked_thread_array &ctx, - InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, InputIterator2 last2, - OutputIterator result, - Compare comp, - SetOperation set_op) -{ - typedef typename thrust::iterator_difference::type difference; - - thrust::pair remaining_input_size = thrust::make_pair(last1 - first1, last2 - first2); - - // iterate until the input is consumed - while(remaining_input_size.first + remaining_input_size.second > 0) - { - // find the end of this subpartition's input - // -1 to accomodate "starred" partitions - uint16_t max_subpartition_size = block_size * work_per_thread - 1; - difference diag = thrust::min(remaining_input_size.first + remaining_input_size.second, max_subpartition_size); - thrust::pair subpartition_size = balanced_path(first1, remaining_input_size.first, first2, remaining_input_size.second, diag, 4ll, comp); - - typedef typename thrust::iterator_value::type value_type; - if(stage_through_smem::value) - { - // load the input into __shared__ storage - __shared__ uninitialized_array s_input; - - value_type *s_input_end1 = thrust::system::cuda::detail::block::copy_n(ctx, first1, subpartition_size.first, s_input.begin()); - value_type *s_input_end2 = thrust::system::cuda::detail::block::copy_n(ctx, first2, subpartition_size.second, s_input_end1); - - result = block::bounded_set_operation_n(ctx, - s_input.begin(), subpartition_size.first, - s_input_end1, subpartition_size.second, - result, - comp, - set_op); - } - else - { - result = block::bounded_set_operation_n(ctx, - first1, subpartition_size.first, - first2, 
subpartition_size.second, - result, - comp, - set_op); - } - - // advance input - first1 += subpartition_size.first; - first2 += subpartition_size.second; - - // decrement remaining size - remaining_input_size.first -= subpartition_size.first; - remaining_input_size.second -= subpartition_size.second; - } - - return result; -} - - -} // end namespace block - - -template - inline __device__ void count_set_operation(statically_blocked_thread_array &ctx, - InputIterator1 input_partition_offsets, - Size num_partitions, - InputIterator2 first1, - InputIterator3 first2, - OutputIterator result, - Compare comp, - SetOperation set_op) -{ - // consume partitions - for(Size partition_idx = ctx.block_index(); - partition_idx < num_partitions; - partition_idx += ctx.grid_dimension()) - { - typedef typename thrust::iterator_difference::type difference; - - // find the partition - thrust::pair block_input_begin = input_partition_offsets[partition_idx]; - thrust::pair block_input_end = input_partition_offsets[partition_idx + 1]; - - // count the size of the set operation - difference count = block::count_set_operation(ctx, - first1 + block_input_begin.first, first1 + block_input_end.first, - first2 + block_input_begin.second, first2 + block_input_end.second, - comp, - set_op); - - if(ctx.thread_index() == 0) - { - result[partition_idx] = count; - } - } -} - - -template - struct count_set_operation_closure -{ - typedef statically_blocked_thread_array context_type; - - InputIterator1 input_partition_offsets; - Size num_partitions; - InputIterator2 first1; - InputIterator3 first2; - OutputIterator result; - Compare comp; - SetOperation set_op; - - count_set_operation_closure(InputIterator1 input_partition_offsets, - Size num_partitions, - InputIterator2 first1, - InputIterator3 first2, - OutputIterator result, - Compare comp, - SetOperation set_op) - : input_partition_offsets(input_partition_offsets), - num_partitions(num_partitions), - first1(first1), - first2(first2), - 
result(result), - comp(comp), - set_op(set_op) - {} - - inline __device__ void operator()() const - { - context_type ctx; - count_set_operation(ctx, input_partition_offsets, num_partitions, first1, first2, result, comp, set_op); - } -}; - - -template - count_set_operation_closure - make_count_set_operation_closure(InputIterator1 input_partition_offsets, - Size num_partitions, - InputIterator2 first1, - InputIterator3 first2, - OutputIterator result, - Compare comp, - SetOperation set_op) -{ - typedef count_set_operation_closure result_type; - return result_type(input_partition_offsets,num_partitions,first1,first2,result,comp,set_op); -} - - -template -inline __device__ - void set_operation(statically_blocked_thread_array &ctx, - InputIterator1 input_partition_offsets, - Size num_partitions, - InputIterator2 first1, - InputIterator3 first2, - InputIterator4 output_partition_offsets, - OutputIterator result, - Compare comp, - SetOperation set_op) -{ - // consume partitions - for(Size partition_idx = ctx.block_index(); - partition_idx < num_partitions; - partition_idx += ctx.grid_dimension()) - { - typedef typename thrust::iterator_difference::type difference; - - // find the partition - thrust::pair block_input_begin = input_partition_offsets[partition_idx]; - thrust::pair block_input_end = input_partition_offsets[partition_idx + 1]; - - // do the set operation across the partition - block::set_operation(ctx, - first1 + block_input_begin.first, first1 + block_input_end.first, - first2 + block_input_begin.second, first2 + block_input_end.second, - result + output_partition_offsets[partition_idx], - comp, - set_op); - } -} - - -template - struct set_operation_closure -{ - typedef statically_blocked_thread_array context_type; - - InputIterator1 input_partition_offsets; - Size num_partitions; - InputIterator2 first1; - InputIterator3 first2; - InputIterator4 output_partition_offsets; - OutputIterator result; - Compare comp; - SetOperation set_op; - - 
set_operation_closure(InputIterator1 input_partition_offsets, - Size num_partitions, - InputIterator2 first1, - InputIterator3 first2, - InputIterator4 output_partition_offsets, - OutputIterator result, - Compare comp, - SetOperation set_op) - : input_partition_offsets(input_partition_offsets), - num_partitions(num_partitions), - first1(first1), - first2(first2), - output_partition_offsets(output_partition_offsets), - result(result), - comp(comp), - set_op(set_op) - {} - - inline __device__ void operator()() const - { - context_type ctx; - set_operation(ctx, input_partition_offsets, num_partitions, first1, first2, output_partition_offsets, result, comp, set_op); - } -}; - - -template - set_operation_closure - make_set_operation_closure(InputIterator1 input_partition_offsets, - Size num_partitions, - InputIterator2 first1, - InputIterator3 first2, - InputIterator4 output_partition_offsets, - OutputIterator result, - Compare comp, - SetOperation set_op) -{ - typedef set_operation_closure result_type; - return result_type(input_partition_offsets,num_partitions,first1,first2,output_partition_offsets,result,comp,set_op); -} - - -} // end namespace set_operation_detail - - -template - OutputIterator set_operation(thrust::cuda::execution_policy &exec, - InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, InputIterator2 last2, - OutputIterator result, - Compare comp, - SetOperation set_op) -{ - using thrust::system::cuda::detail::device_properties; - using thrust::system::cuda::detail::detail::launch_closure; - namespace d = thrust::system::cuda::detail::detail::set_operation_detail; - - typedef typename thrust::iterator_difference::type difference; - - const difference n1 = last1 - first1; - const difference n2 = last2 - first2; - - // handle empty input - if(n1 == 0 && n2 == 0) - { - return result; - } - - const thrust::detail::uint16_t work_per_thread = 15; - const thrust::detail::uint16_t threads_per_block = 128; - const thrust::detail::uint16_t 
work_per_block = threads_per_block * work_per_thread; - - // -1 because balanced_path adds a single element to the end of a "starred" partition, increasing its size by one - const thrust::detail::uint16_t maximum_partition_size = work_per_block - 1; - const difference num_partitions = thrust::detail::util::divide_ri(n1 + n2, maximum_partition_size); - - // find input partition offsets - // +1 to handle the end of the input elegantly - thrust::detail::temporary_array, DerivedPolicy> input_partition_offsets(0, exec, num_partitions + 1); - d::find_partition_offsets(exec, input_partition_offsets.size(), maximum_partition_size, first1, last1, first2, last2, input_partition_offsets.begin(), comp); - - const difference num_blocks = thrust::min(device_properties().maxGridSize[0], num_partitions); - - // find output partition offsets - // +1 to store the total size of the total - thrust::detail::temporary_array output_partition_offsets(0, exec, num_partitions + 1); - launch_closure(d::make_count_set_operation_closure(input_partition_offsets.begin(), num_partitions, first1, first2, output_partition_offsets.begin(), comp, set_op), - num_blocks, - threads_per_block); - - // turn the output partition counts into offsets to output partitions - thrust::exclusive_scan(exec, output_partition_offsets.begin(), output_partition_offsets.end(), output_partition_offsets.begin()); - - // run the set op kernel - launch_closure(d::make_set_operation_closure(input_partition_offsets.begin(), num_partitions, first1, first2, output_partition_offsets.begin(), result, comp, set_op), - num_blocks, - threads_per_block); - - return result + output_partition_offsets[num_partitions]; -} - - -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/detail/stable_merge_sort.h b/compat/thrust/system/cuda/detail/detail/stable_merge_sort.h deleted file mode 100644 index 
23f32545af..0000000000 --- a/compat/thrust/system/cuda/detail/detail/stable_merge_sort.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file stable_merge_sort_dev.h - * \brief Defines the interface for a stable merge implementation on CUDA - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ - -template -void stable_merge_sort(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp); - -template -void stable_merge_sort_by_key(execution_policy &exec, - RandomAccessIterator1 keys_begin, - RandomAccessIterator1 keys_end, - RandomAccessIterator2 values_begin, - StrictWeakOrdering comp); - -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/detail/stable_merge_sort.inl b/compat/thrust/system/cuda/detail/detail/stable_merge_sort.inl deleted file mode 100644 index 0c69803294..0000000000 --- a/compat/thrust/system/cuda/detail/detail/stable_merge_sort.inl +++ /dev/null @@ -1,1103 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file stable_merge_sort.inl - * \brief Inline file for stable_merge_sort.h. - * \note This algorithm is based on the one described - * in "Designing Efficient Sorting Algorithms for - * Manycore GPUs", by Satish, Harris, and Garland. - */ - -#include - -#include -#include - -#include - -#include // for log2 -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN - - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ -namespace stable_merge_sort_detail -{ - - -template - struct is_block_size_valid -{ - // assume sm_10 limits - static const unsigned int max_num_smem_bytes = 16384; - - // CUDA steals 256 for itself for kernel parms - static const unsigned int num_reserved_smem_bytes = 256; - - // the number of bytes available to our kernels - static const unsigned int num_available_smem_bytes = max_num_smem_bytes - num_reserved_smem_bytes; - - // merge_small_tiles_by_key_closure is the hungriest kernel - // the block_size it uses is 2x the size of all the other kernels - // this merge_small_tiles_by_key_closure's smem requirements: - // 2 * block_size_x2 * sizeof(Key) - // + 2 * block_size_x2 * sizeof(Key) - // + 2 * block_size_x2 * sizeof(Value) - // ================================ - // 4 * (block_size) * (2 * sizeof(Key) + sizeof(Value)) - static const unsigned int num_needed_smem_bytes = 4 * (1 << log_block_size) * (2 
* sizeof(Key) + sizeof(Value)); - - static const bool value = num_needed_smem_bytes <= num_available_smem_bytes; -}; - - - -// choose a (log) block_size to use for our kernels -template - struct select_log_block_size - : thrust::detail::eval_if< - is_block_size_valid::value, - thrust::detail::integral_constant, - select_log_block_size - >::type -{}; - - -// don't recurse lower than block_size < 128 -template - struct select_log_block_size<6, Key, Value> -{ - // no block size exists which can satisfy the storage demands -}; - - -template - struct block_size -{ - // prefer block_size == 512, go lower if we need to - static const unsigned int value = 1 << select_log_block_size<8, Key, Value>::value; -}; - - -template -inline unsigned int max_grid_size(Size block_size) -{ - const device_properties_t& properties = device_properties(); - - const unsigned int max_threads = properties.maxThreadsPerMultiProcessor * properties.multiProcessorCount; - const unsigned int max_blocks = properties.maxGridSize[0]; - - return std::min(max_blocks, 3 * max_threads / block_size); -} // end max_grid_size() - - -// Base case for the merge algorithm: merges data where tile_size <= block_size. -// Works by loading two or more tiles into shared memory and doing a binary search. 
-template -struct merge_small_tiles_by_key_closure -{ - typedef Context context_type; - - RandomAccessIterator1 keys_first; - RandomAccessIterator2 values_first; - const unsigned int n; - const unsigned int log_tile_size; - RandomAccessIterator3 keys_result; - RandomAccessIterator4 values_result; - StrictWeakOrdering comp; - context_type context; - - // these members are derivable from block_size, n, and log_tile_size - unsigned int index_of_last_block; - unsigned int index_of_last_tile_in_last_block; - unsigned int size_of_last_tile; - - merge_small_tiles_by_key_closure - (RandomAccessIterator1 keys_first, - RandomAccessIterator2 values_first, - const unsigned int n, - const unsigned int log_tile_size, - RandomAccessIterator3 keys_result, - RandomAccessIterator4 values_result, - StrictWeakOrdering comp, - Context context = Context()) - : keys_first(keys_first), values_first(values_first), - n(n), - log_tile_size(log_tile_size), - keys_result(keys_result), values_result(values_result), - comp(comp), - context(context) - { - // compute the number of tiles, including a possible partial tile - unsigned int tile_size = 1 << log_tile_size; - unsigned int num_tiles = thrust::detail::util::divide_ri(n, tile_size); - unsigned int partial_tile_size = n % tile_size; - - // compute the number of logical thread blocks, including a possible partial block - unsigned int tiles_per_block = block_size / tile_size; - unsigned int num_blocks = thrust::detail::util::divide_ri(num_tiles, tiles_per_block); - unsigned int partial_block_size = num_tiles % tiles_per_block; - - // compute the number of tiles in the last block, which might be of partial size - unsigned int number_of_tiles_in_last_block = partial_block_size ? partial_block_size : tiles_per_block; - - size_of_last_tile = partial_tile_size ? 
partial_tile_size : tile_size; - index_of_last_tile_in_last_block = number_of_tiles_in_last_block - 1; - index_of_last_block = num_blocks - 1; - } - - unsigned int grid_size() const - { - const unsigned int max_num_blocks = max_grid_size(block_size); - const unsigned int num_logical_blocks = index_of_last_block + 1; - return thrust::min(num_logical_blocks, max_num_blocks); - } - - __device__ __thrust_forceinline__ - void operator()(void) - { - typedef typename iterator_value::type KeyType; - typedef typename iterator_value::type ValueType; - - // load (2*block_size) elements into shared memory. These (2*block_size) elements belong to (2*block_size)/tile_size different tiles. - __shared__ uninitialized_array key; - __shared__ uninitialized_array outkey; - __shared__ uninitialized_array outvalue; - - const unsigned int grid_size = context.grid_dimension() * context.block_dimension(); - - unsigned int block_idx = context.block_index(); - - // the global index of this task - unsigned int i = context.thread_index() + context.block_index() * context.block_dimension(); - - // advance iterators - keys_first += i; - values_first += i; - keys_result += i; - values_result += i; - - for(; - block_idx <= index_of_last_block; - block_idx += context.grid_dimension(), i += grid_size, keys_first += grid_size, values_first += grid_size, keys_result += grid_size, values_result += grid_size) - { - KeyType my_key; - - // copy over inputs to shared memory - if(i < n) - { - key[context.thread_index()] = my_key = *keys_first; - } // end if - - // the tile to which the element belongs - unsigned int tile_index = context.thread_index()>>log_tile_size; - - // figure out the index and size of the other tile - unsigned int other_tile_index = tile_index^1; - unsigned int other_tile_size = (1< - class static_strided_integer_range -{ - // XXX cudafe doesn't like this private for some reason - //private: - public: - typedef typename thrust::counting_iterator counting_iterator; - - struct 
stride_functor - : public thrust::unary_function - { - inline __host__ __device__ - unsigned int operator()(unsigned int i) const - { - return stride * i; - } - }; - - public: - typedef typename thrust::transform_iterator iterator; - - static_strided_integer_range(unsigned int num_strides) - : m_begin(iterator(counting_iterator(0), stride_functor())), - m_end(iterator(counting_iterator(num_strides), stride_functor())) - {} - - iterator begin() const - { - return m_begin; - } - - iterator end() const - { - return m_end; - } - - private: - iterator m_begin, m_end; -}; - - -///////////////////// Find the rank of each extracted element in both arrays //////////////////////////////////////// -///////////////////// This breaks up the array into independent segments to merge //////////////////////////////////////// -// Inputs: d_splitters, d_splittes_pos: the merged array of splitters with corresponding positions. -// d_srcData: input data, datasize: number of entries in d_srcData -// N_SPLITTERS the number of splitters, log_blocksize: log of the size of each block of sorted data -// log_num_merged_splitters_per_tile = log of the number of merged splitters. ( = log_blocksize - 7). -// Output: d_rank1, d_rank2: ranks of each splitter in d_splitters in the block to which it belongs -// (say i) and its corresponding block (block i+1). 
-template -struct rank_splitters_closure -{ - typedef Context context_type; - - static const unsigned int log_block_size = thrust::detail::mpl::math::log2::value; - - RandomAccessIterator1 splitters_first; - RandomAccessIterator2 splitters_pos_first; - RandomAccessIterator3 keys_first; - RandomAccessIterator4 ranks_result1; - RandomAccessIterator4 ranks_result2; - unsigned int num_splitters; - unsigned int num_keys; - unsigned int log_tile_size; - thrust::detail::device_function< - StrictWeakOrdering, - bool - > comp; - context_type context; - - // this member is derivable from those received in the constructor - unsigned int log_num_merged_splitters_per_tile; - - rank_splitters_closure(RandomAccessIterator1 splitters_first, - RandomAccessIterator2 splitters_pos_first, - RandomAccessIterator3 keys_first, - unsigned int num_splitters, - unsigned int num_keys, - unsigned int log_tile_size, - RandomAccessIterator4 ranks_result1, - RandomAccessIterator4 ranks_result2, - StrictWeakOrdering comp, - context_type context = context_type()) - : splitters_first(splitters_first), splitters_pos_first(splitters_pos_first), - keys_first(keys_first), - ranks_result1(ranks_result1), ranks_result2(ranks_result2), - num_splitters(num_splitters), num_keys(num_keys), - log_tile_size(log_tile_size), - comp(comp), context(context) - { - // the number of splitters in each tile before merging - const unsigned int log_num_splitters_per_tile = log_tile_size - log_block_size; - - // the number of splitters in each merged tile - log_num_merged_splitters_per_tile = log_num_splitters_per_tile + 1; - } - - inline unsigned int grid_size() const - { - unsigned int num_blocks = num_splitters / block_size; - if(num_splitters % block_size) ++num_blocks; - - // compute the maximum number of block_size we can launch on this arch - const unsigned int max_num_blocks = max_grid_size(block_size); - - return min(num_blocks, max_num_blocks); - } - - /*! 
this member function returns the index of the (odd,even) block pair - * that the splitter of interest belongs to - * \param splitter_idx The index of the splitter in the splitters list - * \return The global index of the (odd,even) block pair - */ - __device__ __thrust_forceinline__ - unsigned int block_pair_idx(unsigned int splitter_idx) const - { - return splitter_idx >> log_num_merged_splitters_per_tile; - } - - /*! This member function returns the end of the search range in the other tile in - * which the splitter of interest needs to be ranked. - * \param splitter_idx The index of the splitter in the splitters array - * \param splitter_global_idx The index of the splitter in the global array of elements - * \param tile_idx The index of the tile to which the splitter belongs. - * \return The half-open interval in the other tile in which the splitter needs to be ranked. - * [first_index_to_search, size_of_interval) - */ - __device__ __thrust_forceinline__ - thrust::pair search_interval(unsigned int splitter_idx, unsigned int splitter_global_idx, unsigned int tile_idx) const - { - // We want to compute the ranks of the splitter in d_srcData1 and d_srcData2 - // for instance, if the splitter belongs to d_srcData1, then - // (1) the rank in d_srcData1 is simply given by its splitter_global_idx - // (2) to find the rank in d_srcData2, we first find the block in d_srcData2 where inp appears. - // We do this by noting that we have already merged/sorted splitters, and thus the rank - // of inp in the elements of d_srcData2 that are present in splitters is given by - // position of inp in d_splitters - rank of inp in elements of d_srcData1 in splitters - // = i - splitter_global_idx - // This also gives us the block of d_srcData2 that the splitter belongs in, since we have one - // element in splitters per block of d_srcData2. - - // We now perform a binary search over this block of d_srcData2 to find the rank of inp in d_srcData2. 
- // start and end are the start and end indices of this block in d_srcData2, forming the bounds of the binary search. - // Note that this binary search is in global memory with uncoalesced loads. However, we only find the ranks - // of a small set of elements, one per splitter: thus it is not the performance bottleneck. - - // the local index of the splitter within the (odd, even) block pair. - const unsigned int splitter_block_pair_idx = splitter_idx - (block_pair_idx(splitter_idx)<> log_block_size; - - // find the end of the search range in the other tile - unsigned int end = (( splitter_block_pair_idx - block_tile_idx) << log_block_size); - - // begin by assuming the search range is the size of a full block - unsigned int other_block_size = block_size; - - // the index of the other tile can be found with - const unsigned int other_tile_idx = tile_idx ^ 1; - - // the size of the other tile can be less than tile_size if the it is the last tile. - unsigned int other_tile_size = min(1 << log_tile_size, num_keys - (other_tile_idx< other_tile_size) - { - // the other block has partial size - end = other_tile_size; - other_block_size = num_keys % block_size; - } - else if(end == 0) - { - // when the search range is empty - // the other_block_size is 0 - other_block_size = 0; - } - - // the search range begins other_block_size elements before the end - unsigned int start = end - other_block_size; - - return thrust::make_pair(start,other_block_size); - } - - __device__ __thrust_forceinline__ - void operator()(void) - { - typedef typename iterator_value::type KeyType; - typedef typename iterator_value::type IndexType; - - const unsigned int grid_size = context.grid_dimension() * context.block_dimension(); - - unsigned int splitter_idx = context.thread_index() + context.block_index() * context.block_dimension(); - - // advance iterators - splitters_first += splitter_idx; - splitters_pos_first += splitter_idx; - ranks_result1 += splitter_idx; - ranks_result2 += 
splitter_idx; - - for(; - splitter_idx < num_splitters; - splitter_idx += grid_size, splitters_first += grid_size, splitters_pos_first += grid_size, ranks_result1 += grid_size, ranks_result2 += grid_size) - { - // the index of the splitter within the global array of elements - IndexType splitter_global_idx = *splitters_pos_first; - - // the tile to which the splitter belongs. - unsigned int tile_idx = (splitter_global_idx >> log_tile_size); - - // the index of the "other" tile which which tile_idx must be merged. - unsigned int other_tile_idx = tile_idx^1; - - // compute the interval in the other tile to search - unsigned int start, n; - thrust::tie(start,n) = search_interval(splitter_idx, splitter_global_idx, tile_idx); - - // point to the beginning of the other tile - RandomAccessIterator3 other_tile_begin = keys_first + (other_tile_idx< - void rank_splitters(RandomAccessIterator1 splitters_first, - RandomAccessIterator1 splitters_last, - RandomAccessIterator2 splitter_positions_first, - RandomAccessIterator3 keys_first, - RandomAccessIterator3 keys_last, - size_t log_tile_size, - RandomAccessIterator4 ranks_result1, - RandomAccessIterator4 ranks_result2, - StrictWeakOrdering comp) -{ - typedef rank_splitters_closure< - block_size, - RandomAccessIterator1, - RandomAccessIterator2, - RandomAccessIterator3, - RandomAccessIterator4, - StrictWeakOrdering, - detail::statically_blocked_thread_array - > Closure; - - Closure closure(splitters_first, - splitter_positions_first, - keys_first, - splitters_last - splitters_first, - keys_last - keys_first, - log_tile_size, - ranks_result1, - ranks_result2, - comp); - - detail::launch_closure(closure, closure.grid_size(), block_size); -} - - -template -__device__ - void copy_n(Context context, - RandomAccessIterator1 first1, - RandomAccessIterator2 first2, - Size n, - RandomAccessIterator3 result1, - RandomAccessIterator4 result2) -{ - for(Size i = context.thread_index(); - i < n; - i += context.block_dimension()) - { - 
result1[i] = first1[i]; - result2[i] = first2[i]; - } -} - - -///////////////////// MERGE TWO INDEPENDENT SEGMENTS USING BINARY SEARCH IN SHARED MEMORY //////////////////////////////////////// -// NOTE: This is the most compute-intensive part of the algorithm. -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -// Thread block i merges entries between rank[i] and rank[i+1]. These can be independently -// merged and concatenated, as noted above. -// Each thread in the thread block i does a binary search of one element between rank[i] -> rank[i+1] in the -// other array. - -// Inputs: srcdatakey, value: inputs -// log_blocksize, log_num_merged_splitters_per_tile: as in previous functions -// Outputs: resultdatakey, resultdatavalue: output merged arrays are written here. -template -struct merge_subtiles_by_key_closure -{ - typedef Context context_type; - static const unsigned int log_block_size = thrust::detail::mpl::math::log2::value; - - RandomAccessIterator1 keys_first; - RandomAccessIterator2 values_first; - unsigned int n; - RandomAccessIterator3 ranks_first1; - RandomAccessIterator4 ranks_first2; - const unsigned int tile_size; - const unsigned int num_splitters; - RandomAccessIterator5 keys_result; - RandomAccessIterator6 values_result; - StrictWeakOrdering comp; - Context context; - - // this member is derivable from the constructor parameters - unsigned int log_num_merged_splitters_per_tile; - - merge_subtiles_by_key_closure - (RandomAccessIterator1 keys_first, - RandomAccessIterator2 values_first, - unsigned int n, - RandomAccessIterator3 ranks_first1, - RandomAccessIterator4 ranks_first2, - const unsigned int log_tile_size, - const unsigned int num_splitters, - RandomAccessIterator5 keys_result, - RandomAccessIterator6 values_result, - StrictWeakOrdering comp, - Context context = Context()) - : keys_first(keys_first), values_first(values_first), n(n), - ranks_first1(ranks_first1), 
ranks_first2(ranks_first2), - tile_size(1 << log_tile_size), - num_splitters(num_splitters), - keys_result(keys_result), values_result(values_result), - comp(comp), context(context) - { - const unsigned int log_num_splitters_per_tile = log_tile_size - log_block_size; - log_num_merged_splitters_per_tile = log_num_splitters_per_tile + 1; - } - - unsigned int grid_size() const - { - const unsigned int max_num_blocks = max_grid_size(block_size); - return thrust::min(num_splitters, max_num_blocks); - } - - __device__ __thrust_forceinline__ - unsigned int even_offset(unsigned int oddeven_blockid) const - { - return oddeven_blockid << (log_num_merged_splitters_per_tile + log_block_size); - } - - __device__ __thrust_forceinline__ - void get_partition(unsigned int partition_idx, unsigned int oddeven_blockid, - unsigned int &rank1, unsigned int &size1, - unsigned int &rank2, unsigned int &size2) const - { - // XXX this logic would be much improved if we were guaranteed that there was - // an element at ranks_first[1] - // XXX we could eliminate the need for local_blockIdx, log_num_merged_splitters_per_block, tile_size, and n - - // the index of the merged splitter within the splitters for the odd-even block pair. - unsigned int local_blockIdx = partition_idx - (oddeven_blockid< n) - { - size2 = n - tile_size - even_offset(oddeven_blockid); - } // end if - - // measure each array relative to its beginning - size1 -= rank1; - size2 -= rank2; - } - - template - __device__ __thrust_forceinline__ - void do_it(KeyType *s_keys, ValueType *s_values) - { - // advance iterators - unsigned int i = context.block_index(); - ranks_first1 += i; - ranks_first2 += i; - - // Thread Block i merges the sub-block associated with splitter i: rank[i] -> rank[i+1] in a particular odd-even block pair. 
- for(; - i < num_splitters; - i += context.grid_dimension(), ranks_first1 += context.grid_dimension(), ranks_first2 += context.grid_dimension()) - { - // the (odd, even) block pair that the splitter belongs to. - unsigned int oddeven_blockid = i >> log_num_merged_splitters_per_tile; - - // start1 & start2 store rank[i] and rank[i+1] indices in arrays 1 and 2. - // size1 & size2 store the number of of elements between rank[i] & rank[i+1] in arrays 1 & 2. - unsigned int rank1, rank2, size1, size2; - get_partition(i, oddeven_blockid, rank1, size1, rank2, size2); - - // find where the odd,even arrays begin - RandomAccessIterator1 even_keys_first = keys_first + even_offset(oddeven_blockid); - RandomAccessIterator1 odd_keys_first = even_keys_first + tile_size; - - RandomAccessIterator2 even_values_first = values_first + even_offset(oddeven_blockid); - RandomAccessIterator2 odd_values_first = even_values_first + tile_size; - - // load tiles into smem - copy_n(context, even_keys_first + rank1, even_values_first + rank1, size1, s_keys, s_values); - copy_n(context, odd_keys_first + rank2, odd_values_first + rank2, size2, s_keys + size1, s_values + size1); - - context.barrier(); - - // merge the arrays in-place - block::inplace_merge_by_key_n(context, s_keys, s_values, size1, size2, comp); - - context.barrier(); - - // write tiles to gmem - unsigned int dst_offset = even_offset(oddeven_blockid) + rank1 + rank2; - copy_n(context, s_keys, s_values, size1 + size2, keys_result + dst_offset, values_result + dst_offset); - - context.barrier(); - } // end for i - } - - __device__ __thrust_forceinline__ - void operator()(void) - { - typedef typename iterator_value::type KeyType; - typedef typename iterator_value::type ValueType; - - __shared__ uninitialized_array s_keys; - __shared__ uninitialized_array s_values; - - do_it(s_keys.data(), s_values.data()); - } -}; // merge_subtiles_by_key_closure - -// merge_subtiles_by_key() merges each sub-tile independently. 
As explained in rank_splitters(), -// the sub-tiles are defined by the ranks of the splitter elements d_rank1 and d_rank2 in the odd and even tiles resp. -// It can be easily shown that each sub-tile cannot contain more than block_size elements of either the odd or even tile. - -// the function calls merge_subblocks_binarysearch_kernel() for the remaining N_splitterS sub-tiles -// We use 1 thread block per splitter: For instance, thread block 0 will merge rank1[0] -> rank1[1] of array i with -// rank2[0] -> rank2[1] of array i^1, with i being the thread block to which the splitter belongs. - -// We implement each sub-tile merge using a binary search. We compute the rank of each element belonging to a sub-tile -// of an odd numbered tile in the corresponding sub-tile of its even numbered pair. It then adds this rank to -// the index of the element in its own sub-tile to find the output index of the element in the merged sub-tile. - -template - void merge_subtiles_by_key(RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - RandomAccessIterator3 splitters_pos_first, - RandomAccessIterator3 splitters_pos_last, - RandomAccessIterator4 ranks_first1, - RandomAccessIterator5 ranks_first2, - RandomAccessIterator6 keys_result, - RandomAccessIterator7 values_result, - unsigned int log_tile_size, - StrictWeakOrdering comp) -{ - typedef typename iterator_value::type KeyType; - typedef typename iterator_value::type ValueType; - - const unsigned int block_size = stable_merge_sort_detail::block_size::value; - - typedef merge_subtiles_by_key_closure< - block_size, - RandomAccessIterator1, - RandomAccessIterator2, - RandomAccessIterator4, - RandomAccessIterator5, - RandomAccessIterator6, - RandomAccessIterator7, - StrictWeakOrdering, - detail::statically_blocked_thread_array - > Closure; - - Closure closure(keys_first, - values_first, - keys_last - keys_first, - ranks_first1, - ranks_first2, - log_tile_size, - 
splitters_pos_last - splitters_pos_first, - keys_result, - values_result, - comp); - - detail::launch_closure(closure, closure.grid_size(), block_size); -} - - -template - void merge_small_tiles_by_key(execution_policy &, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - size_t log_tile_size, - RandomAccessIterator3 keys_result, - RandomAccessIterator4 values_result, - StrictWeakOrdering comp) -{ - typedef merge_small_tiles_by_key_closure< - block_size, - RandomAccessIterator1, - RandomAccessIterator2, - RandomAccessIterator3, - RandomAccessIterator4, - StrictWeakOrdering, - detail::statically_blocked_thread_array - > Closure; - - Closure closure(keys_first, values_first, keys_last - keys_first, log_tile_size, keys_result, values_result, comp); - - detail::launch_closure(closure, closure.grid_size(), block_size); -} // end merge_small_tiles_by_key() - - -template - void merge_tiles_by_key_recursive(execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - RandomAccessIterator3 keys_result, - RandomAccessIterator4 values_result, - size_t log_tile_size, - StrictWeakOrdering comp) -{ - typedef typename iterator_value::type KeyType; - typedef typename iterator_value::type ValueType; - - const size_t tile_size = 1<::value; - - // Case (a): tile_size <= block_size - if(tile_size <= block_size) - { - return merge_small_tiles_by_key<2*block_size>(exec, keys_first, keys_last, values_first, log_tile_size, keys_result, values_result, comp); - } // end if - - // Case (b) tile_size >= block_size - - // step 1 of the recursive case: gather one splitter per block_size entries in each odd-even tile pair. 
- thrust::detail::temporary_array splitters(exec, thrust::detail::util::divide_ri(keys_last - keys_first, block_size)); - static_strided_integer_range splitters_pos(splitters.size()); - thrust::gather(exec, splitters_pos.begin(), splitters_pos.end(), keys_first, splitters.begin()); - - // step 2 of the recursive case: merge the splitters & their positions - thrust::detail::temporary_array merged_splitters(exec, splitters.size()); - thrust::detail::temporary_array merged_splitters_pos(exec, splitters.size()); - - const unsigned int log_block_size = thrust::detail::mpl::math::log2::value; - size_t log_num_splitters_per_tile = log_tile_size - log_block_size; - merge_tiles_by_key_recursive(exec, - splitters.begin(), - splitters.end(), - splitters_pos.begin(), - merged_splitters.begin(), - merged_splitters_pos.begin(), - log_num_splitters_per_tile, - comp); - - // step 3 of the recursive case: find the ranks of each splitter in the respective two tiles. - // reuse the merged_splitters_pos storage - thrust::detail::temporary_array &rank1 = merged_splitters_pos; - thrust::detail::temporary_array rank2(exec, rank1.size()); - - rank_splitters(merged_splitters.begin(), - merged_splitters.end(), - merged_splitters_pos.begin(), - keys_first, - keys_last, - log_tile_size, - rank1.begin(), - rank2.begin(), - comp); - - // step 4 of the recursive case: merge each sub-tile independently in parallel. 
- merge_subtiles_by_key(keys_first, - keys_last, - values_first, - merged_splitters_pos.begin(), - merged_splitters_pos.end(), - rank1.begin(), - rank2.begin(), - keys_result, - values_result, - log_tile_size, - comp); -} - - -template - void merge_tiles_by_key(execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator2 values_first, - size_t n, - RandomAccessIterator3 keys_result, - RandomAccessIterator4 values_result, - unsigned int log_tile_size, - StrictWeakOrdering comp) -{ - const unsigned int tile_size = 1 << log_tile_size; - const size_t num_tiles = thrust::detail::util::divide_ri(n, tile_size); - - // if there is an odd number of tiles, we should exclude the last one - // without a twin in merge_recursive - const size_t last_tile_offset = (num_tiles%2)?((num_tiles-1)*tile_size):n; - - merge_tiles_by_key_recursive(exec, - keys_first, - keys_first + last_tile_offset, - values_first, - keys_result, - values_result, - log_tile_size, - comp); - - // copy the last tile without a twin, should it exist - if(last_tile_offset < n) - { - thrust::copy(exec, keys_first + last_tile_offset, keys_first + n, keys_result + last_tile_offset); - thrust::copy(exec, values_first + last_tile_offset, values_first + n, values_result + last_tile_offset); - } // end if -} // end merge_tiles_by_key() - - -} // end stable_merge_sort_detail - - - -template -void stable_merge_sort(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - // XXX it's potentially unsafe to pass the same array for keys & values - thrust::system::cuda::detail::detail::stable_merge_sort_by_key(exec, first, last, first, comp); -} - - -template - void stable_merge_sort_by_key(execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp) -{ - typedef typename thrust::iterator_traits::value_type KeyType; - typedef typename 
thrust::iterator_traits::value_type ValueType; - - // compute the block_size based on the types we're sorting - const unsigned int block_size = stable_merge_sort_detail::block_size::value; - - // XXX WAR unused variable warning issued by nvcc - (void) block_size; - - // first, sort each tile of block_size elements - stable_sort_by_count(exec, keys_first, keys_last, values_first, comp); - - // merge tiles if there is more than one - const size_t n = keys_last - keys_first; - if(n > block_size) - { - // allocate scratch space - using namespace thrust::detail; - using namespace stable_merge_sort_detail; - temporary_array temp_keys(exec, n); - temporary_array temp_values(exec, n); - - // use a caching allocator for the calls to merge_tiles_by_key - // XXX unfortunately g++-4.2 can't deal with this special execution policy -#if defined(THRUST_GCC_VERSION) && THRUST_GCC_VERSION < 40300 - execution_policy &merge_exec = exec; -#else - cached_temporary_allocator merge_exec(exec); -#endif - - // The log(n) iterations start here. 
Each call to 'merge' merges an odd-even pair of tiles - unsigned int log_tile_size = thrust::detail::mpl::math::log2::value; - bool ping = true; - for(; (1u << log_tile_size) < n; ++log_tile_size, ping = !ping) - { - // we ping-pong back and forth - if(ping) - { - merge_tiles_by_key(merge_exec, keys_first, values_first, n, temp_keys.begin(), temp_values.begin(), log_tile_size, comp); - } // end if - else - { - merge_tiles_by_key(merge_exec, temp_keys.begin(), temp_values.begin(), n, keys_first, values_first, log_tile_size, comp); - } // end else - } // end for - - // this is to make sure that our data is finally in the data and keys arrays - // and not in the temporary arrays - if(!ping) - { - thrust::copy(exec, temp_keys.begin(), temp_keys.end(), keys_first); - thrust::copy(exec, temp_values.begin(), temp_values.end(), values_first); - } // end if - } // end if -} // end stable_merge_sort_by_key() - - -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END - diff --git a/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.h b/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.h deleted file mode 100644 index 8449a17b0d..0000000000 --- a/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ - -template -void stable_primitive_sort(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last); - -template -void stable_primitive_sort_by_key(execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first); - -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.inl b/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.inl deleted file mode 100644 index d6f4c775b6..0000000000 --- a/compat/thrust/system/cuda/detail/detail/stable_primitive_sort.inl +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ -namespace stable_primitive_sort_detail -{ - - -template - struct enable_if_bool_sort - : thrust::detail::enable_if< - thrust::detail::is_same< - bool, - typename thrust::iterator_value::type - >::value - > -{}; - - -template - struct disable_if_bool_sort - : thrust::detail::disable_if< - thrust::detail::is_same< - bool, - typename thrust::iterator_value::type - >::value - > -{}; - - -template - typename enable_if_bool_sort::type - stable_primitive_sort(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last) -{ - // use stable_partition if we're sorting bool - // stable_partition puts true values first, so we need to logical_not - thrust::stable_partition(exec, first, last, thrust::logical_not()); -} - - -template - typename disable_if_bool_sort::type - stable_primitive_sort(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last) -{ - // call stable_radix_sort - thrust::system::cuda::detail::detail::stable_radix_sort(exec,first,last); -} - - -struct logical_not_first -{ - template - __host__ __device__ - bool operator()(Tuple t) - { - return !thrust::get<0>(t); - } -}; - - -template - typename enable_if_bool_sort::type - stable_primitive_sort_by_key(execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first) -{ - // use stable_partition if we're sorting bool - // stable_partition puts true values first, so we need to logical_not - thrust::stable_partition(exec, - thrust::make_zip_iterator(thrust::make_tuple(keys_first, values_first)), - thrust::make_zip_iterator(thrust::make_tuple(keys_last, values_first)), - logical_not_first()); -} - - -template - typename disable_if_bool_sort::type - stable_primitive_sort_by_key(execution_policy &exec, - 
RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first) -{ - // call stable_radix_sort_by_key - thrust::system::cuda::detail::detail::stable_radix_sort_by_key(exec, keys_first, keys_last, values_first); -} - - - -} - -template -void stable_primitive_sort(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last) -{ - thrust::system::cuda::detail::detail::stable_primitive_sort_detail::stable_primitive_sort(exec,first,last); -} - -template -void stable_primitive_sort_by_key(execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first) -{ - thrust::system::cuda::detail::detail::stable_primitive_sort_detail::stable_primitive_sort_by_key(exec, keys_first, keys_last, values_first); -} - -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/detail/stable_radix_sort.h b/compat/thrust/system/cuda/detail/detail/stable_radix_sort.h deleted file mode 100644 index 7a8b9964c0..0000000000 --- a/compat/thrust/system/cuda/detail/detail/stable_radix_sort.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file stable_radix_sort_dev.h - * \brief Defines the interface for a stable radix sort implementation on CUDA - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ - -template -void stable_radix_sort(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last); - -template -void stable_radix_sort_by_key(execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first); - -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/detail/stable_radix_sort.inl b/compat/thrust/system/cuda/detail/detail/stable_radix_sort.inl deleted file mode 100644 index 9ea197702c..0000000000 --- a/compat/thrust/system/cuda/detail/detail/stable_radix_sort.inl +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -// do not attempt to compile this file with any other compiler -#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC - -#include -#include -#include -#include - -#include -#include -#include -#include - - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN - - -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ - -template -void stable_radix_sort(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last) -{ - typedef typename thrust::iterator_value::type K; - - unsigned int num_elements = last - first; - - // ensure data is properly aligned - if (!thrust::detail::util::is_aligned(thrust::raw_pointer_cast(&*first), 2*sizeof(K))) - { - thrust::detail::temporary_array aligned_keys(exec, first, last); - stable_radix_sort(exec, aligned_keys.begin(), aligned_keys.end()); - thrust::copy(exec, aligned_keys.begin(), aligned_keys.end(), first); - return; - } - - thrust::system::cuda::detail::detail::b40c_thrust::RadixSortingEnactor sorter(num_elements); - thrust::system::cuda::detail::detail::b40c_thrust::RadixSortStorage storage; - - // allocate temporary buffers - thrust::detail::temporary_array temp_keys(exec, num_elements); - thrust::detail::temporary_array temp_spine(exec, sorter.SpineElements()); - thrust::detail::temporary_array temp_from_alt(exec, 2); - - // define storage - storage.d_keys = thrust::raw_pointer_cast(&*first); - storage.d_alt_keys = thrust::raw_pointer_cast(&temp_keys[0]); - storage.d_spine = thrust::raw_pointer_cast(&temp_spine[0]); - storage.d_from_alt_storage = thrust::raw_pointer_cast(&temp_from_alt[0]); - - // perform the sort - sorter.EnactSort(storage); - - // radix sort sometimes leaves results in the alternate buffers - if (storage.using_alternate_storage) - { - thrust::copy(exec, temp_keys.begin(), temp_keys.end(), first); - } -} - -/////////////////////// -// Key-Value Sorting // -/////////////////////// - -// sort values 
directly -template -void stable_radix_sort_by_key(execution_policy &exec, - RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - thrust::detail::true_type) -{ - typedef typename thrust::iterator_value::type K; - typedef typename thrust::iterator_value::type V; - - unsigned int num_elements = last1 - first1; - - // ensure data is properly aligned - if (!thrust::detail::util::is_aligned(thrust::raw_pointer_cast(&*first1), 2*sizeof(K))) - { - thrust::detail::temporary_array aligned_keys(exec, first1, last1); - stable_radix_sort_by_key(exec, aligned_keys.begin(), aligned_keys.end(), first2); - thrust::copy(exec, aligned_keys.begin(), aligned_keys.end(), first1); - return; - } - if (!thrust::detail::util::is_aligned(thrust::raw_pointer_cast(&*first2), 2*sizeof(V))) - { - thrust::detail::temporary_array aligned_values(exec, first2, first2 + num_elements); - stable_radix_sort_by_key(exec, first1, last1, aligned_values.begin()); - thrust::copy(exec, aligned_values.begin(), aligned_values.end(), first2); - return; - } - - thrust::system::cuda::detail::detail::b40c_thrust::RadixSortingEnactor sorter(num_elements); - thrust::system::cuda::detail::detail::b40c_thrust::RadixSortStorage storage; - - // allocate temporary buffers - thrust::detail::temporary_array temp_keys(exec, num_elements); - thrust::detail::temporary_array temp_values(exec, num_elements); - thrust::detail::temporary_array temp_spine(exec, sorter.SpineElements()); - thrust::detail::temporary_array temp_from_alt(exec, 2); - - // define storage - storage.d_keys = thrust::raw_pointer_cast(&*first1); - storage.d_values = thrust::raw_pointer_cast(&*first2); - storage.d_alt_keys = thrust::raw_pointer_cast(&temp_keys[0]); - storage.d_alt_values = thrust::raw_pointer_cast(&temp_values[0]); - storage.d_spine = thrust::raw_pointer_cast(&temp_spine[0]); - storage.d_from_alt_storage = thrust::raw_pointer_cast(&temp_from_alt[0]); - - // perform the sort - sorter.EnactSort(storage); 
- - // radix sort sometimes leaves results in the alternate buffers - if (storage.using_alternate_storage) - { - thrust::copy(exec, temp_keys.begin(), temp_keys.end(), first1); - thrust::copy(exec, temp_values.begin(), temp_values.end(), first2); - } -} - - -// sort values indirectly -template -void stable_radix_sort_by_key(execution_policy &exec, - RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - thrust::detail::false_type) -{ - typedef typename thrust::iterator_value::type V; - - unsigned int num_elements = last1 - first1; - - // sort with integer values and then permute the real values accordingly - thrust::detail::temporary_array permutation(exec, num_elements); - thrust::sequence(exec, permutation.begin(), permutation.end()); - - stable_radix_sort_by_key(exec, first1, last1, permutation.begin()); - - // copy values into temp vector and then permute - thrust::detail::temporary_array temp_values(exec, first2, first2 + num_elements); - - // permute values - thrust::gather(exec, - permutation.begin(), permutation.end(), - temp_values.begin(), - first2); -} - - -template -void stable_radix_sort_by_key(execution_policy &exec, - RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2) -{ - typedef typename thrust::iterator_value::type V; - - // decide how to handle values - static const bool sort_values_directly = thrust::detail::is_trivial_iterator::value && - thrust::detail::is_arithmetic::value && - sizeof(V) <= 8; // TODO profile this - - // XXX WAR unused variable warning - (void) sort_values_directly; - - stable_radix_sort_by_key(exec, first1, last1, first2, - thrust::detail::integral_constant()); -} - -} // end namespace detail -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END - - -#endif // THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC - diff --git 
a/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.h b/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.h deleted file mode 100644 index b563654895..0000000000 --- a/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ - - -template -void stable_sort_by_count(execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - Compare comp); - - -} // end detail -} // end detail -} // end cuda -} // end system -} // end thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.inl b/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.inl deleted file mode 100644 index 5efb36b9b0..0000000000 --- a/compat/thrust/system/cuda/detail/detail/stable_sort_by_count.inl +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#include -#include -#include -#include -#include -#include -#include - - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ -namespace stable_sort_by_count_detail -{ - - -template -struct stable_sort_by_count_closure -{ - typedef Context context_type; - - RandomAccessIterator1 keys_first; - RandomAccessIterator2 values_first; - StrictWeakOrdering comp; // XXX this should probably be thrust::detail::device_function - const unsigned int n; - context_type context; - - stable_sort_by_count_closure(RandomAccessIterator1 keys_first, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp, - const unsigned int n, - context_type context = context_type()) - : keys_first(keys_first), - values_first(values_first), - comp(comp), - n(n), - context(context) - {} - - __device__ __thrust_forceinline__ - void operator()(void) - { - typedef typename iterator_value::type KeyType; - typedef typename iterator_value::type ValueType; - - __shared__ uninitialized_array s_keys; - __shared__ uninitialized_array s_data; - - const unsigned int grid_size = context.grid_dimension() * context.block_dimension(); - - // block_offset records the global index of this block's 0th thread - unsigned int block_offset = context.block_index() * block_size; - unsigned int i = context.thread_index() + block_offset; - - // advance iterators - keys_first += i; - values_first += i; - - for(; - block_offset < n; - block_offset += grid_size, i += grid_size, keys_first += grid_size, values_first += grid_size) - { - 
context.barrier(); - // copy input to shared - if(i < n) - { - s_keys[context.thread_index()] = *keys_first; - s_data[context.thread_index()] = *values_first; - } // end if - context.barrier(); - - // this block could be partially full - unsigned int length = block_size; - if(block_offset + block_size > n) - { - length = n - block_offset; - } // end if - - // run merge_sort over the block - block::merging_sort(context, s_keys.begin(), s_data.begin(), length, comp); - - // write result - if(i < n) - { - *keys_first = s_keys[context.thread_index()]; - *values_first = s_data[context.thread_index()]; - } // end if - } // end for i - } - - - static size_t max_grid_size() - { - const device_properties_t& properties = device_properties(); - - const unsigned int max_threads = properties.maxThreadsPerMultiProcessor * properties.multiProcessorCount; - const unsigned int max_blocks = properties.maxGridSize[0]; - - return thrust::min(max_blocks, 3 * max_threads / block_size); - } // end max_grid_size() - - - size_t grid_size() const - { - // compute the maximum number of blocks we can launch on this arch - const unsigned int max_num_blocks = max_grid_size(); - - // first, sort within each block - size_t num_blocks = n / block_size; - if(n % block_size) ++num_blocks; - - return thrust::min(num_blocks, max_num_blocks); - } // end grid_size() -}; // stable_sort_by_count_closure - - -} // end stable_sort_by_count_detail - - -template -void stable_sort_by_count(execution_policy &, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - Compare comp) -{ - typedef stable_sort_by_count_detail::stable_sort_by_count_closure< - count, - RandomAccessIterator1, - RandomAccessIterator2, - Compare, - detail::statically_blocked_thread_array - > Closure; - - Closure closure(keys_first, values_first, comp, keys_last - keys_first); - - // do an odd-even sort per block of data - detail::launch_closure(closure, closure.grid_size(), count); -} 
// end stable_sort_by_count() - - -} // end detail -} // end detail -} // end cuda -} // end system -} // end thrust - diff --git a/compat/thrust/system/cuda/detail/detail/uninitialized.h b/compat/thrust/system/cuda/detail/detail/uninitialized.h deleted file mode 100644 index a3e3dd2e76..0000000000 --- a/compat/thrust/system/cuda/detail/detail/uninitialized.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ - - -template - class uninitialized -{ - private: - typename aligned_storage< - sizeof(T), - alignment_of::value - >::type storage; - - __device__ __thrust_forceinline__ const T* ptr() const - { - return reinterpret_cast(storage.data); - } - - __device__ __thrust_forceinline__ T* ptr() - { - return reinterpret_cast(storage.data); - } - - public: - // copy assignment - __device__ __thrust_forceinline__ uninitialized &operator=(const T &other) - { - T& self = *this; - self = other; - return *this; - } - - __device__ __thrust_forceinline__ T& get() - { - return *ptr(); - } - - __device__ __thrust_forceinline__ const T& get() const - { - return *ptr(); - } - - __device__ __thrust_forceinline__ operator T& () - { - return get(); - } - - __device__ __thrust_forceinline__ operator const T&() const - { - return get(); - } - - __thrust_forceinline__ __device__ void construct() - { - ::new(ptr()) T(); - } - - template - __thrust_forceinline__ __device__ void construct(const Arg &a) - { - ::new(ptr()) T(a); - } - - template - __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2) - { - ::new(ptr()) T(a1,a2); - } - - template - __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3) - { - ::new(ptr()) T(a1,a2,a3); - } - - template - __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4) - { - ::new(ptr()) T(a1,a2,a3,a4); - } - - template - __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5) - { - ::new(ptr()) T(a1,a2,a3,a4,a5); - } - - template - __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5, const Arg6 &a6) - { - ::new(ptr()) 
T(a1,a2,a3,a4,a5,a6); - } - - template - __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5, const Arg6 &a6, const Arg7 &a7) - { - ::new(ptr()) T(a1,a2,a3,a4,a5,a6,a7); - } - - template - __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5, const Arg6 &a6, const Arg7 &a7, const Arg8 &a8) - { - ::new(ptr()) T(a1,a2,a3,a4,a5,a6,a7,a8); - } - - template - __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5, const Arg6 &a6, const Arg7 &a7, const Arg8 &a8, const Arg9 &a9) - { - ::new(ptr()) T(a1,a2,a3,a4,a5,a6,a7,a8,a9); - } - - template - __thrust_forceinline__ __device__ void construct(const Arg1 &a1, const Arg2 &a2, const Arg3 &a3, const Arg4 &a4, const Arg5 &a5, const Arg6 &a6, const Arg7 &a7, const Arg8 &a8, const Arg9 &a9, const Arg10 &a10) - { - ::new(ptr()) T(a1,a2,a3,a4,a5,a6,a7,a8,a9,a10); - } - - __thrust_forceinline__ __device__ void destroy() - { - T& self = *this; - self.~T(); - } -}; - - -template - class uninitialized_array -{ - public: - typedef T value_type; - typedef T& reference; - typedef const T& const_reference; - typedef T* pointer; - typedef const T* const_pointer; - typedef pointer iterator; - typedef const_pointer const_iterator; - typedef std::size_t size_type; - - __thrust_forceinline__ __device__ iterator begin() - { - return data(); - } - - __thrust_forceinline__ __device__ const_iterator begin() const - { - return data(); - } - - __thrust_forceinline__ __device__ iterator end() - { - return begin() + size(); - } - - __thrust_forceinline__ __device__ const_iterator end() const - { - return begin() + size(); - } - - __thrust_forceinline__ __device__ const_iterator cbegin() const - { - return begin(); - } - - __thrust_forceinline__ __device__ const_iterator cend() const - { - return end(); - } - - 
__thrust_forceinline__ __device__ size_type size() const - { - return N; - } - - __thrust_forceinline__ __device__ bool empty() const - { - return false; - } - - __thrust_forceinline__ __device__ T* data() - { - return impl.get(); - } - - __thrust_forceinline__ __device__ const T* data() const - { - return impl.get(); - } - - // element access - __thrust_forceinline__ __device__ reference operator[](size_type n) - { - return data()[n]; - } - - __thrust_forceinline__ __device__ const_reference operator[](size_type n) const - { - return data()[n]; - } - - __thrust_forceinline__ __device__ reference front() - { - return *data(); - } - - __thrust_forceinline__ __device__ const_reference front() const - { - return *data(); - } - - __thrust_forceinline__ __device__ reference back() - { - return data()[size() - size_type(1)]; - } - - __thrust_forceinline__ __device__ const_reference back() const - { - return data()[size() - size_type(1)]; - } - - private: - uninitialized impl; -}; - - -} // end detail -} // end detail -} // end cuda -} // end system -} // end thrust - diff --git a/compat/thrust/system/cuda/detail/equal.h b/compat/thrust/system/cuda/detail/equal.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/equal.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/error.inl b/compat/thrust/system/cuda/detail/error.inl deleted file mode 100644 index 41b928fa32..0000000000 --- a/compat/thrust/system/cuda/detail/error.inl +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include -#include - -namespace thrust -{ - -namespace system -{ - - -error_code make_error_code(cuda::errc::errc_t e) -{ - return error_code(static_cast(e), cuda_category()); -} // end make_error_code() - - -error_condition make_error_condition(cuda::errc::errc_t e) -{ - return error_condition(static_cast(e), cuda_category()); -} // end make_error_condition() - - -namespace cuda -{ - -namespace detail -{ - - -class cuda_error_category - : public error_category -{ - public: - inline cuda_error_category(void) {} - - inline virtual const char *name(void) const - { - return "cuda"; - } - - inline virtual std::string message(int ev) const - { - static const std::string unknown_err("Unknown error"); - const char *c_str = ::cudaGetErrorString(static_cast(ev)); - return c_str ? 
std::string(c_str) : unknown_err; - } - - inline virtual error_condition default_error_condition(int ev) const - { - using namespace cuda::errc; - - if(ev < ::cudaErrorApiFailureBase) - { - return make_error_condition(static_cast(ev)); - } - - return system_category().default_error_condition(ev); - } -}; // end cuda_error_category - -} // end detail - -} // end namespace cuda - - -const error_category &cuda_category(void) -{ - static const cuda::detail::cuda_error_category result; - return result; -} - - -} // end namespace system - -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/execution_policy.h b/compat/thrust/system/cuda/detail/execution_policy.h deleted file mode 100644 index 7dae04c1eb..0000000000 --- a/compat/thrust/system/cuda/detail/execution_policy.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -// put the canonical tag in the same ns as the backend's entry points -namespace detail -{ - -// this awkward sequence of definitions arise -// from the desire both for tag to derive -// from execution_policy and for execution_policy -// to convert to tag (when execution_policy is not -// an ancestor of tag) - -// forward declaration of tag -struct tag; - -// forward declaration of execution_policy -template struct execution_policy; - -// specialize execution_policy for tag -template<> - struct execution_policy - : thrust::execution_policy -{}; - -// tag's definition comes before the -// generic definition of execution_policy -struct tag : execution_policy {}; - -// allow conversion to tag when it is not a successor -template - struct execution_policy - : thrust::execution_policy -{ - // allow conversion to tag - inline operator tag () const - { - return tag(); - } -}; - - -template - struct cross_system - : thrust::execution_policy > -{ - inline __host__ __device__ - cross_system(thrust::execution_policy &system1, - thrust::execution_policy &system2) - : system1(system1), system2(system2) - {} - - thrust::execution_policy &system1; - thrust::execution_policy &system2; - - inline __host__ __device__ - cross_system rotate() const - { - return cross_system(system2,system1); - } -}; - - -// overloads of select_system - -// cpp interop -template -inline __host__ __device__ -cross_system select_system(const execution_policy &system1, const thrust::cpp::execution_policy &system2) -{ - thrust::execution_policy &non_const_system1 = const_cast&>(system1); - thrust::cpp::execution_policy &non_const_system2 = const_cast&>(system2); - return cross_system(non_const_system1,non_const_system2); -} - - -template -inline __host__ __device__ -cross_system select_system(const thrust::cpp::execution_policy &system1, execution_policy &system2) -{ - 
thrust::cpp::execution_policy &non_const_system1 = const_cast&>(system1); - thrust::execution_policy &non_const_system2 = const_cast&>(system2); - return cross_system(non_const_system1,non_const_system2); -} - - -} // end detail - -// alias execution_policy and tag here -using thrust::system::cuda::detail::execution_policy; -using thrust::system::cuda::detail::tag; - -} // end cuda -} // end system - -// alias items at top-level -namespace cuda -{ - -using thrust::system::cuda::execution_policy; -using thrust::system::cuda::tag; - -} // end cuda -} // end thrust - diff --git a/compat/thrust/system/cuda/detail/extern_shared_ptr.h b/compat/thrust/system/cuda/detail/extern_shared_ptr.h deleted file mode 100644 index 5f34cc89ba..0000000000 --- a/compat/thrust/system/cuda/detail/extern_shared_ptr.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -template - class extern_shared_ptr -{ -// don't attempt to compile with any compiler other than nvcc -// due to use of __shared__ below -#if THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC - public: - __device__ - inline operator T * (void) - { - extern __shared__ int4 smem[]; - return reinterpret_cast(smem); - } - - __device__ - inline operator const T * (void) const - { - extern __shared__ int4 smem[]; - return reinterpret_cast(smem); - } -#endif // THRUST_DEVICE_COMPILER_NVCC -}; // end extern_shared_ptr - -} // end detail -} // end cuda -} // end system -} // end thrust - diff --git a/compat/thrust/system/cuda/detail/extrema.h b/compat/thrust/system/cuda/detail/extrema.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/extrema.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/fill.h b/compat/thrust/system/cuda/detail/fill.h deleted file mode 100644 index 9c753bb9aa..0000000000 --- a/compat/thrust/system/cuda/detail/fill.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file fill.h - * \brief Device implementation of fill. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -template - void fill(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - const T &value); - -template - OutputIterator fill_n(execution_policy &exec, - OutputIterator first, - Size n, - const T &value); - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/fill.inl b/compat/thrust/system/cuda/detail/fill.inl deleted file mode 100644 index 3c1feb8ac8..0000000000 --- a/compat/thrust/system/cuda/detail/fill.inl +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file fill.inl - * \brief Inline file for fill.h. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace detail -{ - - -template - WidePtr widen_raw_ptr(T *ptr) -{ - typedef thrust::detail::pointer_traits WideTraits; - typedef typename WideTraits::element_type WideT; - - // carefully widen the pointer to avoid warnings about conversions between differently aligned types on ARM - WideT *wide_raw_ptr = static_cast(static_cast(ptr)); - - return WideTraits::pointer_to(*wide_raw_ptr); -} - - -template - Pointer wide_fill_n(execution_policy &exec, - Pointer first, - Size n, - const T &value) -{ - typedef typename thrust::iterator_value::type OutputType; - - size_t ALIGNMENT_BOUNDARY = 128; // begin copying blocks at this byte boundary - - WideType wide_exemplar; - OutputType narrow_exemplars[sizeof(WideType) / sizeof(OutputType)]; - - for (size_t i = 0; i < sizeof(WideType) / sizeof(OutputType); i++) - narrow_exemplars[i] = static_cast(value); - - // cast through char * to avoid type punning warnings - for (size_t i = 0; i < sizeof(WideType); i++) - reinterpret_cast(&wide_exemplar)[i] = reinterpret_cast(narrow_exemplars)[i]; - - OutputType *first_raw = thrust::raw_pointer_cast(first); - OutputType *last_raw = first_raw + n; - - OutputType *block_first_raw = (thrust::min)(first_raw + n, thrust::detail::util::align_up(first_raw, ALIGNMENT_BOUNDARY)); - OutputType *block_last_raw = 
(thrust::max)(block_first_raw, thrust::detail::util::align_down(last_raw, sizeof(WideType))); - - // rebind Pointer to WideType - typedef typename thrust::detail::rebind_pointer::type WidePtr; - - // point to the widened range - // XXX since we've got an execution policy, we probably don't even need to deal with rebinding pointers - WidePtr block_first_wide = widen_raw_ptr(block_first_raw); - WidePtr block_last_wide = widen_raw_ptr(block_last_raw); - - thrust::generate(exec, first, Pointer(block_first_raw), thrust::detail::fill_functor(value)); - thrust::generate(exec, block_first_wide, block_last_wide, thrust::detail::fill_functor(wide_exemplar)); - thrust::generate(exec, Pointer(block_last_raw), first + n, thrust::detail::fill_functor(value)); - - return first + n; -} - -template - OutputIterator fill_n(execution_policy &exec, - OutputIterator first, - Size n, - const T &value, - thrust::detail::false_type) -{ - thrust::detail::fill_functor func(value); - return thrust::generate_n(exec, first, n, func); -} - -template - OutputIterator fill_n(execution_policy &exec, - OutputIterator first, - Size n, - const T &value, - thrust::detail::true_type) -{ - typedef typename thrust::iterator_traits::value_type OutputType; - - if ( thrust::detail::util::is_aligned(thrust::raw_pointer_cast(&*first)) ) - { - if (compute_capability() < 20) - { - // 32-bit writes are faster on G80 and GT200 - typedef unsigned int WideType; - wide_fill_n(exec, &*first, n, value); - } - else - { - // 64-bit writes are faster on Fermi - typedef unsigned long long WideType; - wide_fill_n(exec, &*first, n, value); - } - - return first + n; - } - else - { - return fill_n(exec, first, n, value, thrust::detail::false_type()); - } -} - -} // end detail - -template - OutputIterator fill_n(execution_policy &exec, - OutputIterator first, - Size n, - const T &value) -{ - typedef typename thrust::iterator_traits::value_type OutputType; - - // we're compiling with nvcc, launch a kernel - const bool 
use_wide_fill = thrust::detail::is_trivial_iterator::value - && thrust::detail::has_trivial_assign::value - && (sizeof(OutputType) == 1 || sizeof(OutputType) == 2 || sizeof(OutputType) == 4); - - // XXX WAR usused variable warning - (void)use_wide_fill; - - return detail::fill_n(exec, first, n, value, thrust::detail::integral_constant()); -} - -template - void fill(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - const T &value) -{ - thrust::system::cuda::detail::fill_n(exec, first, thrust::distance(first,last), value); -} // end fill() - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/find.h b/compat/thrust/system/cuda/detail/find.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/find.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/for_each.h b/compat/thrust/system/cuda/detail/for_each.h deleted file mode 100644 index 56be13b177..0000000000 --- a/compat/thrust/system/cuda/detail/for_each.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file for_each.h - * \brief Defines the interface for a function that executes a - * function or functional for each value in a given range. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -template - RandomAccessIterator for_each(execution_policy &s, - RandomAccessIterator first, - RandomAccessIterator last, - UnaryFunction f); - -template - RandomAccessIterator for_each_n(execution_policy &s, - RandomAccessIterator first, - Size n, - UnaryFunction f); - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/for_each.inl b/compat/thrust/system/cuda/detail/for_each.inl deleted file mode 100644 index be6e56131f..0000000000 --- a/compat/thrust/system/cuda/detail/for_each.inl +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file for_each.inl - * \brief Inline file for for_each.h. 
- */ - -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace for_each_n_detail -{ - - -template -struct for_each_n_closure -{ - typedef void result_type; - typedef Context context_type; - - RandomAccessIterator first; - Size n; - thrust::detail::device_function f; - Context context; - - for_each_n_closure(RandomAccessIterator first, - Size n, - UnaryFunction f, - Context context = Context()) - : first(first), n(n), f(f), context(context) - {} - - __device__ __thrust_forceinline__ - result_type operator()(void) - { - const Size grid_size = context.block_dimension() * context.grid_dimension(); - - Size i = context.linear_index(); - - // advance iterator - first += i; - - while(i < n) - { - f(*first); - i += grid_size; - first += grid_size; - } - } -}; // end for_each_n_closure - - -template -thrust::tuple configure_launch(Size n) -{ - // calculate launch configuration - detail::launch_calculator calculator; - - thrust::tuple config = calculator.with_variable_block_size(); - size_t max_blocks = thrust::get<0>(config); - size_t block_size = thrust::get<1>(config); - size_t num_blocks = thrust::min(max_blocks, thrust::detail::util::divide_ri(n, block_size)); - - return thrust::make_tuple(num_blocks, block_size); -} - - -template -bool use_big_closure(Size n, unsigned int little_grid_size) -{ - // use the big closure when n will not fit within an unsigned int - // or if incrementing an unsigned int by little_grid_size would overflow - // the counter - - Size threshold = std::numeric_limits::max(); - - bool result = (sizeof(Size) > sizeof(unsigned int)) && (n > threshold); - - if(!result) - { - // check if we'd overflow the little closure's counter - unsigned int little_n = static_cast(n); - - if((little_n - 1u) + little_grid_size < little_n) - { - result = true; - } - } - - return result; -} - - -} // end 
for_each_n_detail - - -template -RandomAccessIterator for_each_n(execution_policy &, - RandomAccessIterator first, - Size n, - UnaryFunction f) -{ - // we're attempting to launch a kernel, assert we're compiling with nvcc - // ======================================================================== - // X Note to the user: If you've found this line due to a compiler error, X - // X you need to compile your code using nvcc, rather than g++ or cl.exe X - // ======================================================================== - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - - if(n <= 0) return first; // empty range - - // create two candidate closures to implement the for_each - // choose between them based on the whether we can fit n into a smaller integer - // and whether or not we'll overflow the closure's counter - - typedef detail::blocked_thread_array Context; - typedef for_each_n_detail::for_each_n_closure BigClosure; - typedef for_each_n_detail::for_each_n_closure LittleClosure; - - BigClosure big_closure(first, n, f); - LittleClosure little_closure(first, static_cast(n), f); - - thrust::tuple little_config = for_each_n_detail::configure_launch(n); - - unsigned int little_grid_size = thrust::get<0>(little_config) * thrust::get<1>(little_config); - - if(for_each_n_detail::use_big_closure(n, little_grid_size)) - { - // launch the big closure - thrust::tuple big_config = for_each_n_detail::configure_launch(n); - detail::launch_closure(big_closure, thrust::get<0>(big_config), thrust::get<1>(big_config)); - } - else - { - // launch the little closure - detail::launch_closure(little_closure, thrust::get<0>(little_config), thrust::get<1>(little_config)); - } - - return first + n; -} - - -template - InputIterator for_each(execution_policy &exec, - InputIterator first, - InputIterator last, - UnaryFunction f) -{ - return cuda::detail::for_each_n(exec, first, thrust::distance(first,last), f); -} // end for_each() - - -} // end namespace 
detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/gather.h b/compat/thrust/system/cuda/detail/gather.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/gather.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/generate.h b/compat/thrust/system/cuda/detail/generate.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/generate.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/get_value.h b/compat/thrust/system/cuda/detail/get_value.h deleted file mode 100644 index 273023f612..0000000000 --- a/compat/thrust/system/cuda/detail/get_value.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - - -namespace -{ - - -template -inline __host__ __device__ - typename thrust::iterator_value::type - get_value_msvc2005_war(execution_policy &exec, Pointer ptr) -{ - typedef typename thrust::iterator_value::type result_type; - - // XXX war nvbugs/881631 - struct war_nvbugs_881631 - { - __host__ inline static result_type host_path(execution_policy &exec, Pointer ptr) - { - // when called from host code, implement with assign_value - // note that this requires a type with default constructor - result_type result; - - thrust::host_system_tag host_tag; - cross_system systems(host_tag, exec); - assign_value(systems, &result, ptr); - - return result; - } - - __device__ inline static result_type device_path(execution_policy &, Pointer ptr) - { - // when called from device code, just do simple deref - return *thrust::raw_pointer_cast(ptr); - } - }; - -#ifndef __CUDA_ARCH__ - return 
war_nvbugs_881631::host_path(exec, ptr); -#else - return war_nvbugs_881631::device_path(exec, ptr); -#endif // __CUDA_ARCH__ -} // end get_value_msvc2005_war() - - -} // end anon namespace - - -template -inline __host__ __device__ - typename thrust::iterator_value::type - get_value(execution_policy &exec, Pointer ptr) -{ - return get_value_msvc2005_war(exec,ptr); -} // end get_value() - - -} // end detail -} // end cuda -} // end system -} // end thrust - diff --git a/compat/thrust/system/cuda/detail/guarded_cuda_runtime_api.h b/compat/thrust/system/cuda/detail/guarded_cuda_runtime_api.h deleted file mode 100644 index e6c0d2812e..0000000000 --- a/compat/thrust/system/cuda/detail/guarded_cuda_runtime_api.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// the purpose of this header is to check for the existence of macros -// such as __host__ and __device__, which may already be defined by thrust -// and to undefine them before entering cuda_runtime_api.h (which will redefine them) - -// we only try to do this stuff if cuda/include/host_defines.h has been included -#if !defined(__HOST_DEFINES_H__) - -#ifdef __host__ -#undef __host__ -#endif // __host__ - -#ifdef __device__ -#undef __device__ -#endif // __device__ - -#endif // __HOST_DEFINES_H__ - -#include - diff --git a/compat/thrust/system/cuda/detail/inner_product.h b/compat/thrust/system/cuda/detail/inner_product.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/inner_product.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/iter_swap.h b/compat/thrust/system/cuda/detail/iter_swap.h deleted file mode 100644 index 9b2bcf069f..0000000000 --- a/compat/thrust/system/cuda/detail/iter_swap.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - - -template -inline __host__ __device__ -void iter_swap(tag, Pointer1 a, Pointer2 b) -{ - // XXX war nvbugs/881631 - struct war_nvbugs_881631 - { - __host__ inline static void host_path(Pointer1 a, Pointer2 b) - { - thrust::swap_ranges(a, a + 1, b); - } - - __device__ inline static void device_path(Pointer1 a, Pointer2 b) - { - using thrust::swap; - swap(*thrust::raw_pointer_cast(a), - *thrust::raw_pointer_cast(b)); - } - }; - -#ifndef __CUDA_ARCH__ - return war_nvbugs_881631::host_path(a,b); -#else - return war_nvbugs_881631::device_path(a,b); -#endif // __CUDA_ARCH__ -} // end iter_swap() - - -} // end detail -} // end cuda -} // end system -} // end thrust - diff --git a/compat/thrust/system/cuda/detail/logical.h b/compat/thrust/system/cuda/detail/logical.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/logical.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/malloc_and_free.h b/compat/thrust/system/cuda/detail/malloc_and_free.h deleted file mode 100644 index 676dd7cd5a..0000000000 --- a/compat/thrust/system/cuda/detail/malloc_and_free.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - - -// note that malloc returns a raw pointer to avoid -// depending on the heavyweight thrust/system/cuda/memory.h header -template - void *malloc(execution_policy &, std::size_t n) -{ - void *result = 0; - - cudaError_t error = cudaMalloc(reinterpret_cast(&result), n); - - if(error) - { - throw thrust::system::detail::bad_alloc(thrust::cuda_category().message(error).c_str()); - } // end if - - return result; -} // end malloc() - - -template - void free(execution_policy &, Pointer ptr) -{ - cudaError_t error = cudaFree(thrust::raw_pointer_cast(ptr)); - - if(error) - { - throw thrust::system_error(error, thrust::cuda_category()); - } // end error -} // end free() - - -} // end detail -} // end cuda -} // end system -} // end thrust - diff --git a/compat/thrust/system/cuda/detail/memory.inl b/compat/thrust/system/cuda/detail/memory.inl deleted file mode 100644 index 998b54e345..0000000000 --- a/compat/thrust/system/cuda/detail/memory.inl +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include - -namespace thrust -{ - -// XXX WAR an issue with MSVC 2005 (cl v14.00) incorrectly implementing -// pointer_raw_pointer for pointer by specializing it here -// note that we specialize it here, before the use of raw_pointer_cast -// below, which causes pointer_raw_pointer's instantiation -#if (THRUST_HOST_COMPILER == THRUST_HOST_COMPILER_MSVC) && (_MSC_VER <= 1400) -namespace detail -{ - -template - struct pointer_raw_pointer< thrust::cuda::pointer > -{ - typedef typename thrust::cuda::pointer::raw_pointer type; -}; // end pointer_raw_pointer - -} // end detail -#endif - -namespace system -{ -namespace cuda -{ - - -template - template - reference & - reference - ::operator=(const reference &other) -{ - return super_t::operator=(other); -} // end reference::operator=() - -template - reference & - reference - ::operator=(const value_type &x) -{ - return super_t::operator=(x); -} // end reference::operator=() - -template -__host__ __device__ -void swap(reference a, reference b) -{ - a.swap(b); -} // end swap() - -pointer malloc(std::size_t n) -{ - tag cuda_tag; - return pointer(thrust::system::cuda::detail::malloc(cuda_tag, n)); -} // end malloc() - -template -pointer malloc(std::size_t n) -{ - pointer raw_ptr = thrust::system::cuda::malloc(sizeof(T) * n); - return pointer(reinterpret_cast(raw_ptr.get())); -} // end malloc() - -void free(pointer ptr) -{ - tag cuda_tag; - return thrust::system::cuda::detail::free(cuda_tag, ptr.get()); -} // end free() - -} // end cuda -} // end system -} // end thrust - diff --git a/compat/thrust/system/cuda/detail/merge.h b/compat/thrust/system/cuda/detail/merge.h deleted file mode 100644 index e01b705470..0000000000 --- a/compat/thrust/system/cuda/detail/merge.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -template - RandomAccessIterator3 merge(execution_policy &exec, - RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - RandomAccessIterator2 last2, - RandomAccessIterator3 result, - StrictWeakOrdering comp); - -} // end detail -} // end cuda -} // end system -} // end thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/merge.inl b/compat/thrust/system/cuda/detail/merge.inl deleted file mode 100644 index bf7516fde1..0000000000 --- a/compat/thrust/system/cuda/detail/merge.inl +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace merge_detail -{ - - -template -__device__ __thrust_forceinline__ -thrust::pair - partition_search(RandomAccessIterator1 first1, - RandomAccessIterator2 first2, - Size diag, - Size lower_bound1, - Size upper_bound1, - Size lower_bound2, - Size upper_bound2, - Compare comp) -{ - Size begin = thrust::max(lower_bound1, diag - upper_bound2); - Size end = thrust::min(diag - lower_bound2, upper_bound1); - - while(begin < end) - { - Size mid = (begin + end) / 2; - Size index1 = mid; - Size index2 = diag - mid - 1; - - if(comp(first2[index2], first1[index1])) - { - end = mid; - } - else - { - begin = mid + 1; - } - } - - return thrust::make_pair(begin, diag - begin); -} - - -template -__device__ __thrust_forceinline__ -void merge_n(Context &ctx, - RandomAccessIterator1 first1, - Size n1, - RandomAccessIterator2 first2, - Size n2, - RandomAccessIterator3 result, - Compare comp_, - unsigned int work_per_thread) -{ - const unsigned int block_size = ctx.block_dimension(); - thrust::detail::device_function comp(comp_); - typedef typename thrust::iterator_value::type value_type1; - typedef typename thrust::iterator_value::type value_type2; - - Size result_size = n1 + n2; - - // this is just oversubscription_rate * block_size * work_per_thread - // but it makes no sense to send oversubscription_rate as an extra parameter - Size work_per_block = thrust::detail::util::divide_ri(result_size, ctx.grid_dimension()); - - using thrust::system::cuda::detail::detail::uninitialized; - __shared__ uninitialized > s_block_input_begin; - - Size result_block_offset = ctx.block_index() * work_per_block; - - // find where this block's input begins in both input sequences - if(ctx.thread_index() == 0) - { - s_block_input_begin = (ctx.block_index() == 0) ? 
- thrust::pair(0,0) : - partition_search(first1, first2, - result_block_offset, - Size(0), n1, - Size(0), n2, - comp); - } - - ctx.barrier(); - - // iterate to consume this block's input - Size work_per_iteration = block_size * work_per_thread; - thrust::pair block_input_end = s_block_input_begin; - block_input_end.first += work_per_iteration; - block_input_end.second += work_per_iteration; - Size result_block_offset_last = result_block_offset + thrust::min(work_per_block, result_size - result_block_offset); - - for(; - result_block_offset < result_block_offset_last; - result_block_offset += work_per_iteration, - block_input_end.first += work_per_iteration, - block_input_end.second += work_per_iteration - ) - { - // find where this thread's input begins in both input sequences for this iteration - thrust::pair thread_input_begin = - partition_search(first1, first2, - Size(result_block_offset + ctx.thread_index() * work_per_thread), - s_block_input_begin.get().first, thrust::min(block_input_end.first , n1), - s_block_input_begin.get().second, thrust::min(block_input_end.second, n2), - comp); - - ctx.barrier(); - - // XXX the performance impact of not keeping x1 & x2 - // in registers is about 10% for int32 - uninitialized x1; - uninitialized x2; - - // XXX this is just a serial merge -- try to simplify or abstract this loop - Size i = result_block_offset + ctx.thread_index() * work_per_thread; - Size last_i = i + thrust::min(work_per_thread, result_size - thread_input_begin.first - thread_input_begin.second); - for(; - i < last_i; - ++i) - { - // optionally load x1 & x2 - bool output_x2 = true; - if(thread_input_begin.second < n2) - { - x2 = first2[thread_input_begin.second]; - } - else - { - output_x2 = false; - } - - if(thread_input_begin.first < n1) - { - x1 = first1[thread_input_begin.first]; - - if(output_x2) - { - output_x2 = comp(x2.get(), x1.get()); - } - } - - result[i] = output_x2 ? 
x2.get() : x1.get(); - - if(output_x2) - { - ++thread_input_begin.second; - } - else - { - ++thread_input_begin.first; - } - } // end for - - // the block's last thread has conveniently located the - // beginning of the next iteration's input - if(ctx.thread_index() == block_size-1) - { - s_block_input_begin = thread_input_begin; - } - ctx.barrier(); - } // end for -} // end merge_n - - -template - struct merge_n_closure -{ - typedef thrust::system::cuda::detail::detail::blocked_thread_array context_type; - - RandomAccessIterator1 first1; - Size n1; - RandomAccessIterator2 first2; - Size n2; - RandomAccessIterator3 result; - Compare comp; - Size work_per_thread; - - merge_n_closure(RandomAccessIterator1 first1, Size n1, RandomAccessIterator2 first2, Size n2, RandomAccessIterator3 result, Compare comp, Size work_per_thread) - : first1(first1), n1(n1), first2(first2), n2(n2), result(result), comp(comp), work_per_thread(work_per_thread) - {} - - __device__ __forceinline__ - void operator()() - { - context_type ctx; - merge_n(ctx, first1, n1, first2, n2, result, comp, work_per_thread); - } -}; - - -// returns (work_per_thread, threads_per_block, oversubscription_factor) -template - thrust::tuple - tunables(RandomAccessIterator1, RandomAccessIterator1, RandomAccessIterator2, RandomAccessIterator2, RandomAccessIterator3, Compare comp) -{ - // determined by empirical testing on GTX 480 - // ~4500 Mkeys/s on GTX 480 - const unsigned int work_per_thread = 5; - const unsigned int threads_per_block = 128; - const unsigned int oversubscription_factor = 30; - - return thrust::make_tuple(work_per_thread, threads_per_block, oversubscription_factor); -} - - -} // end merge_detail - - -template -RandomAccessIterator3 merge(execution_policy &exec, - RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - RandomAccessIterator2 last2, - RandomAccessIterator3 result, - Compare comp) -{ - typedef typename thrust::iterator_difference::type Size; - 
Size n1 = last1 - first1; - Size n2 = last2 - first2; - typename thrust::iterator_difference::type n = n1 + n2; - - // empty result - if(n <= 0) return result; - - unsigned int work_per_thread = 0, threads_per_block = 0, oversubscription_factor = 0; - thrust::tie(work_per_thread,threads_per_block,oversubscription_factor) - = merge_detail::tunables(first1, last1, first2, last2, result, comp); - - const unsigned int work_per_block = work_per_thread * threads_per_block; - - const unsigned int num_processors = device_properties().multiProcessorCount; - const unsigned int num_blocks = thrust::min(oversubscription_factor * num_processors, thrust::detail::util::divide_ri(n, work_per_block)); - - typedef merge_detail::merge_n_closure closure_type; - closure_type closure(first1, n1, first2, n2, result, comp, work_per_thread); - - detail::launch_closure(closure, num_blocks, threads_per_block); - - return result + n1 + n2; -} // end merge() - - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/mismatch.h b/compat/thrust/system/cuda/detail/mismatch.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/mismatch.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/par.h b/compat/thrust/system/cuda/detail/par.h deleted file mode 100644 index e56128c1d0..0000000000 --- a/compat/thrust/system/cuda/detail/par.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - - -struct par_t : thrust::system::cuda::detail::execution_policy -{ - par_t() : thrust::system::cuda::detail::execution_policy() {} - - template - thrust::detail::execute_with_allocator - operator()(Allocator &alloc) const - { - return thrust::detail::execute_with_allocator(alloc); - } -}; - - -} // end detail - - -static const detail::par_t par; - - -} // end cuda -} // end system - - -// alias par here -namespace cuda -{ - - -using thrust::system::cuda::par; - - -} // end cuda -} // end thrust - diff --git a/compat/thrust/system/cuda/detail/partition.h b/compat/thrust/system/cuda/detail/partition.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/partition.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the 
License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/reduce.h b/compat/thrust/system/cuda/detail/reduce.h deleted file mode 100644 index d188f60f25..0000000000 --- a/compat/thrust/system/cuda/detail/reduce.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file reduce.h - * \brief Reduce a sequence of elements with a given length. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - - -template - OutputType reduce(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputType init, - BinaryFunction binary_op); - - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/reduce.inl b/compat/thrust/system/cuda/detail/reduce.inl deleted file mode 100644 index 66b4ac72c2..0000000000 --- a/compat/thrust/system/cuda/detail/reduce.inl +++ /dev/null @@ -1,275 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file reduce.inl - * \brief Inline file for reduce.h - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -namespace reduce_detail -{ - -/* - * Reduce a vector of n elements using binary_op() - * - * The order of reduction is not defined, so binary_op() should - * be a commutative (and associative) operator such as - * (integer) addition. Since floating point operations - * do not completely satisfy these criteria, the result is - * generally not the same as a consecutive reduction of - * the elements. 
- * - * Uses the same pattern as reduce6() in the CUDA SDK - * - */ -template -struct unordered_reduce_closure -{ - InputIterator input; - Size n; - T init; - OutputIterator output; - BinaryFunction binary_op; - unsigned int shared_array_size; - - typedef Context context_type; - context_type context; - - unordered_reduce_closure(InputIterator input, Size n, T init, OutputIterator output, BinaryFunction binary_op, unsigned int shared_array_size, Context context = Context()) - : input(input), n(n), init(init), output(output), binary_op(binary_op), shared_array_size(shared_array_size), context(context) {} - - __device__ __thrust_forceinline__ - void operator()(void) - { - typedef typename thrust::iterator_value::type OutputType; - extern_shared_ptr shared_array; - - Size grid_size = context.block_dimension() * context.grid_dimension(); - - Size i = context.linear_index(); - - input += i; - - // compute reduction with all blockDim.x threads - OutputType sum = thrust::raw_reference_cast(*input); - - i += grid_size; - input += grid_size; - - while (i < n) - { - OutputType val = thrust::raw_reference_cast(*input); - - sum = binary_op(sum, val); - - i += grid_size; - input += grid_size; - } - - // write first shared_array_size values into shared memory - if (context.thread_index() < shared_array_size) - shared_array[context.thread_index()] = sum; - - // accumulate remaining values (if any) to shared memory in stages - if (context.block_dimension() > shared_array_size) - { - unsigned int lb = shared_array_size; - unsigned int ub = shared_array_size + lb; - - while (lb < context.block_dimension()) - { - context.barrier(); - - if (lb <= context.thread_index() && context.thread_index() < ub) - { - OutputType tmp = shared_array[context.thread_index() - lb]; - shared_array[context.thread_index() - lb] = binary_op(tmp, sum); - } - - lb += shared_array_size; - ub += shared_array_size; - } - } - - context.barrier(); - - block::reduce_n(context, shared_array, 
thrust::min(context.block_dimension(), shared_array_size), binary_op); - - if (context.thread_index() == 0) - { - OutputType tmp = shared_array[0]; - - if (context.grid_dimension() == 1) - tmp = binary_op(init, tmp); - - output += context.block_index(); - *output = tmp; - } - } -}; - - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN - -template - OutputType reduce(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputType init, - BinaryFunction binary_op) -{ - // we're attempting to launch a kernel, assert we're compiling with nvcc - // ======================================================================== - // X Note to the user: If you've found this line due to a compiler error, X - // X you need to compile your code using nvcc, rather than g++ or cl.exe X - // ======================================================================== - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - - typedef typename thrust::iterator_difference::type difference_type; - - difference_type n = thrust::distance(first,last); - - if (n == 0) - return init; - - typedef thrust::detail::temporary_array OutputArray; - typedef typename OutputArray::iterator OutputIterator; - - typedef detail::blocked_thread_array Context; - typedef unordered_reduce_closure Closure; - - function_attributes_t attributes = detail::closure_attributes(); - - // TODO chose this in a more principled manner - size_t threshold = thrust::max(2 * attributes.maxThreadsPerBlock, 1024); - - device_properties_t properties = device_properties(); - - // launch configuration - size_t num_blocks; - size_t block_size; - size_t array_size; - size_t smem_bytes; - - // first level reduction - if (static_cast(n) < threshold) - { - num_blocks = 1; - block_size = thrust::min(static_cast(n), static_cast(attributes.maxThreadsPerBlock)); - array_size = thrust::min(block_size, (properties.sharedMemPerBlock - attributes.sharedSizeBytes) / sizeof(OutputType)); - smem_bytes = 
sizeof(OutputType) * array_size; - } - else - { - detail::launch_calculator calculator; - - thrust::tuple config = calculator.with_variable_block_size_available_smem(); - - num_blocks = thrust::min(thrust::get<0>(config), static_cast(n) / thrust::get<1>(config)); - block_size = thrust::get<1>(config); - array_size = thrust::min(block_size, thrust::get<2>(config) / sizeof(OutputType)); - smem_bytes = sizeof(OutputType) * array_size; - } - - // TODO assert(n <= num_blocks * block_size); - // TODO if (shared_array_size < 1) throw cuda exception "insufficient shared memory" - - OutputArray output(exec, num_blocks); - - Closure closure(first, n, init, output.begin(), binary_op, array_size); - - //std::cout << "Launching " << num_blocks << " blocks of kernel with " << block_size << " threads and " << smem_bytes << " shared memory per block " << std::endl; - - detail::launch_closure(closure, num_blocks, block_size, smem_bytes); - - // second level reduction - if (num_blocks > 1) - { - typedef detail::blocked_thread_array Context; - typedef unordered_reduce_closure Closure; - - function_attributes_t attributes = detail::closure_attributes(); - - num_blocks = 1; - block_size = thrust::min(output.size(), static_cast(attributes.maxThreadsPerBlock)); - array_size = thrust::min(block_size, (properties.sharedMemPerBlock - attributes.sharedSizeBytes) / sizeof(OutputType)); - smem_bytes = sizeof(OutputType) * array_size; - - // TODO if (shared_array_size < 1) throw cuda exception "insufficient shared memory" - - Closure closure(output.begin(), output.size(), init, output.begin(), binary_op, array_size); - - //std::cout << "Launching " << num_blocks << " blocks of kernel with " << block_size << " threads and " << smem_bytes << " shared memory per block " << std::endl; - - detail::launch_closure(closure, num_blocks, block_size, smem_bytes); - } - - return output[0]; -} // end reduce - -} // end reduce_detail - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END - -template - 
OutputType reduce(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputType init, - BinaryFunction binary_op) -{ - return reduce_detail::reduce(exec, first, last, init, binary_op); -} // end reduce() - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/reduce_by_key.h b/compat/thrust/system/cuda/detail/reduce_by_key.h deleted file mode 100644 index 9b8ec10936..0000000000 --- a/compat/thrust/system/cuda/detail/reduce_by_key.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file reduce_by_key.h - * \brief CUDA implementation of reduce_by_key - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -template - thrust::pair - reduce_by_key(execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred, - BinaryFunction binary_op); - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/reduce_by_key.inl b/compat/thrust/system/cuda/detail/reduce_by_key.inl deleted file mode 100644 index 18dc1e4994..0000000000 --- a/compat/thrust/system/cuda/detail/reduce_by_key.inl +++ /dev/null @@ -1,705 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace reduce_by_key_detail -{ - -template -struct tail_flag_functor -{ - BinaryPredicate binary_pred; // NB: this must be the first member for performance reasons - IndexType n; - - typedef FlagType result_type; - - tail_flag_functor(IndexType n, BinaryPredicate binary_pred) - : n(n), binary_pred(binary_pred) - {} - - // XXX why is this noticably faster? (it may read past the end of input) - //FlagType operator()(const thrust::tuple& t) const - - template - __host__ __device__ __thrust_forceinline__ - FlagType operator()(const Tuple& t) - { - if (thrust::get<0>(t) == (n - 1) || !binary_pred(thrust::get<1>(t), thrust::get<2>(t))) - return 1; - else - return 0; - } -}; - - -template -__device__ __thrust_forceinline__ -FlagType load_flags(Context context, - const unsigned int n, - FlagIterator iflags, - FlagType (&sflag)[CTA_SIZE]) -{ - FlagType flag_bits = 0; - - // load flags in unordered fashion - for(unsigned int k = 0; k < K; k++) - { - const unsigned int offset = k*CTA_SIZE + context.thread_index(); - - if (FullBlock || offset < n) - { - FlagIterator temp = iflags + offset; - if (*temp) - flag_bits |= FlagType(1) << k; - } - } - - sflag[context.thread_index()] = flag_bits; - - context.barrier(); - - flag_bits = 0; - - // obtain flags for iflags[K * context.thread_index(), K * context.thread_index() + K) - for(unsigned int k = 0; k < K; k++) - { - const unsigned int offset = K * context.thread_index() + k; - - if (FullBlock || offset < n) - { - flag_bits |= ((sflag[offset % CTA_SIZE] >> (offset / CTA_SIZE)) & FlagType(1)) << k; - } - } - - context.barrier(); - - 
sflag[context.thread_index()] = flag_bits; - - context.barrier(); - - return flag_bits; -} - -template -__device__ __thrust_forceinline__ -void load_values(Context context, - const unsigned int n, - InputIterator2 ivals, - ValueType (&sdata)[K][CTA_SIZE + 1]) -{ - for(unsigned int k = 0; k < K; k++) - { - const unsigned int offset = k*CTA_SIZE + context.thread_index(); - - if (FullBlock || offset < n) - { - InputIterator2 temp = ivals + offset; - sdata[offset % K][offset / K] = *temp; - } - } - - context.barrier(); -} - - -template -__device__ __thrust_forceinline__ -void reduce_by_key_body(Context context, - const unsigned int n, - InputIterator1 ikeys, - InputIterator2 ivals, - OutputIterator1 okeys, - OutputIterator2 ovals, - BinaryPredicate binary_pred, - BinaryFunction binary_op, - FlagIterator iflags, - FlagType (&sflag)[CTA_SIZE], - ValueType (&sdata)[K][CTA_SIZE + 1], - bool& carry_in, - IndexType& carry_index, - ValueType& carry_value) -{ - // load flags - const FlagType flag_bits = load_flags(context, n, iflags, sflag); - const FlagType flag_count = __popc(flag_bits); // TODO hide this behind a template - const FlagType left_flag = (context.thread_index() == 0) ? 0 : sflag[context.thread_index() - 1]; - const FlagType head_flag = (context.thread_index() == 0 || flag_bits & ((1 << (K - 1)) - 1) || left_flag & (1 << (K - 1))) ? 1 : 0; - - context.barrier(); - - // scan flag counts - sflag[context.thread_index()] = flag_count; context.barrier(); - - block::inclusive_scan(context, sflag, thrust::plus()); - - const FlagType output_position = (context.thread_index() == 0) ? 
0 : sflag[context.thread_index() - 1]; - const FlagType num_outputs = sflag[CTA_SIZE - 1]; - - context.barrier(); - - // shuffle keys and write keys out - if (!thrust::detail::is_discard_iterator::value) - { - // XXX this could be improved - for (unsigned int i = 0; i < num_outputs; i += CTA_SIZE) - { - FlagType position = output_position; - - for(unsigned int k = 0; k < K; k++) - { - if (flag_bits & (FlagType(1) << k)) - { - if (i <= position && position < i + CTA_SIZE) - sflag[position - i] = K * context.thread_index() + k; - position++; - } - } - - context.barrier(); - - if (i + context.thread_index() < num_outputs) - { - InputIterator1 tmp1 = ikeys + sflag[context.thread_index()]; - OutputIterator1 tmp2 = okeys + (i + context.thread_index()); - *tmp2 = *tmp1; - } - - context.barrier(); - } - } - - // load values - load_values (context, n, ivals, sdata); - - ValueType ldata[K]; - for (unsigned int k = 0; k < K; k++) - ldata[k] = sdata[k][context.thread_index()]; - - // carry in (if necessary) - if (context.thread_index() == 0 && carry_in) - { - // XXX WAR sm_10 issue - ValueType tmp1 = carry_value; - ldata[0] = binary_op(tmp1, ldata[0]); - } - - context.barrier(); - - // sum local values - { - for(unsigned int k = 1; k < K; k++) - { - const unsigned int offset = K * context.thread_index() + k; - - if (FullBlock || offset < n) - { - if (!(flag_bits & (FlagType(1) << (k - 1)))) - ldata[k] = binary_op(ldata[k - 1], ldata[k]); - } - } - } - - // second level segmented scan - { - // use head flags for segmented scan - sflag[context.thread_index()] = head_flag; sdata[K - 1][context.thread_index()] = ldata[K - 1]; context.barrier(); - - if (FullBlock) - block::inclusive_scan_by_flag(context, sflag, sdata[K-1], binary_op); - else - block::inclusive_scan_by_flag_n(context, sflag, sdata[K-1], n, binary_op); - } - - // update local values - if (context.thread_index() > 0) - { - unsigned int update_bits = (flag_bits << 1) | (left_flag >> (K - 1)); -// TODO remove guard -#if 
THRUST_DEVICE_COMPILER == THRUST_DEVICE_COMPILER_NVCC - unsigned int update_count = __ffs(update_bits) - 1u; // NB: this might wrap around to UINT_MAX -#else - unsigned int update_count = 0; -#endif // THRUST_DEVICE_COMPILER_NVCC - - if (!FullBlock && (K + 1) * context.thread_index() > n) - update_count = thrust::min(n - K * context.thread_index(), update_count); - - ValueType left = sdata[K - 1][context.thread_index() - 1]; - - for(unsigned int k = 0; k < K; k++) - { - if (k < update_count) - ldata[k] = binary_op(left, ldata[k]); - } - } - - context.barrier(); - - // store carry out - if (FullBlock) - { - if (context.thread_index() == CTA_SIZE - 1) - { - carry_value = ldata[K - 1]; - carry_in = (flag_bits & (FlagType(1) << (K - 1))) ? false : true; - carry_index = num_outputs; - } - } - else - { - if (context.thread_index() == (n - 1) / K) - { - for (unsigned int k = 0; k < K; k++) - if (k == (n - 1) % K) - carry_value = ldata[k]; - carry_in = (flag_bits & (FlagType(1) << ((n - 1) % K))) ? 
false : true; - carry_index = num_outputs; - } - } - - // shuffle values - { - FlagType position = output_position; - - for(unsigned int k = 0; k < K; k++) - { - const unsigned int offset = K * context.thread_index() + k; - - if (FullBlock || offset < n) - { - if (flag_bits & (FlagType(1) << k)) - { - sdata[position / CTA_SIZE][position % CTA_SIZE] = ldata[k]; - position++; - } - } - } - } - - context.barrier(); - - - // write values out - for(unsigned int k = 0; k < K; k++) - { - const unsigned int offset = CTA_SIZE * k + context.thread_index(); - - if (offset < num_outputs) - { - OutputIterator2 tmp = ovals + offset; - *tmp = sdata[k][context.thread_index()]; - } - } - - context.barrier(); -} - -template -struct reduce_by_key_closure -{ - InputIterator1 ikeys; - InputIterator2 ivals; - OutputIterator1 okeys; - OutputIterator2 ovals; - BinaryPredicate binary_pred; - BinaryFunction binary_op; - FlagIterator iflags; - IndexIterator interval_counts; - ValueIterator interval_values; - BoolIterator interval_carry; - Decomposition decomp; - Context context; - - typedef Context context_type; - - reduce_by_key_closure(InputIterator1 ikeys, - InputIterator2 ivals, - OutputIterator1 okeys, - OutputIterator2 ovals, - BinaryPredicate binary_pred, - BinaryFunction binary_op, - FlagIterator iflags, - IndexIterator interval_counts, - ValueIterator interval_values, - BoolIterator interval_carry, - Decomposition decomp, - Context context = Context()) - : ikeys(ikeys), ivals(ivals), okeys(okeys), ovals(ovals), binary_pred(binary_pred), binary_op(binary_op), - iflags(iflags), interval_counts(interval_counts), interval_values(interval_values), interval_carry(interval_carry), - decomp(decomp), context(context) {} - - __device__ __thrust_forceinline__ - void operator()(void) - { - typedef typename thrust::iterator_value::type KeyType; - typedef typename thrust::iterator_value::type ValueType; - typedef typename Decomposition::index_type IndexType; - typedef typename 
thrust::iterator_value::type FlagType; - - const unsigned int CTA_SIZE = context_type::ThreadsPerBlock::value; - -// TODO centralize this mapping (__CUDA_ARCH__ -> smem bytes) -#if __CUDA_ARCH__ >= 200 - const unsigned int SMEM = (48 * 1024); -#else - const unsigned int SMEM = (16 * 1024) - 256; -#endif - const unsigned int SMEM_FIXED = CTA_SIZE * sizeof(FlagType) + sizeof(ValueType) + sizeof(IndexType) + sizeof(bool); - const unsigned int BOUND_1 = (SMEM - SMEM_FIXED) / ((CTA_SIZE + 1) * sizeof(ValueType)); - const unsigned int BOUND_2 = 8 * sizeof(FlagType); - const unsigned int BOUND_3 = 6; - - // TODO replace this with a static_min::value - const unsigned int K = (BOUND_1 < BOUND_2) ? (BOUND_1 < BOUND_3 ? BOUND_1 : BOUND_3) : (BOUND_2 < BOUND_3 ? BOUND_2 : BOUND_3); - - __shared__ detail::uninitialized sflag; - __shared__ detail::uninitialized sdata; // padded to avoid bank conflicts - - __shared__ detail::uninitialized carry_value; // storage for carry in and carry out - __shared__ detail::uninitialized carry_index; - __shared__ detail::uninitialized carry_in; - - typename Decomposition::range_type interval = decomp[context.block_index()]; - //thrust::system::detail::internal::index_range interval = decomp[context.block_index()]; - - - if (context.thread_index() == 0) - { - carry_in = false; // act as though the previous segment terminated just before us - - if (context.block_index() == 0) - { - carry_index = 0; - } - else - { - interval_counts += (context.block_index() - 1); - carry_index = *interval_counts; - } - } - - context.barrier(); - - IndexType base = interval.begin(); - - // advance input and output iterators - ikeys += base; - ivals += base; - iflags += base; - okeys += carry_index; - ovals += carry_index; - - const unsigned int unit_size = K * CTA_SIZE; - - // process full units - while (base + unit_size <= interval.end()) - { - const unsigned int n = unit_size; - reduce_by_key_body(context, n, ikeys, ivals, okeys, ovals, binary_pred, binary_op, 
iflags, sflag.get(), sdata.get(), carry_in.get(), carry_index.get(), carry_value.get()); - base += unit_size; - ikeys += unit_size; - ivals += unit_size; - iflags += unit_size; - okeys += carry_index; - ovals += carry_index; - } - - // process partially full unit at end of input (if necessary) - if (base < interval.end()) - { - const unsigned int n = interval.end() - base; - reduce_by_key_body(context, n, ikeys, ivals, okeys, ovals, binary_pred, binary_op, iflags, sflag.get(), sdata.get(), carry_in.get(), carry_index.get(), carry_value.get()); - } - - if (context.thread_index() == 0) - { - interval_values += context.block_index(); - interval_carry += context.block_index(); - *interval_values = carry_value; - *interval_carry = carry_in; - } - } -}; // end reduce_by_key_closure - -template -struct DefaultPolicy -{ - // typedefs - typedef unsigned int FlagType; - typedef typename thrust::iterator_traits::difference_type IndexType; - typedef typename thrust::iterator_traits::value_type KeyType; - typedef thrust::system::detail::internal::uniform_decomposition Decomposition; - - // the pseudocode for deducing the type of the temporary used below: - // - // if BinaryFunction is AdaptableBinaryFunction - // TemporaryType = AdaptableBinaryFunction::result_type - // else if OutputIterator2 is a "pure" output iterator - // TemporaryType = InputIterator2::value_type - // else - // TemporaryType = OutputIterator2::value_type - // - // XXX upon c++0x, TemporaryType needs to be: - // result_of::type - - typedef typename thrust::detail::eval_if< - thrust::detail::has_result_type::value, - thrust::detail::result_type, - thrust::detail::eval_if< - thrust::detail::is_output_iterator::value, - thrust::iterator_value, - thrust::iterator_value - > - >::type ValueType; - - // XXX WAR problem on sm_11 - // TODO tune this - const static unsigned int ThreadsPerBlock = (thrust::detail::is_pod::value) ? 
256 : 192; - - DefaultPolicy(InputIterator1 first1, InputIterator1 last1) - : decomp(default_decomposition(last1 - first1)) - {} - - // member variables - Decomposition decomp; -}; - -template - thrust::pair - reduce_by_key(execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred, - BinaryFunction binary_op, - Policy policy) -{ - typedef typename Policy::FlagType FlagType; - typedef typename Policy::Decomposition Decomposition; - typedef typename Policy::IndexType IndexType; - typedef typename Policy::KeyType KeyType; - typedef typename Policy::ValueType ValueType; - - // temporary arrays - typedef thrust::detail::temporary_array IndexArray; - typedef thrust::detail::temporary_array KeyArray; - typedef thrust::detail::temporary_array ValueArray; - typedef thrust::detail::temporary_array BoolArray; - - Decomposition decomp = policy.decomp; - - // input size - IndexType n = keys_last - keys_first; - - if (n == 0) - return thrust::make_pair(keys_output, values_output); - - IndexArray interval_counts(exec, decomp.size()); - ValueArray interval_values(exec, decomp.size()); - BoolArray interval_carry(exec, decomp.size()); - - // an ode to c++11 auto - typedef thrust::counting_iterator CountingIterator; - typedef thrust::transform_iterator< - tail_flag_functor, - thrust::zip_iterator< - thrust::tuple - > - > FlagIterator; - - FlagIterator iflag= thrust::make_transform_iterator - (thrust::make_zip_iterator(thrust::make_tuple(thrust::counting_iterator(0), keys_first, keys_first + 1)), - tail_flag_functor(n, binary_pred)); - - // count number of tail flags per interval - thrust::system::cuda::detail::reduce_intervals(exec, iflag, interval_counts.begin(), thrust::plus(), decomp); - - thrust::inclusive_scan(exec, - interval_counts.begin(), interval_counts.end(), - interval_counts.begin(), - thrust::plus()); - - // determine output 
size - const IndexType N = interval_counts[interval_counts.size() - 1]; - - const static unsigned int ThreadsPerBlock = Policy::ThreadsPerBlock; - typedef typename IndexArray::iterator IndexIterator; - typedef typename ValueArray::iterator ValueIterator; - typedef typename BoolArray::iterator BoolIterator; - typedef detail::statically_blocked_thread_array Context; - typedef reduce_by_key_closure Closure; - Closure closure - (keys_first, values_first, - keys_output, values_output, - binary_pred, binary_op, - iflag, - interval_counts.begin(), - interval_values.begin(), - interval_carry.begin(), - decomp); - detail::launch_closure(closure, decomp.size(), ThreadsPerBlock); - - if (decomp.size() > 1) - { - ValueArray interval_values2(exec, decomp.size()); - IndexArray interval_counts2(exec, decomp.size()); - BoolArray interval_carry2(exec, decomp.size()); - - IndexType N2 = - thrust::reduce_by_key - (exec, - thrust::make_zip_iterator(thrust::make_tuple(interval_counts.begin(), interval_carry.begin())), - thrust::make_zip_iterator(thrust::make_tuple(interval_counts.end(), interval_carry.end())), - interval_values.begin(), - thrust::make_zip_iterator(thrust::make_tuple(interval_counts2.begin(), interval_carry2.begin())), - interval_values2.begin(), - thrust::equal_to< thrust::tuple >(), - binary_op).first - - - thrust::make_zip_iterator(thrust::make_tuple(interval_counts2.begin(), interval_carry2.begin())); - - thrust::transform_if - (exec, - interval_values2.begin(), interval_values2.begin() + N2, - thrust::make_permutation_iterator(values_output, interval_counts2.begin()), - interval_carry2.begin(), - thrust::make_permutation_iterator(values_output, interval_counts2.begin()), - binary_op, - thrust::identity()); - } - - return thrust::make_pair(keys_output + N, values_output + N); -} - -} // end namespace reduce_by_key_detail - - -template - thrust::pair - reduce_by_key(execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 
values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred, - BinaryFunction binary_op) -{ - return reduce_by_key_detail::reduce_by_key - (exec, - keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op, - reduce_by_key_detail::DefaultPolicy(keys_first, keys_last)); -} // end reduce_by_key() - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END - diff --git a/compat/thrust/system/cuda/detail/reduce_intervals.h b/compat/thrust/system/cuda/detail/reduce_intervals.h deleted file mode 100644 index 505d13635c..0000000000 --- a/compat/thrust/system/cuda/detail/reduce_intervals.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file reduce_intervals.h - * \brief CUDA implementations of reduce_intervals algorithms. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -template -void reduce_intervals(execution_policy &exec, - InputIterator input, - OutputIterator output, - BinaryFunction binary_op, - Decomposition decomp); - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/reduce_intervals.inl b/compat/thrust/system/cuda/detail/reduce_intervals.inl deleted file mode 100644 index 2381769223..0000000000 --- a/compat/thrust/system/cuda/detail/reduce_intervals.inl +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -template -struct commutative_reduce_intervals_closure -{ - InputIterator input; - OutputIterator output; - BinaryFunction binary_op; - Decomposition decomposition; - unsigned int shared_array_size; - - typedef Context context_type; - context_type context; - - commutative_reduce_intervals_closure(InputIterator input, OutputIterator output, BinaryFunction binary_op, Decomposition decomposition, unsigned int shared_array_size, Context context = Context()) - : input(input), output(output), binary_op(binary_op), decomposition(decomposition), shared_array_size(shared_array_size), context(context) {} - - __device__ __thrust_forceinline__ - void operator()(void) - { - typedef typename thrust::iterator_value::type OutputType; - extern_shared_ptr shared_array; - - typedef typename Decomposition::index_type index_type; - - // this block processes results in [range.begin(), range.end()) - thrust::system::detail::internal::index_range range = decomposition[context.block_index()]; - - index_type i = range.begin() + context.thread_index(); - - input += i; - - if (range.size() < context.block_dimension()) - { - // compute reduction with the first shared_array_size threads - if (context.thread_index() < thrust::min(shared_array_size,range.size())) - { - OutputType sum = *input; - - i += shared_array_size; - input += shared_array_size; - - while (i < range.end()) - { - OutputType val = *input; - - sum = binary_op(sum, val); - - i += shared_array_size; - input += shared_array_size; - } - - shared_array[context.thread_index()] = sum; - } - } - else - { - // compute reduction with all blockDim.x threads - OutputType sum = *input; - - i += context.block_dimension(); - input += context.block_dimension(); - - while (i < range.end()) - { - OutputType val = *input; - - sum = binary_op(sum, val); - - i += 
context.block_dimension(); - input += context.block_dimension(); - } - - // write first shared_array_size values into shared memory - if (context.thread_index() < shared_array_size) - shared_array[context.thread_index()] = sum; - - // accumulate remaining values (if any) to shared memory in stages - if (context.block_dimension() > shared_array_size) - { - unsigned int lb = shared_array_size; - unsigned int ub = shared_array_size + lb; - - while (lb < context.block_dimension()) - { - context.barrier(); - - if (lb <= context.thread_index() && context.thread_index() < ub) - { - OutputType tmp = shared_array[context.thread_index() - lb]; - shared_array[context.thread_index() - lb] = binary_op(tmp, sum); - } - - lb += shared_array_size; - ub += shared_array_size; - } - } - } - - context.barrier(); - - block::reduce_n(context, shared_array, thrust::min(range.size(), shared_array_size), binary_op); - - if (context.thread_index() == 0) - { - output += context.block_index(); - *output = shared_array[0]; - } - } -}; - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_BEGIN - -template -void reduce_intervals(execution_policy &, - InputIterator input, - OutputIterator output, - BinaryFunction binary_op, - Decomposition decomp) -{ - // we're attempting to launch a kernel, assert we're compiling with nvcc - // ======================================================================== - // X Note to the user: If you've found this line due to a compiler error, X - // X you need to compile your code using nvcc, rather than g++ or cl.exe X - // ======================================================================== - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - - if (decomp.size() == 0) - return; - - // TODO if (decomp.size() > deviceProperties.maxGridSize[0]) throw cuda exception (or handle general case) - - typedef detail::blocked_thread_array Context; - typedef commutative_reduce_intervals_closure Closure; - typedef typename 
thrust::iterator_value::type OutputType; - - detail::launch_calculator calculator; - - thrust::tuple config = calculator.with_variable_block_size_available_smem(); - - //size_t max_blocks = thrust::get<0>(config); - size_t block_size = thrust::get<1>(config); - size_t max_memory = thrust::get<2>(config); - - // determine shared array size - size_t shared_array_size = thrust::min(max_memory / sizeof(OutputType), block_size); - size_t shared_array_bytes = sizeof(OutputType) * shared_array_size; - - // TODO if (shared_array_size < 1) throw cuda exception "insufficient shared memory" - - Closure closure(input, output, binary_op, decomp, shared_array_size); - detail::launch_closure(closure, decomp.size(), block_size, shared_array_bytes); -} - -__THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING_END - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/remove.h b/compat/thrust/system/cuda/detail/remove.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/remove.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/replace.h b/compat/thrust/system/cuda/detail/replace.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/replace.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/reverse.h b/compat/thrust/system/cuda/detail/reverse.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/reverse.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/runtime_introspection.h b/compat/thrust/system/cuda/detail/runtime_introspection.h deleted file mode 100644 index 39f6c9fadc..0000000000 --- a/compat/thrust/system/cuda/detail/runtime_introspection.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file runtime_introspection.h - * \brief Defines the interface to functions - * providing introspection into the architecture - * of CUDA devices. - */ - -#pragma once - -#include - -// #include this for device_properties_t and function_attributes_t -#include - -// #include this for size_t -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - - -/*! Returns the current device ordinal. - */ -inline int current_device(); - -/*! Returns a copy of the device_properties_t structure - * that is associated with a given device. - */ -inline device_properties_t device_properties(int device_id); - -/*! Returns a copy of the device_properties_t structure - * that is associated with the current device. - */ -inline device_properties_t device_properties(void); - -/*! 
Returns a copy of the function_attributes_t structure - * that is associated with a given __global__ function - */ -template -inline function_attributes_t function_attributes(KernelFunction kernel); - -/*! Returns the compute capability of a device in integer format. - * For example, returns 10 for sm_10 and 21 for sm_21 - * \return The compute capability as an integer - */ -inline size_t compute_capability(const device_properties_t &properties); -inline size_t compute_capability(void); - - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/runtime_introspection.inl b/compat/thrust/system/cuda/detail/runtime_introspection.inl deleted file mode 100644 index a5cc382964..0000000000 --- a/compat/thrust/system/cuda/detail/runtime_introspection.inl +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include - - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace runtime_introspection_detail -{ - - -inline void get_device_properties(device_properties_t &p, int device_id) -{ - cudaDeviceProp properties; - - cudaError_t error = cudaGetDeviceProperties(&properties, device_id); - - if(error) - throw thrust::system_error(error, thrust::cuda_category()); - - // be careful about how this is initialized! - device_properties_t temp = { - properties.major, - { - properties.maxGridSize[0], - properties.maxGridSize[1], - properties.maxGridSize[2] - }, - properties.maxThreadsPerBlock, - properties.maxThreadsPerMultiProcessor, - properties.minor, - properties.multiProcessorCount, - properties.regsPerBlock, - properties.sharedMemPerBlock, - properties.warpSize - }; - - p = temp; -} // end get_device_properties() - - -} // end runtime_introspection_detail - - -inline device_properties_t device_properties(int device_id) -{ - // cache the result of get_device_properties, because it is slow - // only cache the first few devices - static const int max_num_devices = 16; - - static bool properties_exist[max_num_devices] = {0}; - static device_properties_t device_properties[max_num_devices] = {}; - - if(device_id >= max_num_devices) - { - device_properties_t result; - runtime_introspection_detail::get_device_properties(result, device_id); - return result; - } - - if(!properties_exist[device_id]) - { - runtime_introspection_detail::get_device_properties(device_properties[device_id], device_id); - - // disallow the compiler to move the write to properties_exist[device_id] - // before the initialization of device_properties[device_id] - __thrust_compiler_fence(); - - properties_exist[device_id] = true; - } - - return device_properties[device_id]; -} - -inline int current_device() -{ - int result = -1; - - cudaError_t error = cudaGetDevice(&result); - - if(error) - throw 
thrust::system_error(error, thrust::cuda_category()); - - if(result < 0) - throw thrust::system_error(cudaErrorNoDevice, thrust::cuda_category()); - - return result; -} - -inline device_properties_t device_properties(void) -{ - return device_properties(current_device()); -} - -template -inline function_attributes_t function_attributes(KernelFunction kernel) -{ -// cudaFuncGetAttributes(), used below, only exists when __CUDACC__ is defined -#ifdef __CUDACC__ - typedef void (*fun_ptr_type)(); - - fun_ptr_type fun_ptr = reinterpret_cast(kernel); - - cudaFuncAttributes attributes; - - cudaError_t error = cudaFuncGetAttributes(&attributes, fun_ptr); - - if(error) - { - throw thrust::system_error(error, thrust::cuda_category()); - } - - // be careful about how this is initialized! - function_attributes_t result = { - attributes.constSizeBytes, - attributes.localSizeBytes, - attributes.maxThreadsPerBlock, - attributes.numRegs, - attributes.sharedSizeBytes - }; - - return result; -#else - return function_attributes_t(); -#endif // __CUDACC__ -} - -inline size_t compute_capability(const device_properties_t &properties) -{ - return 10 * properties.major + properties.minor; -} - -inline size_t compute_capability(void) -{ - return compute_capability(device_properties()); -} - - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/scan.h b/compat/thrust/system/cuda/detail/scan.h deleted file mode 100644 index 036c89a217..0000000000 --- a/compat/thrust/system/cuda/detail/scan.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file scan.h - * \brief Scan operations (parallel prefix-sum) [cuda] - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -template - OutputIterator inclusive_scan(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - AssociativeOperator binary_op); - -template - OutputIterator exclusive_scan(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - T init, - AssociativeOperator binary_op); - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/scan.inl b/compat/thrust/system/cuda/detail/scan.inl deleted file mode 100644 index 9d9c6d20ee..0000000000 --- a/compat/thrust/system/cuda/detail/scan.inl +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file scan.inl - * \brief Inline file for scan.h. - */ - -#include -#include - -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -template - OutputIterator inclusive_scan(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - AssociativeOperator binary_op) -{ - // we're attempting to launch a kernel, assert we're compiling with nvcc - // ======================================================================== - // X Note to the user: If you've found this line due to a compiler error, X - // X you need to compile your code using nvcc, rather than g++ or cl.exe X - // ======================================================================== - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - - return thrust::system::cuda::detail::detail::fast_scan::inclusive_scan(exec, first, last, result, binary_op); -} - -template - OutputIterator exclusive_scan(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - T init, - AssociativeOperator binary_op) -{ - // we're attempting to launch a kernel, assert we're compiling with nvcc - // ======================================================================== - // X Note to the user: If you've found this line due to a compiler error, X - // X you need to compile your code using nvcc, rather than g++ or cl.exe X - // ======================================================================== - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - - return thrust::system::cuda::detail::detail::fast_scan::exclusive_scan(exec, first, last, result, init, binary_op); -} - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/scan_by_key.h b/compat/thrust/system/cuda/detail/scan_by_key.h deleted file mode 100644 index a307fc5fc8..0000000000 --- 
a/compat/thrust/system/cuda/detail/scan_by_key.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/scatter.h b/compat/thrust/system/cuda/detail/scatter.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/scatter.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/sequence.h b/compat/thrust/system/cuda/detail/sequence.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/sequence.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/set_difference.inl b/compat/thrust/system/cuda/detail/set_difference.inl deleted file mode 100644 index 33d9884730..0000000000 --- a/compat/thrust/system/cuda/detail/set_difference.inl +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace set_difference_detail -{ - - -struct serial_bounded_set_difference -{ - // max_input_size <= 32 - template - inline __device__ - thrust::detail::uint32_t operator()(Size max_input_size, - InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, InputIterator2 last2, - OutputIterator result, - Compare comp) - { - thrust::detail::uint32_t active_mask = 0; - thrust::detail::uint32_t active_bit = 1; - - while(first1 != last1 && first2 != last2) - { - if(comp(*first1,*first2)) - { - *result = *first1; - active_mask |= active_bit; - ++first1; - } // end if - else if(comp(*first2,*first1)) - { - ++first2; - } // end else if - else - { - ++first1; - ++first2; - } // end else - - ++result; - active_bit <<= 1; - } // end while - - while(first1 != last1) - { - *result = *first1; - ++first1; - ++result; - active_mask |= active_bit; - active_bit <<= 1; - } - - return active_mask; - } - - - template - inline __device__ - Size count(Size max_input_size, - InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, InputIterator2 last2, - Compare comp) - { - Size result = 0; - - while(first1 != last1 && first2 != last2) - { - if(comp(*first1,*first2)) - { - ++first1; - ++result; - } // end if - else if(comp(*first2,*first1)) - { - ++first2; - } // end else if - else - { - ++first1; - ++first2; - } // end else - } // end while - - return result + last1 - first1; - } -}; // end serial_bounded_set_difference - - -} // end namespace set_difference_detail - - -template -RandomAccessIterator3 set_difference(execution_policy &exec, - RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - RandomAccessIterator2 last2, - RandomAccessIterator3 result, - Compare comp) -{ - return thrust::system::cuda::detail::detail::set_operation(exec, first1, last1, first2, last2, result, comp, 
set_difference_detail::serial_bounded_set_difference()); -} // end set_difference - - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/set_intersection.inl b/compat/thrust/system/cuda/detail/set_intersection.inl deleted file mode 100644 index e4810b6d1b..0000000000 --- a/compat/thrust/system/cuda/detail/set_intersection.inl +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace set_intersection_detail -{ - - -struct serial_bounded_set_intersection -{ - // max_input_size <= 32 - template - inline __device__ - thrust::detail::uint32_t operator()(Size max_input_size, - InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, InputIterator2 last2, - OutputIterator result, - Compare comp) - { - thrust::detail::uint32_t active_mask = 0; - thrust::detail::uint32_t active_bit = 1; - - while(first1 != last1 && first2 != last2) - { - if(comp(*first1,*first2)) - { - ++first1; - } // end if - else if(comp(*first2,*first1)) - { - ++first2; - } // end else if - else - { - *result = *first1; - ++first1; - ++first2; - active_mask |= active_bit; - } // end else - - ++result; - active_bit <<= 1; - } // end while - - return active_mask; - } - - - template - inline __device__ - Size count(Size max_input_size, - InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, InputIterator2 last2, - Compare comp) - { - Size result = 0; - - while(first1 != last1 && first2 != last2) - { - if(comp(*first1,*first2)) - { - ++first1; - } // end if - else if(comp(*first2,*first1)) - { - ++first2; - } // end else if - else - { - ++result; - ++first1; - ++first2; - } // end else - } // end while - - return result; - } -}; // end serial_bounded_set_intersection - - -} // end namespace set_intersection_detail - - -template -RandomAccessIterator3 set_intersection(execution_policy &exec, - RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - RandomAccessIterator2 last2, - RandomAccessIterator3 result, - Compare comp) -{ - return thrust::system::cuda::detail::detail::set_operation(exec, first1, last1, first2, last2, result, comp, set_intersection_detail::serial_bounded_set_intersection()); -} // end set_intersection - - -} // end namespace detail -} // end namespace cuda -} // end 
namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/set_operations.h b/compat/thrust/system/cuda/detail/set_operations.h deleted file mode 100644 index 040e3419ce..0000000000 --- a/compat/thrust/system/cuda/detail/set_operations.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - - -template -RandomAccessIterator3 set_difference(execution_policy &exec, - RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - RandomAccessIterator2 last2, - RandomAccessIterator3 result, - Compare comp); - - -template -RandomAccessIterator3 set_intersection(execution_policy &exec, - RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - RandomAccessIterator2 last2, - RandomAccessIterator3 result, - Compare comp); - - -template -RandomAccessIterator3 set_symmetric_difference(execution_policy &exec, - RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - RandomAccessIterator2 last2, - RandomAccessIterator3 result, - Compare comp); - - -template -RandomAccessIterator3 set_union(execution_policy &exec, - RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - RandomAccessIterator2 
last2, - RandomAccessIterator3 result, - Compare comp); - - -} // end detail -} // end cuda -} // end system -} // end thrust - -#include -#include -#include -#include - diff --git a/compat/thrust/system/cuda/detail/set_symmetric_difference.inl b/compat/thrust/system/cuda/detail/set_symmetric_difference.inl deleted file mode 100644 index 112c955bc0..0000000000 --- a/compat/thrust/system/cuda/detail/set_symmetric_difference.inl +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace set_symmetric_difference_detail -{ - - -struct serial_bounded_set_symmetric_difference -{ - // max_input_size <= 32 - template - inline __device__ - thrust::detail::uint32_t operator()(Size max_input_size, - InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, InputIterator2 last2, - OutputIterator result, - Compare comp) - { - thrust::detail::uint32_t active_mask = 0; - thrust::detail::uint32_t active_bit = 1; - - while(first1 != last1 && first2 != last2) - { - if(comp(*first1,*first2)) - { - *result = *first1; - active_mask |= active_bit; - ++first1; - } // end if - else if(comp(*first2,*first1)) - { - *result = *first2; - active_mask |= active_bit; - ++first2; - } // end else if - else - { - ++first1; - ++first2; - } // end else - - ++result; - active_bit <<= 1; - } // end while - - while(first1 != last1) - { - *result = *first1; - ++first1; - ++result; - active_mask |= active_bit; - active_bit <<= 1; - } - - while(first2 != last2) - { - *result = *first2; - ++first2; - ++result; - active_mask |= active_bit; - active_bit <<= 1; - } - - return active_mask; - } - - - template - inline __device__ - Size count(Size max_input_size, - InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, InputIterator2 last2, - Compare comp) - { - Size result = 0; - - while(first1 != last1 && first2 != last2) - { - if(comp(*first1,*first2)) - { - ++first1; - ++result; - } // end if - else if(comp(*first2,*first1)) - { - ++first2; - ++result; - } // end else if - else - { - ++first1; - ++first2; - } // end else - } // end while - - return result + thrust::max(last1 - first1,last2 - first2); - } -}; // end serial_bounded_set_symmetric_difference - - -} // end namespace set_symmetric_difference_detail - - -template -RandomAccessIterator3 set_symmetric_difference(execution_policy &exec, - RandomAccessIterator1 first1, - 
RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - RandomAccessIterator2 last2, - RandomAccessIterator3 result, - Compare comp) -{ - return thrust::system::cuda::detail::detail::set_operation(exec, first1, last1, first2, last2, result, comp, set_symmetric_difference_detail::serial_bounded_set_symmetric_difference()); -} // end set_symmetric_difference - - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/set_union.inl b/compat/thrust/system/cuda/detail/set_union.inl deleted file mode 100644 index 66cccab39b..0000000000 --- a/compat/thrust/system/cuda/detail/set_union.inl +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ -namespace set_union_detail -{ - - -struct serial_bounded_set_union -{ - // max_input_size <= 32 - template - inline __device__ - thrust::detail::uint32_t operator()(Size max_input_size, - InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, InputIterator2 last2, - OutputIterator result, - Compare comp) - { - thrust::detail::uint32_t active_mask = 0; - thrust::detail::uint32_t active_bit = 1; - - while(first1 != last1 && first2 != last2) - { - if(comp(*first1,*first2)) - { - *result = *first1; - ++first1; - } // end if - else if(comp(*first2,*first1)) - { - *result = *first2; - ++first2; - } // end else if - else - { - *result = *first1; - ++first1; - ++first2; - } // end else - - ++result; - active_mask |= active_bit; - active_bit <<= 1; - } // end while - - while(first1 != last1) - { - *result = *first1; - ++first1; - ++result; - active_mask |= active_bit; - active_bit <<= 1; - } - - while(first2 != last2) - { - *result = *first2; - ++first2; - ++result; - active_mask |= active_bit; - active_bit <<= 1; - } - - return active_mask; - } - - - template - inline __device__ - Size count(Size max_input_size, - InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, InputIterator2 last2, - Compare comp) - { - Size result = 0; - - while(first1 != last1 && first2 != last2) - { - if(comp(*first1,*first2)) - { - ++first1; - } // end if - else if(comp(*first2,*first1)) - { - ++first2; - } // end else if - else - { - ++first1; - ++first2; - } // end else - - ++result; - } // end while - - return result + thrust::max(last1 - first1,last2 - first2); - } -}; // end serial_bounded_set_union - - -} // end namespace set_union_detail - - -template -RandomAccessIterator3 set_union(execution_policy &exec, - RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - RandomAccessIterator2 last2, - 
RandomAccessIterator3 result, - Compare comp) -{ - return thrust::system::cuda::detail::detail::set_operation(exec, first1, last1, first2, last2, result, comp, set_union_detail::serial_bounded_set_union()); -} // end set_union - - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/sort.h b/compat/thrust/system/cuda/detail/sort.h deleted file mode 100644 index e78d36a76b..0000000000 --- a/compat/thrust/system/cuda/detail/sort.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -template - void stable_sort(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp); - -template - void stable_sort_by_key(execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp); - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/sort.inl b/compat/thrust/system/cuda/detail/sort.inl deleted file mode 100644 index d7e0a60c45..0000000000 --- a/compat/thrust/system/cuda/detail/sort.inl +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file sort.inl - * \brief Inline file for sort.h - */ - -#include -#include - -#include -#include -#include -#include -#include -#include - - -/* - * This file implements the following dispatch procedure for cuda::stable_sort() - * and cuda::stable_sort_by_key(). The first level inspects the KeyType - * and StrictWeakOrdering to determine whether a sort assuming primitive-typed - * data may be applied. 
- * - * If a sort assuming primitive-typed data can be applied (i.e., a radix sort), - * the input ranges are first trivialized (turned into simple contiguous ranges - * if they are not already). To implement descending orderings, an ascending - * sort will be reversed. - * - * If a sort assuming primitive-typed data cannot be applied, a comparison-based - * sort is used. Depending on the size of the key and value types, one level of - * indirection may be applied to their input ranges. This transformation - * may be applied to either range to convert an ill-suited problem (i.e. sorting with - * large keys or large value) into a problem more amenable to the underlying - * merge sort algorithm. - */ - - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - - -namespace stable_sort_detail -{ - - -template - struct can_use_primitive_sort - : thrust::detail::and_< - thrust::detail::is_arithmetic, - thrust::detail::or_< - thrust::detail::is_same >, - thrust::detail::is_same > - > - > -{}; - - -template - struct enable_if_primitive_sort - : thrust::detail::enable_if< - can_use_primitive_sort< - typename iterator_value::type, - StrictWeakCompare - >::value - > -{}; - - -template - struct enable_if_comparison_sort - : thrust::detail::disable_if< - can_use_primitive_sort< - typename iterator_value::type, - StrictWeakCompare - >::value - > -{}; - - -template - typename enable_if_primitive_sort::type - stable_sort(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - // ensure sequence has trivial iterators - thrust::detail::trivial_sequence keys(exec, first, last); - - // CUDA path for thrust::stable_sort with primitive keys - // (e.g. int, float, short, etc.) 
and a less or greater comparison - // method is implemented with a primitive sort - thrust::system::cuda::detail::detail::stable_primitive_sort(exec, keys.begin(), keys.end()); - - // copy results back, if necessary - if(!thrust::detail::is_trivial_iterator::value) - { - thrust::copy(exec, keys.begin(), keys.end(), first); - } - - // if comp is greater then reverse the keys - typedef typename thrust::iterator_traits::value_type KeyType; - const static bool reverse = thrust::detail::is_same >::value; - - if(reverse) - { - thrust::reverse(first, last); - } -} - -template - typename enable_if_comparison_sort::type - stable_sort(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - // decide whether to sort keys indirectly - typedef typename thrust::iterator_value::type KeyType; - typedef thrust::detail::integral_constant 8)> use_key_indirection; - - conditional_temporary_indirect_ordering potentially_indirect_keys(derived_cast(exec), first, last, comp); - - thrust::system::cuda::detail::detail::stable_merge_sort(exec, - potentially_indirect_keys.begin(), - potentially_indirect_keys.end(), - potentially_indirect_keys.comp()); -} - -template - typename enable_if_primitive_sort::type - stable_sort_by_key(execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp) -{ - // path for thrust::stable_sort_by_key with primitive keys - // (e.g. int, float, short, etc.) 
and a less or greater comparison - // method is implemented with stable_primitive_sort_by_key - - // if comp is greater then reverse the keys and values - typedef typename thrust::iterator_traits::value_type KeyType; - const static bool reverse = thrust::detail::is_same >::value; - - // note, we also have to reverse the (unordered) input to preserve stability - if (reverse) - { - thrust::reverse(exec, keys_first, keys_last); - thrust::reverse(exec, values_first, values_first + (keys_last - keys_first)); - } - - // ensure sequences have trivial iterators - thrust::detail::trivial_sequence keys(exec, keys_first, keys_last); - thrust::detail::trivial_sequence values(exec, values_first, values_first + (keys_last - keys_first)); - - thrust::system::cuda::detail::detail::stable_primitive_sort_by_key(exec, keys.begin(), keys.end(), values.begin()); - - // copy results back, if necessary - if(!thrust::detail::is_trivial_iterator::value) - thrust::copy(exec, keys.begin(), keys.end(), keys_first); - if(!thrust::detail::is_trivial_iterator::value) - thrust::copy(exec, values.begin(), values.end(), values_first); - - if (reverse) - { - thrust::reverse(exec, keys_first, keys_last); - thrust::reverse(exec, values_first, values_first + (keys_last - keys_first)); - } -} - - -template - typename enable_if_comparison_sort::type - stable_sort_by_key(execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp) -{ - // decide whether to apply indirection to either range - typedef typename thrust::iterator_value::type KeyType; - typedef typename thrust::iterator_value::type ValueType; - - typedef thrust::detail::integral_constant 8)> use_key_indirection; - typedef thrust::detail::integral_constant 4)> use_value_indirection; - - conditional_temporary_indirect_ordering< - use_key_indirection, - DerivedPolicy, - RandomAccessIterator1, - StrictWeakOrdering - > 
potentially_indirect_keys(derived_cast(exec), keys_first, keys_last, comp); - - conditional_temporary_indirect_permutation< - use_value_indirection, - DerivedPolicy, - RandomAccessIterator2 - > potentially_indirect_values(derived_cast(exec), values_first, values_first + (keys_last - keys_first)); - - thrust::system::cuda::detail::detail::stable_merge_sort_by_key(exec, - potentially_indirect_keys.begin(), - potentially_indirect_keys.end(), - potentially_indirect_values.begin(), - potentially_indirect_keys.comp()); -} - - -} // end namespace stable_sort_detail - - -template - void stable_sort(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - // we're attempting to launch a kernel, assert we're compiling with nvcc - // ======================================================================== - // X Note to the user: If you've found this line due to a compiler error, X - // X you need to compile your code using nvcc, rather than g++ or cl.exe X - // ======================================================================== - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - - stable_sort_detail::stable_sort(exec, first, last, comp); -} - - -template - void stable_sort_by_key(execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp) -{ - // we're attempting to launch a kernel, assert we're compiling with nvcc - // ======================================================================== - // X Note to the user: If you've found this line due to a compiler error, X - // X you need to compile your code using nvcc, rather than g++ or cl.exe X - // ======================================================================== - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - - stable_sort_detail::stable_sort_by_key(exec, keys_first, keys_last, values_first, comp); -} 
- - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/swap_ranges.h b/compat/thrust/system/cuda/detail/swap_ranges.h deleted file mode 100644 index 9b1949e60f..0000000000 --- a/compat/thrust/system/cuda/detail/swap_ranges.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// cuda has no special swap_ranges - diff --git a/compat/thrust/system/cuda/detail/synchronize.h b/compat/thrust/system/cuda/detail/synchronize.h deleted file mode 100644 index 762f4a39c4..0000000000 --- a/compat/thrust/system/cuda/detail/synchronize.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -inline void synchronize(const char *message = ""); - -inline void synchronize_if_enabled(const char *message = ""); - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/synchronize.inl b/compat/thrust/system/cuda/detail/synchronize.inl deleted file mode 100644 index 5f70f799c4..0000000000 --- a/compat/thrust/system/cuda/detail/synchronize.inl +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -void synchronize(const char *message) -{ - cudaError_t error = cudaThreadSynchronize(); - if(error) - { - throw thrust::system_error(error, thrust::cuda_category(), std::string("synchronize: ") + message); - } // end if -} // end synchronize() - -void synchronize_if_enabled(const char *message) -{ -// XXX this could potentially be a runtime decision -#if __THRUST_SYNCHRONOUS - synchronize(message); -#else - // WAR "unused parameter" warning - (void) message; -#endif -} // end synchronize_if_enabled() - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/cuda/detail/tabulate.h b/compat/thrust/system/cuda/detail/tabulate.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/tabulate.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/temporary_buffer.h b/compat/thrust/system/cuda/detail/temporary_buffer.h deleted file mode 100644 index 628bd75719..0000000000 --- a/compat/thrust/system/cuda/detail/temporary_buffer.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special temporary buffer functions - diff --git a/compat/thrust/system/cuda/detail/temporary_indirect_permutation.h b/compat/thrust/system/cuda/detail/temporary_indirect_permutation.h deleted file mode 100644 index 3d05f44155..0000000000 --- a/compat/thrust/system/cuda/detail/temporary_indirect_permutation.h +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - - -template - struct temporary_indirect_permutation -{ - private: - typedef unsigned int size_type; - typedef thrust::detail::temporary_array array_type; - - public: - temporary_indirect_permutation(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last) - : m_exec(exec), - m_src_first(first), - m_src_last(last), - m_permutation(0, m_exec, last - first) - { - // generate sorted index sequence - thrust::sequence(exec, m_permutation.begin(), m_permutation.end()); - } - - ~temporary_indirect_permutation() - { - // permute the source array using the indices - typedef typename thrust::iterator_value::type value_type; - thrust::detail::temporary_array temp(m_exec, m_src_first, m_src_last); - thrust::gather(m_exec, m_permutation.begin(), m_permutation.end(), temp.begin(), m_src_first); - } - - typedef typename array_type::iterator iterator; - - iterator begin() - { - return m_permutation.begin(); - } - - iterator end() - { - return m_permutation.end(); - } - - private: - DerivedPolicy &m_exec; - RandomAccessIterator m_src_first, m_src_last; - thrust::detail::temporary_array m_permutation; -}; - - -template - struct iterator_range_with_execution_policy -{ - iterator_range_with_execution_policy(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last) - : m_exec(exec), m_first(first), m_last(last) - {} - - typedef RandomAccessIterator iterator; - - iterator begin() - { - return m_first; - } - - iterator end() - { - return m_last; - } - - DerivedPolicy &exec() - { - return m_exec; - } - - DerivedPolicy &m_exec; - RandomAccessIterator m_first, m_last; -}; - - -template - struct conditional_temporary_indirect_permutation - : thrust::detail::eval_if< - Condition::value, - thrust::detail::identity_ >, - thrust::detail::identity_ > - >::type -{ - typedef typename thrust::detail::eval_if< - 
Condition::value, - thrust::detail::identity_ >, - thrust::detail::identity_ > - >::type super_t; - - conditional_temporary_indirect_permutation(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last) - : super_t(exec, first, last) - {} -}; - - -template - struct temporary_indirect_ordering - : temporary_indirect_permutation -{ - private: - typedef temporary_indirect_permutation super_t; - - public: - temporary_indirect_ordering(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last, Compare comp) - : super_t(exec, first, last), - m_comp(first, comp) - {} - - struct compare - { - RandomAccessIterator first; - - thrust::detail::host_device_function< - Compare, - bool - > comp; - - compare(RandomAccessIterator first, Compare comp) - : first(first), comp(comp) - {} - - template - __host__ __device__ - bool operator()(Integral a, Integral b) - { - return comp(first[a], first[b]); - } - }; - - compare comp() const - { - return m_comp; - } - - private: - compare m_comp; -}; - - -template - struct iterator_range_with_execution_policy_and_compare - : iterator_range_with_execution_policy -{ - typedef iterator_range_with_execution_policy super_t; - - iterator_range_with_execution_policy_and_compare(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last, Compare comp) - : super_t(exec, first, last), m_comp(comp) - {} - - typedef Compare compare; - - compare comp() - { - return m_comp; - } - - Compare m_comp; -}; - - -template - struct conditional_temporary_indirect_ordering - : thrust::detail::eval_if< - Condition::value, - thrust::detail::identity_ >, - thrust::detail::identity_ > - >::type -{ - typedef typename thrust::detail::eval_if< - Condition::value, - thrust::detail::identity_ >, - thrust::detail::identity_ > - >::type super_t; - - conditional_temporary_indirect_ordering(DerivedPolicy &exec, RandomAccessIterator first, RandomAccessIterator last, Compare comp) - : super_t(exec, first, last, comp) - {} -}; - - 
-} // end detail -} // end cuda -} // end system -} // end thrust - diff --git a/compat/thrust/system/cuda/detail/transform.h b/compat/thrust/system/cuda/detail/transform.h deleted file mode 100644 index 0af87056e7..0000000000 --- a/compat/thrust/system/cuda/detail/transform.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// cuda has no special transform - diff --git a/compat/thrust/system/cuda/detail/transform_reduce.h b/compat/thrust/system/cuda/detail/transform_reduce.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/transform_reduce.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/transform_scan.h b/compat/thrust/system/cuda/detail/transform_scan.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/transform_scan.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/trivial_copy.h b/compat/thrust/system/cuda/detail/trivial_copy.h deleted file mode 100644 index e0e898aad4..0000000000 --- a/compat/thrust/system/cuda/detail/trivial_copy.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -template - void trivial_copy_n(execution_policy &exec, - RandomAccessIterator1 first, - Size n, - RandomAccessIterator2 result); - -template - void trivial_copy_n(cross_system &exec, - RandomAccessIterator1 first, - Size n, - RandomAccessIterator2 result); - -} // end detail -} // end cuda -} // end system -} // end thrust - -#include - diff --git a/compat/thrust/system/cuda/detail/trivial_copy.inl b/compat/thrust/system/cuda/detail/trivial_copy.inl deleted file mode 100644 index d23a4ef8c2..0000000000 --- a/compat/thrust/system/cuda/detail/trivial_copy.inl +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ -namespace detail -{ - -namespace trivial_copy_detail -{ - -inline void checked_cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind) -{ - cudaError_t error = cudaMemcpy(dst,src,count,kind); - if(error) - { - throw thrust::system_error(error, thrust::cuda_category()); - } // end error -} // end checked_cudaMemcpy() - - -template - cudaMemcpyKind cuda_memcpy_kind(const thrust::cuda::execution_policy &, - const thrust::cpp::execution_policy &) -{ - return cudaMemcpyDeviceToHost; -} // end cuda_memcpy_kind() - - -template - cudaMemcpyKind cuda_memcpy_kind(const thrust::cpp::execution_policy &, - const thrust::cuda::execution_policy &) -{ - return cudaMemcpyHostToDevice; -} // end cuda_memcpy_kind() - - -} // end namespace trivial_copy_detail - - -template - void trivial_copy_n(execution_policy &exec, - RandomAccessIterator1 first, - Size n, - RandomAccessIterator2 result) -{ - typedef typename thrust::iterator_value::type T; - - void *dst = thrust::raw_pointer_cast(&*result); - const void *src = thrust::raw_pointer_cast(&*first); - - trivial_copy_detail::checked_cudaMemcpy(dst, src, n * sizeof(T), cudaMemcpyDeviceToDevice); -} - - -template - void trivial_copy_n(cross_system &systems, - RandomAccessIterator1 first, - Size n, - RandomAccessIterator2 result) -{ - typedef typename thrust::iterator_value::type T; - - void *dst = thrust::raw_pointer_cast(&*result); - const void *src = thrust::raw_pointer_cast(&*first); - - cudaMemcpyKind kind = trivial_copy_detail::cuda_memcpy_kind(thrust::detail::derived_cast(systems.system1), thrust::detail::derived_cast(systems.system2)); - - trivial_copy_detail::checked_cudaMemcpy(dst, src, n * sizeof(T), kind); -} - - -} // end namespace detail -} // end namespace cuda -} // end namespace system -} // end namespace thrust - diff --git 
a/compat/thrust/system/cuda/detail/uninitialized_copy.h b/compat/thrust/system/cuda/detail/uninitialized_copy.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/uninitialized_copy.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/uninitialized_fill.h b/compat/thrust/system/cuda/detail/uninitialized_fill.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/uninitialized_fill.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/unique.h b/compat/thrust/system/cuda/detail/unique.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/unique.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/unique_by_key.h b/compat/thrust/system/cuda/detail/unique_by_key.h deleted file mode 100644 index a307fc5fc8..0000000000 --- a/compat/thrust/system/cuda/detail/unique_by_key.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system has no special version of this algorithm - diff --git a/compat/thrust/system/cuda/detail/vector.inl b/compat/thrust/system/cuda/detail/vector.inl deleted file mode 100644 index 36598764b2..0000000000 --- a/compat/thrust/system/cuda/detail/vector.inl +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in ccudaliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ - -template - vector - ::vector() - : super_t() -{} - -template - vector - ::vector(size_type n) - : super_t(n) -{} - -template - vector - ::vector(size_type n, const value_type &value) - : super_t(n,value) -{} - -template - vector - ::vector(const vector &x) - : super_t(x) -{} - -template - template - vector - ::vector(const thrust::detail::vector_base &x) - : super_t(x) -{} - -template - template - vector - ::vector(const std::vector &x) - : super_t(x) -{} - -template - template - vector - ::vector(InputIterator first, InputIterator last) - : super_t(first,last) -{} - -template - template - vector & - vector - ::operator=(const std::vector &x) -{ - super_t::operator=(x); - return *this; -} - -template - template - vector & - vector - ::operator=(const thrust::detail::vector_base &x) -{ - super_t::operator=(x); - return *this; -} - -} // end cuda -} // end system -} // end thrust - diff --git a/compat/thrust/system/cuda/error.h 
b/compat/thrust/system/cuda/error.h deleted file mode 100644 index 8d098538db..0000000000 --- a/compat/thrust/system/cuda/error.h +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file thrust/system/cuda/error.h - * \brief CUDA-specific error reporting - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -namespace system -{ - -namespace cuda -{ - -/*! \addtogroup system - * \{ - */ - -// To construct an error_code after a CUDA Runtime error: -// -// error_code(::cudaGetLastError(), cuda_category()) - -// XXX N3000 prefers enum class errc { ... } -namespace errc -{ - -/*! \p errc_t enumerates the kinds of CUDA Runtime errors. 
- */ -enum errc_t -{ - // from cuda/include/driver_types.h - // mirror their order - success = cudaSuccess, - missing_configuration = cudaErrorMissingConfiguration, - memory_allocation = cudaErrorMemoryAllocation, - initialization_error = cudaErrorInitializationError, - launch_failure = cudaErrorLaunchFailure, - prior_launch_failure = cudaErrorPriorLaunchFailure, - launch_timeout = cudaErrorLaunchTimeout, - launch_out_of_resources = cudaErrorLaunchOutOfResources, - invalid_device_function = cudaErrorInvalidDeviceFunction, - invalid_configuration = cudaErrorInvalidConfiguration, - invalid_device = cudaErrorInvalidDevice, - invalid_value = cudaErrorInvalidValue, - invalid_pitch_value = cudaErrorInvalidPitchValue, - invalid_symbol = cudaErrorInvalidSymbol, - map_buffer_object_failed = cudaErrorMapBufferObjectFailed, - unmap_buffer_object_failed = cudaErrorUnmapBufferObjectFailed, - invalid_host_pointer = cudaErrorInvalidHostPointer, - invalid_device_pointer = cudaErrorInvalidDevicePointer, - invalid_texture = cudaErrorInvalidTexture, - invalid_texture_binding = cudaErrorInvalidTextureBinding, - invalid_channel_descriptor = cudaErrorInvalidChannelDescriptor, - invalid_memcpy_direction = cudaErrorInvalidMemcpyDirection, - address_of_constant_error = cudaErrorAddressOfConstant, - texture_fetch_failed = cudaErrorTextureFetchFailed, - texture_not_bound = cudaErrorTextureNotBound, - synchronization_error = cudaErrorSynchronizationError, - invalid_filter_setting = cudaErrorInvalidFilterSetting, - invalid_norm_setting = cudaErrorInvalidNormSetting, - mixed_device_execution = cudaErrorMixedDeviceExecution, - cuda_runtime_unloading = cudaErrorCudartUnloading, - unknown = cudaErrorUnknown, - not_yet_implemented = cudaErrorNotYetImplemented, - memory_value_too_large = cudaErrorMemoryValueTooLarge, - invalid_resource_handle = cudaErrorInvalidResourceHandle, - not_ready = cudaErrorNotReady, - insufficient_driver = cudaErrorInsufficientDriver, - set_on_active_process_error = 
cudaErrorSetOnActiveProcess, - no_device = cudaErrorNoDevice, - ecc_uncorrectable = cudaErrorECCUncorrectable, - -#if CUDART_VERSION >= 4020 - shared_object_symbol_not_found = cudaErrorSharedObjectSymbolNotFound, - shared_object_init_failed = cudaErrorSharedObjectInitFailed, - unsupported_limit = cudaErrorUnsupportedLimit, - duplicate_variable_name = cudaErrorDuplicateVariableName, - duplicate_texture_name = cudaErrorDuplicateTextureName, - duplicate_surface_name = cudaErrorDuplicateSurfaceName, - devices_unavailable = cudaErrorDevicesUnavailable, - invalid_kernel_image = cudaErrorInvalidKernelImage, - no_kernel_image_for_device = cudaErrorNoKernelImageForDevice, - incompatible_driver_context = cudaErrorIncompatibleDriverContext, - peer_access_already_enabled = cudaErrorPeerAccessAlreadyEnabled, - peer_access_not_enabled = cudaErrorPeerAccessNotEnabled, - device_already_in_use = cudaErrorDeviceAlreadyInUse, - profiler_disabled = cudaErrorProfilerDisabled, - assert_triggered = cudaErrorAssert, - too_many_peers = cudaErrorTooManyPeers, - host_memory_already_registered = cudaErrorHostMemoryAlreadyRegistered, - host_memory_not_registered = cudaErrorHostMemoryNotRegistered, - operating_system_error = cudaErrorOperatingSystem, -#endif - -#if CUDART_VERSION >= 5000 - peer_access_unsupported = cudaErrorPeerAccessUnsupported, - launch_max_depth_exceeded = cudaErrorLaunchMaxDepthExceeded, - launch_file_scoped_texture_used = cudaErrorLaunchFileScopedTex, - launch_file_scoped_surface_used = cudaErrorLaunchFileScopedSurf, - sync_depth_exceeded = cudaErrorSyncDepthExceeded, - attempted_operation_not_permitted = cudaErrorNotPermitted, - attempted_operation_not_supported = cudaErrorNotSupported, -#endif - - startup_failure = cudaErrorStartupFailure -}; // end errc_t - - -} // end namespace errc - -} // end namespace cuda - -/*! \return A reference to an object of a type derived from class \p thrust::error_category. 
- * \note The object's \p equivalent virtual functions shall behave as specified - * for the class \p thrust::error_category. The object's \p name virtual function shall - * return a pointer to the string "cuda". The object's - * \p default_error_condition virtual function shall behave as follows: - * - * If the argument ev corresponds to a CUDA error value, the function - * shall return error_condition(ev,cuda_category()). - * Otherwise, the function shall return system_category.default_error_condition(ev). - */ -inline const error_category &cuda_category(void); - - -// XXX N3000 prefers is_error_code_enum - -/*! Specialization of \p is_error_code_enum for \p cuda::errc::errc_t - */ -template<> struct is_error_code_enum : thrust::detail::true_type {}; - - -// XXX replace cuda::errc::errc_t with cuda::errc upon c++0x -/*! \return error_code(static_cast(e), cuda::error_category()) - */ -inline error_code make_error_code(cuda::errc::errc_t e); - - -// XXX replace cuda::errc::errc_t with cuda::errc upon c++0x -/*! \return error_condition(static_cast(e), cuda::error_category()). - */ -inline error_condition make_error_condition(cuda::errc::errc_t e); - -/*! \} // end system - */ - - -} // end system - -namespace cuda -{ - -// XXX replace with using system::cuda_errc upon c++0x -namespace errc = system::cuda::errc; - -} // end cuda - -using system::cuda_category; - -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/cuda/execution_policy.h b/compat/thrust/system/cuda/execution_policy.h deleted file mode 100644 index bbd33defd4..0000000000 --- a/compat/thrust/system/cuda/execution_policy.h +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -/*! \file thrust/system/cuda/execution_policy.h - * \brief Execution policies for Thrust's CUDA system. - */ - -#include - -// get the execution policies definitions first -#include - -// get the definition of par -#include - -// now get all the algorithm defintitions - -// the order of the following #includes seems to matter, unfortunately - -// primitives come first, in order of increasing sophistication -#include -#include -#include - -#include -#include -#include -#include -#include - -// these are alphabetical -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -// define these entities here for the purpose of Doxygenating them -// they are actually defined elsewhere -#if 0 -namespace thrust -{ -namespace system -{ -namespace cuda -{ - - -/*! \addtogroup execution_policies - * \{ - */ - - -/*! \p thrust::cuda::execution_policy is the base class for all Thrust parallel execution - * policies which are derived from Thrust's CUDA backend system. - */ -template -struct execution_policy : thrust::execution_policy -{}; - - -/*! \p cuda::tag is a type representing Thrust's CUDA backend system in C++'s type system. 
- * Iterators "tagged" with a type which is convertible to \p cuda::tag assert that they may be - * "dispatched" to algorithm implementations in the \p cuda system. - */ -struct tag : thrust::system::cuda::execution_policy { unspecified }; - - -/*! \p thrust::cuda::par is the parallel execution policy associated with Thrust's CUDA - * backend system. - * - * Instead of relying on implicit algorithm dispatch through iterator system tags, users may - * directly target Thrust's CUDA backend system by providing \p thrust::cuda::par as an algorithm - * parameter. - * - * Explicit dispatch can be useful in avoiding the introduction of data copies into containers such - * as \p thrust::cuda::vector. - * - * The type of \p thrust::cuda::par is implementation-defined. - * - * The following code snippet demonstrates how to use \p thrust::cuda::par to explicitly dispatch an - * invocation of \p thrust::for_each to the CUDA backend system: - * - * \code - * #include - * #include - * #include - * - * struct printf_functor - * { - * __host__ __device__ - * void operator()(int x) - * { - * printf("%d\n"); - * } - * }; - * ... - * int vec[3]; - * vec[0] = 0; vec[1] = 1; vec[2] = 2; - * - * thrust::for_each(thrust::cuda::par, vec.begin(), vec.end(), printf_functor()); - * - * // 0 1 2 is printed to standard output in some unspecified order - * \endcode - */ -static const unspecified par; - - -/*! \} - */ - - -} // end cuda -} // end system -} // end thrust -#endif - - diff --git a/compat/thrust/system/cuda/experimental/pinned_allocator.h b/compat/thrust/system/cuda/experimental/pinned_allocator.h deleted file mode 100644 index 5294659e62..0000000000 --- a/compat/thrust/system/cuda/experimental/pinned_allocator.h +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file thrust/system/cuda/experimental/pinned_allocator.h - * \brief An allocator which creates new elements in "pinned" memory with \p cudaMallocHost - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ - -namespace system -{ - -namespace cuda -{ - -namespace experimental -{ - -/*! \addtogroup memory_management Memory Management - * \addtogroup memory_management_classes - * \ingroup memory_management - * \{ - */ - -/*! \p pinned_allocator is a CUDA-specific host memory allocator - * that employs \c cudaMallocHost for allocation. - * - * \see http://www.sgi.com/tech/stl/Allocators.html - */ -template class pinned_allocator; - -template<> - class pinned_allocator -{ - public: - typedef void value_type; - typedef void * pointer; - typedef const void * const_pointer; - typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; - - // convert a pinned_allocator to pinned_allocator - template - struct rebind - { - typedef pinned_allocator other; - }; // end rebind -}; // end pinned_allocator - - -template - class pinned_allocator -{ - public: - typedef T value_type; - typedef T* pointer; - typedef const T* const_pointer; - typedef T& reference; - typedef const T& const_reference; - typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; - - // convert a pinned_allocator to pinned_allocator - template - struct rebind - { - typedef pinned_allocator other; - }; // end rebind - - /*! \p pinned_allocator's null constructor does nothing. 
- */ - __host__ __device__ - inline pinned_allocator() {} - - /*! \p pinned_allocator's null destructor does nothing. - */ - __host__ __device__ - inline ~pinned_allocator() {} - - /*! \p pinned_allocator's copy constructor does nothing. - */ - __host__ __device__ - inline pinned_allocator(pinned_allocator const &) {} - - /*! This version of \p pinned_allocator's copy constructor - * is templated on the \c value_type of the \p pinned_allocator - * to copy from. It is provided merely for convenience; it - * does nothing. - */ - template - __host__ __device__ - inline pinned_allocator(pinned_allocator const &) {} - - /*! This method returns the address of a \c reference of - * interest. - * - * \p r The \c reference of interest. - * \return \c r's address. - */ - __host__ __device__ - inline pointer address(reference r) { return &r; } - - /*! This method returns the address of a \c const_reference - * of interest. - * - * \p r The \c const_reference of interest. - * \return \c r's address. - */ - __host__ __device__ - inline const_pointer address(const_reference r) { return &r; } - - /*! This method allocates storage for objects in pinned host - * memory. - * - * \p cnt The number of objects to allocate. - * \return a \c pointer to the newly allocated objects. - * \note This method does not invoke \p value_type's constructor. - * It is the responsibility of the caller to initialize the - * objects at the returned \c pointer. - */ - __host__ - inline pointer allocate(size_type cnt, - const_pointer = 0) - { - if(cnt > this->max_size()) - { - throw std::bad_alloc(); - } // end if - - pointer result(0); - cudaError_t error = cudaMallocHost(reinterpret_cast(&result), cnt * sizeof(value_type)); - - if(error) - { - throw std::bad_alloc(); - } // end if - - return result; - } // end allocate() - - /*! This method deallocates pinned host memory previously allocated - * with this \c pinned_allocator. - * - * \p p A \c pointer to the previously allocated memory. 
- * \p cnt The number of objects previously allocated at - * \p p. - * \note This method does not invoke \p value_type's destructor. - * It is the responsibility of the caller to destroy - * the objects stored at \p p. - */ - __host__ - inline void deallocate(pointer p, size_type cnt) - { - cudaError_t error = cudaFreeHost(p); - - if(error) - { - throw thrust::system_error(error, thrust::cuda_category()); - } // end if - } // end deallocate() - - /*! This method returns the maximum size of the \c cnt parameter - * accepted by the \p allocate() method. - * - * \return The maximum number of objects that may be allocated - * by a single call to \p allocate(). - */ - inline size_type max_size() const - { - return (std::numeric_limits::max)() / sizeof(T); - } // end max_size() - - /*! This method tests this \p pinned_allocator for equality to - * another. - * - * \param x The other \p pinned_allocator of interest. - * \return This method always returns \c true. - */ - __host__ __device__ - inline bool operator==(pinned_allocator const& x) { return true; } - - /*! This method tests this \p pinned_allocator for inequality - * to another. - * - * \param x The other \p pinned_allocator of interest. - * \return This method always returns \c false. - */ - __host__ __device__ - inline bool operator!=(pinned_allocator const &x) { return !operator==(x); } -}; // end pinned_allocator - -/*! 
\} - */ - -} // end experimental - -} // end cuda - -} // end system - -// alias cuda's members at top-level -namespace cuda -{ - -namespace experimental -{ - -using thrust::system::cuda::experimental::pinned_allocator; - -} // end experimental - -} // end cuda - -} // end thrust - diff --git a/compat/thrust/system/cuda/memory.h b/compat/thrust/system/cuda/memory.h deleted file mode 100644 index 368eea265a..0000000000 --- a/compat/thrust/system/cuda/memory.h +++ /dev/null @@ -1,421 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in ccudaliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file thrust/system/cuda/memory.h - * \brief Managing memory associated with Thrust's CUDA system. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace cuda -{ - -template class pointer; - -} // end cuda -} // end system -} // end thrust - - -/*! 
\cond - */ - -// specialize std::iterator_traits to avoid problems with the name of -// pointer's constructor shadowing its nested pointer type -// do this before pointer is defined so the specialization is correctly -// used inside the definition -namespace std -{ - -template - struct iterator_traits > -{ - private: - typedef thrust::system::cuda::pointer ptr; - - public: - typedef typename ptr::iterator_category iterator_category; - typedef typename ptr::value_type value_type; - typedef typename ptr::difference_type difference_type; - typedef ptr pointer; - typedef typename ptr::reference reference; -}; // end iterator_traits - -} // end std - -/*! \endcond - */ - - -namespace thrust -{ -namespace system -{ - -/*! \addtogroup system_backends Systems - * \ingroup system - * \{ - */ - -/*! \namespace thrust::system::cuda - * \brief \p thrust::system::cuda is the namespace containing functionality for allocating, manipulating, - * and deallocating memory available to Thrust's CUDA backend system. - * The identifiers are provided in a separate namespace underneath thrust::system - * for import convenience but are also aliased in the top-level thrust::tbb - * namespace for easy access. - * - */ -namespace cuda -{ - -// forward declaration of reference for pointer -template class reference; - -/*! \cond - */ - -// XXX nvcc + msvc have trouble instantiating reference below -// this is a workaround -namespace detail -{ - -template - struct reference_msvc_workaround -{ - typedef thrust::system::cuda::reference type; -}; // end reference_msvc_workaround - -} // end detail - -/*! \endcond - */ - -#if 0 -/*! \p cuda::tag is type representing Thrust's CUDA backend system in C++'s type system. - * Iterators "tagged" with a type which is convertible to \p cuda::tag assert that they may be - * "dispatched" to algorithm implementations in the \p cuda system. - */ -struct tag { unspecified }; -#endif - -/*! 
\p pointer stores a pointer to an object allocated in memory available to the cuda system. - * This type provides type safety when dispatching standard algorithms on ranges resident - * in cuda memory. - * - * \p pointer has pointer semantics: it may be dereferenced and manipulated with pointer arithmetic. - * - * \p pointer can be created with the function \p cuda::malloc, or by explicitly calling its constructor - * with a raw pointer. - * - * The raw pointer encapsulated by a \p pointer may be obtained by eiter its get member function - * or the \p raw_pointer_cast function. - * - * \note \p pointer is not a "smart" pointer; it is the programmer's responsibility to deallocate memory - * pointed to by \p pointer. - * - * \tparam T specifies the type of the pointee. - * - * \see cuda::malloc - * \see cuda::free - * \see raw_pointer_cast - */ -template - class pointer - : public thrust::pointer< - T, - thrust::system::cuda::tag, - thrust::system::cuda::reference, - thrust::system::cuda::pointer - > -{ - /*! \cond - */ - - private: - typedef thrust::pointer< - T, - thrust::system::cuda::tag, - //thrust::system::cuda::reference, - typename detail::reference_msvc_workaround::type, - thrust::system::cuda::pointer - > super_t; - - /*! \endcond - */ - - public: - - /*! \p pointer's no-argument constructor initializes its encapsulated pointer to \c 0. - */ - __host__ __device__ - pointer() : super_t() {} - - /*! This constructor allows construction of a pointer from a T*. - * - * \param ptr A raw pointer to copy from, presumed to point to a location in memory - * accessible by the \p tbb system. - * \tparam OtherT \p OtherT shall be convertible to \p T. - */ - template - __host__ __device__ - explicit pointer(OtherT *ptr) : super_t(ptr) {} - - /*! This constructor allows construction from another pointer-like object with related type. - * - * \param other The \p OtherPointer to copy. 
- * \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible - * to \p thrust::system::cuda::tag and its element type shall be convertible to \p T. - */ - template - __host__ __device__ - pointer(const OtherPointer &other, - typename thrust::detail::enable_if_pointer_is_convertible< - OtherPointer, - pointer - >::type * = 0) : super_t(other) {} - - /*! Assignment operator allows assigning from another pointer-like object with related type. - * - * \param other The other pointer-like object to assign from. - * \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible - * to \p thrust::system::cuda::tag and its element type shall be convertible to \p T. - */ - template - __host__ __device__ - typename thrust::detail::enable_if_pointer_is_convertible< - OtherPointer, - pointer, - pointer & - >::type - operator=(const OtherPointer &other) - { - return super_t::operator=(other); - } -}; // end pointer - - -/*! \p reference is a wrapped reference to an object stored in memory available to the \p cuda system. - * \p reference is the type of the result of dereferencing a \p cuda::pointer. - * - * \tparam T Specifies the type of the referenced object. - */ -template - class reference - : public thrust::reference< - T, - thrust::system::cuda::pointer, - thrust::system::cuda::reference - > -{ - /*! \cond - */ - - private: - typedef thrust::reference< - T, - thrust::system::cuda::pointer, - thrust::system::cuda::reference - > super_t; - - /*! \endcond - */ - - public: - /*! \cond - */ - - typedef typename super_t::value_type value_type; - typedef typename super_t::pointer pointer; - - /*! \endcond - */ - - /*! This constructor initializes this \p reference to refer to an object - * pointed to by the given \p pointer. After this \p reference is constructed, - * it shall refer to the object pointed to by \p ptr. - * - * \param ptr A \p pointer to copy from. 
- */ - __host__ __device__ - explicit reference(const pointer &ptr) - : super_t(ptr) - {} - - /*! This constructor accepts a const reference to another \p reference of related type. - * After this \p reference is constructed, it shall refer to the same object as \p other. - * - * \param other A \p reference to copy from. - * \tparam OtherT The element type of the other \p reference. - * - * \note This constructor is templated primarily to allow initialization of reference - * from reference. - */ - template - __host__ __device__ - reference(const reference &other, - typename thrust::detail::enable_if_convertible< - typename reference::pointer, - pointer - >::type * = 0) - : super_t(other) - {} - - /*! Copy assignment operator copy assigns from another \p reference of related type. - * - * \param other The other \p reference to assign from. - * \return *this - * \tparam OtherT The element type of the other \p reference. - */ - template - __host__ __device__ - reference &operator=(const reference &other); - - /*! Assignment operator assigns from a \p value_type. - * - * \param x The \p value_type to assign from. - * \return *this - */ - __host__ __device__ - reference &operator=(const value_type &x); -}; // end reference - -/*! Exchanges the values of two objects referred to by \p reference. - * \p x The first \p reference of interest. - * \p y The second \p reference ot interest. - */ -template -__host__ __device__ -void swap(reference x, reference y); - -/*! Allocates an area of memory available to Thrust's cuda system. - * \param n Number of bytes to allocate. - * \return A cuda::pointer pointing to the beginning of the newly - * allocated memory. A null cuda::pointer is returned if - * an error occurs. - * \note The cuda::pointer returned by this function must be - * deallocated with \p cuda::free. - * \see cuda::free - * \see std::malloc - */ -inline pointer malloc(std::size_t n); - -/*! Allocates a typed area of memory available to Thrust's cuda system. 
- * \param n Number of elements to allocate. - * \return A cuda::pointer pointing to the beginning of the newly - * allocated memory. A null cuda::pointer is returned if - * an error occurs. - * \note The cuda::pointer returned by this function must be - * deallocated with \p cuda::free. - * \see cuda::free - * \see std::malloc - */ -template -inline pointer malloc(std::size_t n); - -/*! Deallocates an area of memory previously allocated by cuda::malloc. - * \param ptr A cuda::pointer pointing to the beginning of an area - * of memory previously allocated with cuda::malloc. - * \see cuda::malloc - * \see std::free - */ -inline void free(pointer ptr); - -// XXX upon c++11 -// template using allocator = thrust::detail::malloc_allocator >; - -/*! \p cuda::allocator is the default allocator used by the \p cuda system's containers such as - * cuda::vector if no user-specified allocator is provided. \p cuda::allocator allocates - * (deallocates) storage with \p cuda::malloc (\p cuda::free). - */ -template - struct allocator - : thrust::detail::malloc_allocator< - T, - tag, - pointer - > -{ - /*! The \p rebind metafunction provides the type of an \p allocator - * instantiated with another type. - * - * \tparam U The other type to use for instantiation. - */ - template - struct rebind - { - /*! The typedef \p other gives the type of the rebound \p allocator. - */ - typedef allocator other; - }; - - /*! No-argument constructor has no effect. - */ - __host__ __device__ - inline allocator() {} - - /*! Copy constructor has no effect. - */ - __host__ __device__ - inline allocator(const allocator &) {} - - /*! Constructor from other \p allocator has no effect. - */ - template - __host__ __device__ - inline allocator(const allocator &) {} - - /*! Destructor has no effect. - */ - __host__ __device__ - inline ~allocator() {} -}; // end allocator - -} // end cuda - -/*! \} - */ - -} // end system - -/*! 
\namespace thrust::cuda - * \brief \p thrust::cuda is a top-level alias for thrust::system::cuda. - */ -namespace cuda -{ - -using thrust::system::cuda::pointer; -using thrust::system::cuda::reference; -using thrust::system::cuda::malloc; -using thrust::system::cuda::free; -using thrust::system::cuda::allocator; - -} // end cuda - -} // end thrust - -#include - diff --git a/compat/thrust/system/cuda/vector.h b/compat/thrust/system/cuda/vector.h deleted file mode 100644 index ac47a84e1d..0000000000 --- a/compat/thrust/system/cuda/vector.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in ccudaliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file thrust/system/cuda/vector.h - * \brief A dynamically-sizable array of elements which reside in memory available to - * Thrust's CUDA system. - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -// forward declaration of host_vector -template class host_vector; - -namespace system -{ -namespace cuda -{ - -// XXX upon c++11 -// template > using vector = thrust::detail::vector_base; - -/*! \p cuda::vector is a container that supports random access to elements, - * constant time removal of elements at the end, and linear time insertion - * and removal of elements at the beginning or in the middle. The number of - * elements in a \p cuda::vector may vary dynamically; memory management is - * automatic. 
The elements contained in a \p cuda::vector reside in memory - * available to the \p cuda system. - * - * \tparam T The element type of the \p cuda::vector. - * \tparam Allocator The allocator type of the \p cuda::vector. Defaults to \p cuda::allocator. - * - * \see http://www.sgi.com/tech/stl/Vector.html - * \see host_vector For the documentation of the complete interface which is - * shared by \p cuda::vector - * \see device_vector - */ -template > - class vector - : public thrust::detail::vector_base -{ - /*! \cond - */ - private: - typedef thrust::detail::vector_base super_t; - /*! \endcond - */ - - public: - - /*! \cond - */ - typedef typename super_t::size_type size_type; - typedef typename super_t::value_type value_type; - /*! \endcond - */ - - /*! This constructor creates an empty \p cuda::vector. - */ - vector(); - - /*! This constructor creates a \p cuda::vector with \p n default-constructed elements. - * \param n The size of the \p cuda::vector to create. - */ - explicit vector(size_type n); - - /*! This constructor creates a \p cuda::vector with \p n copies of \p value. - * \param n The size of the \p cuda::vector to create. - * \param value An element to copy. - */ - explicit vector(size_type n, const value_type &value); - - /*! Copy constructor copies from another \p cuda::vector. - * \param x The other \p cuda::vector to copy. - */ - vector(const vector &x); - - /*! This constructor copies from another Thrust vector-like object. - * \param x The other object to copy from. - */ - template - vector(const thrust::detail::vector_base &x); - - /*! This constructor copies from a \c std::vector. - * \param x The \c std::vector to copy from. - */ - template - vector(const std::vector &x); - - /*! This constructor creates a \p cuda::vector by copying from a range. - * \param first The beginning of the range. - * \param last The end of the range. 
- */ - template - vector(InputIterator first, InputIterator last); - - // XXX vector_base should take a Derived type so we don't have to define these superfluous assigns - // - /*! Assignment operator assigns from a \c std::vector. - * \param x The \c std::vector to assign from. - * \return *this - */ - template - vector &operator=(const std::vector &x); - - /*! Assignment operator assigns from another Thrust vector-like object. - * \param x The other object to assign from. - * \return *this - */ - template - vector &operator=(const thrust::detail::vector_base &x); -}; // end vector - -} // end cuda -} // end system - -// alias system::cuda names at top-level -namespace cuda -{ - -using thrust::system::cuda::vector; - -} // end cuda - -} // end thrust - -#include - diff --git a/compat/thrust/system/detail/adl/adjacent_difference.h b/compat/thrust/system/detail/adl/adjacent_difference.h deleted file mode 100644 index 246c1163bb..0000000000 --- a/compat/thrust/system/detail/adl/adjacent_difference.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the adjacent_difference.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch adjacent_difference - -#define __THRUST_HOST_SYSTEM_ADJACENT_DIFFERENCE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/adjacent_difference.h> -#include __THRUST_HOST_SYSTEM_ADJACENT_DIFFERENCE_HEADER -#undef __THRUST_HOST_SYSTEM_ADJACENT_DIFFERENCE_HEADER - -#define __THRUST_DEVICE_SYSTEM_ADJACENT_DIFFERENCE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/adjacent_difference.h> -#include __THRUST_DEVICE_SYSTEM_ADJACENT_DIFFERENCE_HEADER -#undef __THRUST_DEVICE_SYSTEM_ADJACENT_DIFFERENCE_HEADER - diff --git a/compat/thrust/system/detail/adl/assign_value.h b/compat/thrust/system/detail/adl/assign_value.h deleted file mode 100644 index b5c588ace7..0000000000 --- a/compat/thrust/system/detail/adl/assign_value.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the assign_value.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch assign_value - -#define __THRUST_HOST_SYSTEM_ASSIGN_VALUE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/assign_value.h> -#include __THRUST_HOST_SYSTEM_ASSIGN_VALUE_HEADER -#undef __THRUST_HOST_SYSTEM_ASSIGN_VALUE_HEADER - -#define __THRUST_DEVICE_SYSTEM_ASSIGN_VALUE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/assign_value.h> -#include __THRUST_DEVICE_SYSTEM_ASSIGN_VALUE_HEADER -#undef __THRUST_DEVICE_SYSTEM_ASSIGN_VALUE_HEADER - diff --git a/compat/thrust/system/detail/adl/binary_search.h b/compat/thrust/system/detail/adl/binary_search.h deleted file mode 100644 index 7accfbc381..0000000000 --- a/compat/thrust/system/detail/adl/binary_search.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the binary_search.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch binary_search - -#define __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/binary_search.h> -#include __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER -#undef __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER - -#define __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/binary_search.h> -#include __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER -#undef __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER - diff --git a/compat/thrust/system/detail/adl/copy.h b/compat/thrust/system/detail/adl/copy.h deleted file mode 100644 index 91a32cd345..0000000000 --- a/compat/thrust/system/detail/adl/copy.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the copy.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch copy - -#define __THRUST_HOST_SYSTEM_COPY_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/copy.h> -#include __THRUST_HOST_SYSTEM_COPY_HEADER -#undef __THRUST_HOST_SYSTEM_COPY_HEADER - -#define __THRUST_DEVICE_SYSTEM_COPY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/copy.h> -#include __THRUST_DEVICE_SYSTEM_COPY_HEADER -#undef __THRUST_DEVICE_SYSTEM_COPY_HEADER - diff --git a/compat/thrust/system/detail/adl/copy_if.h b/compat/thrust/system/detail/adl/copy_if.h deleted file mode 100644 index fd1df977ab..0000000000 --- a/compat/thrust/system/detail/adl/copy_if.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy_if.h of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the copy_if.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch copy_if - -#define __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/copy_if.h> -#include __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER -#undef __THRUST_HOST_SYSTEM_BINARY_SEARCH_HEADER - -#define __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/copy_if.h> -#include __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER -#undef __THRUST_DEVICE_SYSTEM_BINARY_SEARCH_HEADER - diff --git a/compat/thrust/system/detail/adl/count.h b/compat/thrust/system/detail/adl/count.h deleted file mode 100644 index 0dd9591a23..0000000000 --- a/compat/thrust/system/detail/adl/count.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a count of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the count.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch count - -#define __THRUST_HOST_SYSTEM_COUNT_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/count.h> -#include __THRUST_HOST_SYSTEM_COUNT_HEADER -#undef __THRUST_HOST_SYSTEM_COUNT_HEADER - -#define __THRUST_DEVICE_SYSTEM_COUNT_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/count.h> -#include __THRUST_DEVICE_SYSTEM_COUNT_HEADER -#undef __THRUST_DEVICE_SYSTEM_COUNT_HEADER - diff --git a/compat/thrust/system/detail/adl/equal.h b/compat/thrust/system/detail/adl/equal.h deleted file mode 100644 index f933d4f93a..0000000000 --- a/compat/thrust/system/detail/adl/equal.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a equal of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the equal.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch equal - -#define __THRUST_HOST_SYSTEM_EQUAL_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/equal.h> -#include __THRUST_HOST_SYSTEM_EQUAL_HEADER -#undef __THRUST_HOST_SYSTEM_EQUAL_HEADER - -#define __THRUST_DEVICE_SYSTEM_EQUAL_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/equal.h> -#include __THRUST_DEVICE_SYSTEM_EQUAL_HEADER -#undef __THRUST_DEVICE_SYSTEM_EQUAL_HEADER - diff --git a/compat/thrust/system/detail/adl/extrema.h b/compat/thrust/system/detail/adl/extrema.h deleted file mode 100644 index c766570fc3..0000000000 --- a/compat/thrust/system/detail/adl/extrema.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a extrema of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the extrema.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch extrema - -#define __THRUST_HOST_SYSTEM_EXTREMA_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/extrema.h> -#include __THRUST_HOST_SYSTEM_EXTREMA_HEADER -#undef __THRUST_HOST_SYSTEM_EXTREMA_HEADER - -#define __THRUST_DEVICE_SYSTEM_EXTREMA_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/extrema.h> -#include __THRUST_DEVICE_SYSTEM_EXTREMA_HEADER -#undef __THRUST_DEVICE_SYSTEM_EXTREMA_HEADER - diff --git a/compat/thrust/system/detail/adl/fill.h b/compat/thrust/system/detail/adl/fill.h deleted file mode 100644 index b241b8a3dc..0000000000 --- a/compat/thrust/system/detail/adl/fill.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the fill.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch fill - -#define __THRUST_HOST_SYSTEM_FILL_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/fill.h> -#include __THRUST_HOST_SYSTEM_FILL_HEADER -#undef __THRUST_HOST_SYSTEM_FILL_HEADER - -#define __THRUST_DEVICE_SYSTEM_FILL_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/fill.h> -#include __THRUST_DEVICE_SYSTEM_FILL_HEADER -#undef __THRUST_DEVICE_SYSTEM_FILL_HEADER - diff --git a/compat/thrust/system/detail/adl/find.h b/compat/thrust/system/detail/adl/find.h deleted file mode 100644 index 7c99f3e7b1..0000000000 --- a/compat/thrust/system/detail/adl/find.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the find.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch find - -#define __THRUST_HOST_SYSTEM_FIND_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/find.h> -#include __THRUST_HOST_SYSTEM_FIND_HEADER -#undef __THRUST_HOST_SYSTEM_FIND_HEADER - -#define __THRUST_DEVICE_SYSTEM_FIND_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/find.h> -#include __THRUST_DEVICE_SYSTEM_FIND_HEADER -#undef __THRUST_DEVICE_SYSTEM_FIND_HEADER - diff --git a/compat/thrust/system/detail/adl/for_each.h b/compat/thrust/system/detail/adl/for_each.h deleted file mode 100644 index 0b2717f34c..0000000000 --- a/compat/thrust/system/detail/adl/for_each.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the for_each.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch for_each - -#define __THRUST_HOST_SYSTEM_FOR_EACH_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/for_each.h> -#include __THRUST_HOST_SYSTEM_FOR_EACH_HEADER -#undef __THRUST_HOST_SYSTEM_FOR_EACH_HEADER - -#define __THRUST_DEVICE_SYSTEM_FOR_EACH_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/for_each.h> -#include __THRUST_DEVICE_SYSTEM_FOR_EACH_HEADER -#undef __THRUST_DEVICE_SYSTEM_FOR_EACH_HEADER - diff --git a/compat/thrust/system/detail/adl/gather.h b/compat/thrust/system/detail/adl/gather.h deleted file mode 100644 index da4c1d13d5..0000000000 --- a/compat/thrust/system/detail/adl/gather.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the gather.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch gather - -#define __THRUST_HOST_SYSTEM_FOR_EACH_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/gather.h> -#include __THRUST_HOST_SYSTEM_FOR_EACH_HEADER -#undef __THRUST_HOST_SYSTEM_FOR_EACH_HEADER - -#define __THRUST_DEVICE_SYSTEM_FOR_EACH_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/gather.h> -#include __THRUST_DEVICE_SYSTEM_FOR_EACH_HEADER -#undef __THRUST_DEVICE_SYSTEM_FOR_EACH_HEADER - diff --git a/compat/thrust/system/detail/adl/generate.h b/compat/thrust/system/detail/adl/generate.h deleted file mode 100644 index 3a988478f0..0000000000 --- a/compat/thrust/system/detail/adl/generate.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the generate.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch generate - -#define __THRUST_HOST_SYSTEM_GENERATE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/generate.h> -#include __THRUST_HOST_SYSTEM_GENERATE_HEADER -#undef __THRUST_HOST_SYSTEM_GENERATE_HEADER - -#define __THRUST_DEVICE_SYSTEM_GENERATE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/generate.h> -#include __THRUST_DEVICE_SYSTEM_GENERATE_HEADER -#undef __THRUST_DEVICE_SYSTEM_GENERATE_HEADER - diff --git a/compat/thrust/system/detail/adl/get_value.h b/compat/thrust/system/detail/adl/get_value.h deleted file mode 100644 index ed4ef2cfef..0000000000 --- a/compat/thrust/system/detail/adl/get_value.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the get_value.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch get_value - -#define __THRUST_HOST_SYSTEM_GET_VALUE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/get_value.h> -#include __THRUST_HOST_SYSTEM_GET_VALUE_HEADER -#undef __THRUST_HOST_SYSTEM_GET_VALUE_HEADER - -#define __THRUST_DEVICE_SYSTEM_GET_VALUE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/get_value.h> -#include __THRUST_DEVICE_SYSTEM_GET_VALUE_HEADER -#undef __THRUST_DEVICE_SYSTEM_GET_VALUE_HEADER - diff --git a/compat/thrust/system/detail/adl/inner_product.h b/compat/thrust/system/detail/adl/inner_product.h deleted file mode 100644 index 18cc65b9c1..0000000000 --- a/compat/thrust/system/detail/adl/inner_product.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the inner_product.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch inner_product - -#define __THRUST_HOST_SYSTEM_INNER_PRODUCT_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/inner_product.h> -#include __THRUST_HOST_SYSTEM_INNER_PRODUCT_HEADER -#undef __THRUST_HOST_SYSTEM_INNER_PRODUCT_HEADER - -#define __THRUST_DEVICE_SYSTEM_INNER_PRODUCT_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/inner_product.h> -#include __THRUST_DEVICE_SYSTEM_INNER_PRODUCT_HEADER -#undef __THRUST_DEVICE_SYSTEM_INNER_PRODUCT_HEADER - diff --git a/compat/thrust/system/detail/adl/iter_swap.h b/compat/thrust/system/detail/adl/iter_swap.h deleted file mode 100644 index b302c25f7f..0000000000 --- a/compat/thrust/system/detail/adl/iter_swap.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the iter_swap.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch iter_swap - -#define __THRUST_HOST_SYSTEM_ITER_SWAP_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/iter_swap.h> -#include __THRUST_HOST_SYSTEM_ITER_SWAP_HEADER -#undef __THRUST_HOST_SYSTEM_ITER_SWAP_HEADER - -#define __THRUST_DEVICE_SYSTEM_ITER_SWAP_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/iter_swap.h> -#include __THRUST_DEVICE_SYSTEM_ITER_SWAP_HEADER -#undef __THRUST_DEVICE_SYSTEM_ITER_SWAP_HEADER - diff --git a/compat/thrust/system/detail/adl/logical.h b/compat/thrust/system/detail/adl/logical.h deleted file mode 100644 index 585f71af36..0000000000 --- a/compat/thrust/system/detail/adl/logical.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the logical.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch logical - -#define __THRUST_HOST_SYSTEM_LOGICAL_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/logical.h> -#include __THRUST_HOST_SYSTEM_LOGICAL_HEADER -#undef __THRUST_HOST_SYSTEM_LOGICAL_HEADER - -#define __THRUST_DEVICE_SYSTEM_LOGICAL_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/logical.h> -#include __THRUST_DEVICE_SYSTEM_LOGICAL_HEADER -#undef __THRUST_DEVICE_SYSTEM_LOGICAL_HEADER - diff --git a/compat/thrust/system/detail/adl/malloc_and_free.h b/compat/thrust/system/detail/adl/malloc_and_free.h deleted file mode 100644 index 7d99a260df..0000000000 --- a/compat/thrust/system/detail/adl/malloc_and_free.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the malloc_and_free.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch malloc_and_free - -#define __THRUST_HOST_SYSTEM_MALLOC_AND_FREE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/malloc_and_free.h> -#include __THRUST_HOST_SYSTEM_MALLOC_AND_FREE_HEADER -#undef __THRUST_HOST_SYSTEM_MALLOC_AND_FREE_HEADER - -#define __THRUST_DEVICE_SYSTEM_MALLOC_AND_FREE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/malloc_and_free.h> -#include __THRUST_DEVICE_SYSTEM_MALLOC_AND_FREE_HEADER -#undef __THRUST_DEVICE_SYSTEM_MALLOC_AND_FREE_HEADER - diff --git a/compat/thrust/system/detail/adl/merge.h b/compat/thrust/system/detail/adl/merge.h deleted file mode 100644 index 59d8aceb49..0000000000 --- a/compat/thrust/system/detail/adl/merge.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the merge.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch merge - -#define __THRUST_HOST_SYSTEM_MERGE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/merge.h> -#include __THRUST_HOST_SYSTEM_MERGE_HEADER -#undef __THRUST_HOST_SYSTEM_MERGE_HEADER - -#define __THRUST_DEVICE_SYSTEM_MERGE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/merge.h> -#include __THRUST_DEVICE_SYSTEM_MERGE_HEADER -#undef __THRUST_DEVICE_SYSTEM_MERGE_HEADER - diff --git a/compat/thrust/system/detail/adl/mismatch.h b/compat/thrust/system/detail/adl/mismatch.h deleted file mode 100644 index d2d1831374..0000000000 --- a/compat/thrust/system/detail/adl/mismatch.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the mismatch.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch mismatch - -#define __THRUST_HOST_SYSTEM_MISMATCH_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/mismatch.h> -#include __THRUST_HOST_SYSTEM_MISMATCH_HEADER -#undef __THRUST_HOST_SYSTEM_MISMATCH_HEADER - -#define __THRUST_DEVICE_SYSTEM_MISMATCH_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/mismatch.h> -#include __THRUST_DEVICE_SYSTEM_MISMATCH_HEADER -#undef __THRUST_DEVICE_SYSTEM_MISMATCH_HEADER - diff --git a/compat/thrust/system/detail/adl/partition.h b/compat/thrust/system/detail/adl/partition.h deleted file mode 100644 index efdc60555b..0000000000 --- a/compat/thrust/system/detail/adl/partition.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the partition.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch partition - -#define __THRUST_HOST_SYSTEM_PARTITION_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/partition.h> -#include __THRUST_HOST_SYSTEM_PARTITION_HEADER -#undef __THRUST_HOST_SYSTEM_PARTITION_HEADER - -#define __THRUST_DEVICE_SYSTEM_PARTITION_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/partition.h> -#include __THRUST_DEVICE_SYSTEM_PARTITION_HEADER -#undef __THRUST_DEVICE_SYSTEM_PARTITION_HEADER - diff --git a/compat/thrust/system/detail/adl/reduce.h b/compat/thrust/system/detail/adl/reduce.h deleted file mode 100644 index afa00f9c60..0000000000 --- a/compat/thrust/system/detail/adl/reduce.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the reduce.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch reduce - -#define __THRUST_HOST_SYSTEM_REDUCE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/reduce.h> -#include __THRUST_HOST_SYSTEM_REDUCE_HEADER -#undef __THRUST_HOST_SYSTEM_REDUCE_HEADER - -#define __THRUST_DEVICE_SYSTEM_REDUCE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/reduce.h> -#include __THRUST_DEVICE_SYSTEM_REDUCE_HEADER -#undef __THRUST_DEVICE_SYSTEM_REDUCE_HEADER - diff --git a/compat/thrust/system/detail/adl/reduce_by_key.h b/compat/thrust/system/detail/adl/reduce_by_key.h deleted file mode 100644 index eac65b72d2..0000000000 --- a/compat/thrust/system/detail/adl/reduce_by_key.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the reduce_by_key.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch reduce_by_key - -#define __THRUST_HOST_SYSTEM_REDUCE_BY_KEY_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/reduce_by_key.h> -#include __THRUST_HOST_SYSTEM_REDUCE_BY_KEY_HEADER -#undef __THRUST_HOST_SYSTEM_REDUCE_BY_KEY_HEADER - -#define __THRUST_DEVICE_SYSTEM_REDUCE_BY_KEY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/reduce_by_key.h> -#include __THRUST_DEVICE_SYSTEM_REDUCE_BY_KEY_HEADER -#undef __THRUST_DEVICE_SYSTEM_REDUCE_BY_KEY_HEADER - diff --git a/compat/thrust/system/detail/adl/remove.h b/compat/thrust/system/detail/adl/remove.h deleted file mode 100644 index 9d64be8da3..0000000000 --- a/compat/thrust/system/detail/adl/remove.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the remove.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch remove - -#define __THRUST_HOST_SYSTEM_REMOVE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/remove.h> -#include __THRUST_HOST_SYSTEM_REMOVE_HEADER -#undef __THRUST_HOST_SYSTEM_REMOVE_HEADER - -#define __THRUST_DEVICE_SYSTEM_REMOVE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/remove.h> -#include __THRUST_DEVICE_SYSTEM_REMOVE_HEADER -#undef __THRUST_DEVICE_SYSTEM_REMOVE_HEADER - diff --git a/compat/thrust/system/detail/adl/replace.h b/compat/thrust/system/detail/adl/replace.h deleted file mode 100644 index e4d8bd22ac..0000000000 --- a/compat/thrust/system/detail/adl/replace.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the replace.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch replace - -#define __THRUST_HOST_SYSTEM_REPLACE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/replace.h> -#include __THRUST_HOST_SYSTEM_REPLACE_HEADER -#undef __THRUST_HOST_SYSTEM_REPLACE_HEADER - -#define __THRUST_DEVICE_SYSTEM_REPLACE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/replace.h> -#include __THRUST_DEVICE_SYSTEM_REPLACE_HEADER -#undef __THRUST_DEVICE_SYSTEM_REPLACE_HEADER - diff --git a/compat/thrust/system/detail/adl/reverse.h b/compat/thrust/system/detail/adl/reverse.h deleted file mode 100644 index 8cbcfd833c..0000000000 --- a/compat/thrust/system/detail/adl/reverse.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the reverse.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch reverse - -#define __THRUST_HOST_SYSTEM_REVERSE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/reverse.h> -#include __THRUST_HOST_SYSTEM_REVERSE_HEADER -#undef __THRUST_HOST_SYSTEM_REVERSE_HEADER - -#define __THRUST_DEVICE_SYSTEM_REVERSE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/reverse.h> -#include __THRUST_DEVICE_SYSTEM_REVERSE_HEADER -#undef __THRUST_DEVICE_SYSTEM_REVERSE_HEADER - diff --git a/compat/thrust/system/detail/adl/scan.h b/compat/thrust/system/detail/adl/scan.h deleted file mode 100644 index e70cd9fdda..0000000000 --- a/compat/thrust/system/detail/adl/scan.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the scan.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch scan - -#define __THRUST_HOST_SYSTEM_SCAN_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/scan.h> -#include __THRUST_HOST_SYSTEM_SCAN_HEADER -#undef __THRUST_HOST_SYSTEM_SCAN_HEADER - -#define __THRUST_DEVICE_SYSTEM_SCAN_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/scan.h> -#include __THRUST_DEVICE_SYSTEM_SCAN_HEADER -#undef __THRUST_DEVICE_SYSTEM_SCAN_HEADER - diff --git a/compat/thrust/system/detail/adl/scan_by_key.h b/compat/thrust/system/detail/adl/scan_by_key.h deleted file mode 100644 index 02c4b84751..0000000000 --- a/compat/thrust/system/detail/adl/scan_by_key.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the scan_by_key.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch scan_by_key - -#define __THRUST_HOST_SYSTEM_SCAN_BY_KEY_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/scan_by_key.h> -#include __THRUST_HOST_SYSTEM_SCAN_BY_KEY_HEADER -#undef __THRUST_HOST_SYSTEM_SCAN_BY_KEY_HEADER - -#define __THRUST_DEVICE_SYSTEM_SCAN_BY_KEY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/scan_by_key.h> -#include __THRUST_DEVICE_SYSTEM_SCAN_BY_KEY_HEADER -#undef __THRUST_DEVICE_SYSTEM_SCAN_BY_KEY_HEADER - diff --git a/compat/thrust/system/detail/adl/scatter.h b/compat/thrust/system/detail/adl/scatter.h deleted file mode 100644 index b94b0d9892..0000000000 --- a/compat/thrust/system/detail/adl/scatter.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the scatter.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch scatter - -#define __THRUST_HOST_SYSTEM_SCATTER_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/scatter.h> -#include __THRUST_HOST_SYSTEM_SCATTER_HEADER -#undef __THRUST_HOST_SYSTEM_SCATTER_HEADER - -#define __THRUST_DEVICE_SYSTEM_SCATTER_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/scatter.h> -#include __THRUST_DEVICE_SYSTEM_SCATTER_HEADER -#undef __THRUST_DEVICE_SYSTEM_SCATTER_HEADER - diff --git a/compat/thrust/system/detail/adl/sequence.h b/compat/thrust/system/detail/adl/sequence.h deleted file mode 100644 index 07dcc7b7c1..0000000000 --- a/compat/thrust/system/detail/adl/sequence.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the sequence.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch sequence - -#define __THRUST_HOST_SYSTEM_SEQUENCE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/sequence.h> -#include __THRUST_HOST_SYSTEM_SEQUENCE_HEADER -#undef __THRUST_HOST_SYSTEM_SEQUENCE_HEADER - -#define __THRUST_DEVICE_SYSTEM_SEQUENCE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/sequence.h> -#include __THRUST_DEVICE_SYSTEM_SEQUENCE_HEADER -#undef __THRUST_DEVICE_SYSTEM_SEQUENCE_HEADER - diff --git a/compat/thrust/system/detail/adl/set_operations.h b/compat/thrust/system/detail/adl/set_operations.h deleted file mode 100644 index 9901b46851..0000000000 --- a/compat/thrust/system/detail/adl/set_operations.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the set_operations.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch set_operations - -#define __THRUST_HOST_SYSTEM_SET_OPERATIONS_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/set_operations.h> -#include __THRUST_HOST_SYSTEM_SET_OPERATIONS_HEADER -#undef __THRUST_HOST_SYSTEM_SET_OPERATIONS_HEADER - -#define __THRUST_DEVICE_SYSTEM_SET_OPERATIONS_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/set_operations.h> -#include __THRUST_DEVICE_SYSTEM_SET_OPERATIONS_HEADER -#undef __THRUST_DEVICE_SYSTEM_SET_OPERATIONS_HEADER - diff --git a/compat/thrust/system/detail/adl/sort.h b/compat/thrust/system/detail/adl/sort.h deleted file mode 100644 index afcb903f87..0000000000 --- a/compat/thrust/system/detail/adl/sort.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the sort.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch sort - -#define __THRUST_HOST_SYSTEM_SORT_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/sort.h> -#include __THRUST_HOST_SYSTEM_SORT_HEADER -#undef __THRUST_HOST_SYSTEM_SORT_HEADER - -#define __THRUST_DEVICE_SYSTEM_SORT_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/sort.h> -#include __THRUST_DEVICE_SYSTEM_SORT_HEADER -#undef __THRUST_DEVICE_SYSTEM_SORT_HEADER - diff --git a/compat/thrust/system/detail/adl/swap_ranges.h b/compat/thrust/system/detail/adl/swap_ranges.h deleted file mode 100644 index c0069369e0..0000000000 --- a/compat/thrust/system/detail/adl/swap_ranges.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the swap_ranges.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch swap_ranges - -#define __THRUST_HOST_SYSTEM_SWAP_RANGES_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/swap_ranges.h> -#include __THRUST_HOST_SYSTEM_SWAP_RANGES_HEADER -#undef __THRUST_HOST_SYSTEM_SWAP_RANGES_HEADER - -#define __THRUST_DEVICE_SYSTEM_SWAP_RANGES_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/swap_ranges.h> -#include __THRUST_DEVICE_SYSTEM_SWAP_RANGES_HEADER -#undef __THRUST_DEVICE_SYSTEM_SWAP_RANGES_HEADER - diff --git a/compat/thrust/system/detail/adl/tabulate.h b/compat/thrust/system/detail/adl/tabulate.h deleted file mode 100644 index cb1fdebd11..0000000000 --- a/compat/thrust/system/detail/adl/tabulate.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the tabulate.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch tabulate - -#define __THRUST_HOST_SYSTEM_TABULATE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/tabulate.h> -#include __THRUST_HOST_SYSTEM_TABULATE_HEADER -#undef __THRUST_HOST_SYSTEM_TABULATE_HEADER - -#define __THRUST_DEVICE_SYSTEM_TABULATE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/tabulate.h> -#include __THRUST_DEVICE_SYSTEM_TABULATE_HEADER -#undef __THRUST_DEVICE_SYSTEM_TABULATE_HEADER - diff --git a/compat/thrust/system/detail/adl/temporary_buffer.h b/compat/thrust/system/detail/adl/temporary_buffer.h deleted file mode 100644 index 66df0ea85b..0000000000 --- a/compat/thrust/system/detail/adl/temporary_buffer.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the temporary_buffer.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch get_temporary_buffer or return_temporary_buffer - -#define __THRUST_HOST_SYSTEM_TEMPORARY_BUFFER_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/temporary_buffer.h> -#include __THRUST_HOST_SYSTEM_TEMPORARY_BUFFER_HEADER -#undef __THRUST_HOST_SYSTEM_TEMPORARY_BUFFER_HEADER - -#define __THRUST_DEVICE_SYSTEM_TEMPORARY_BUFFER_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/temporary_buffer.h> -#include __THRUST_DEVICE_SYSTEM_TEMPORARY_BUFFER_HEADER -#undef __THRUST_DEVICE_SYSTEM_TEMPORARY_BUFFER_HEADER - diff --git a/compat/thrust/system/detail/adl/transform.h b/compat/thrust/system/detail/adl/transform.h deleted file mode 100644 index c9e6a01ea1..0000000000 --- a/compat/thrust/system/detail/adl/transform.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the transform.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch transform - -#define __THRUST_HOST_SYSTEM_TRANSFORM_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/transform.h> -#include __THRUST_HOST_SYSTEM_TRANSFORM_HEADER -#undef __THRUST_HOST_SYSTEM_TRANSFORM_HEADER - -#define __THRUST_DEVICE_SYSTEM_TRANSFORM_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/transform.h> -#include __THRUST_DEVICE_SYSTEM_TRANSFORM_HEADER -#undef __THRUST_DEVICE_SYSTEM_TRANSFORM_HEADER - diff --git a/compat/thrust/system/detail/adl/transform_reduce.h b/compat/thrust/system/detail/adl/transform_reduce.h deleted file mode 100644 index 0a5d97749f..0000000000 --- a/compat/thrust/system/detail/adl/transform_reduce.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the transform_reduce.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch transform_reduce - -#define __THRUST_HOST_SYSTEM_TRANSFORM_REDUCE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/transform_reduce.h> -#include __THRUST_HOST_SYSTEM_TRANSFORM_REDUCE_HEADER -#undef __THRUST_HOST_SYSTEM_TRANSFORM_REDUCE_HEADER - -#define __THRUST_DEVICE_SYSTEM_TRANSFORM_REDUCE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/transform_reduce.h> -#include __THRUST_DEVICE_SYSTEM_TRANSFORM_REDUCE_HEADER -#undef __THRUST_DEVICE_SYSTEM_TRANSFORM_REDUCE_HEADER - diff --git a/compat/thrust/system/detail/adl/transform_scan.h b/compat/thrust/system/detail/adl/transform_scan.h deleted file mode 100644 index 47c1dc3ae9..0000000000 --- a/compat/thrust/system/detail/adl/transform_scan.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the transform_scan.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch transform_scan - -#define __THRUST_HOST_SYSTEM_TRANSFORM_SCAN_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/transform_scan.h> -#include __THRUST_HOST_SYSTEM_TRANSFORM_SCAN_HEADER -#undef __THRUST_HOST_SYSTEM_TRANSFORM_SCAN_HEADER - -#define __THRUST_DEVICE_SYSTEM_TRANSFORM_SCAN_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/transform_scan.h> -#include __THRUST_DEVICE_SYSTEM_TRANSFORM_SCAN_HEADER -#undef __THRUST_DEVICE_SYSTEM_TRANSFORM_SCAN_HEADER - diff --git a/compat/thrust/system/detail/adl/uninitialized_copy.h b/compat/thrust/system/detail/adl/uninitialized_copy.h deleted file mode 100644 index 7cb0b8e401..0000000000 --- a/compat/thrust/system/detail/adl/uninitialized_copy.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the uninitialized_copy.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch uninitialized_copy - -#define __THRUST_HOST_SYSTEM_UNINITIALIZED_COPY_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/uninitialized_copy.h> -#include __THRUST_HOST_SYSTEM_UNINITIALIZED_COPY_HEADER -#undef __THRUST_HOST_SYSTEM_UNINITIALIZED_COPY_HEADER - -#define __THRUST_DEVICE_SYSTEM_UNINITIALIZED_COPY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/uninitialized_copy.h> -#include __THRUST_DEVICE_SYSTEM_UNINITIALIZED_COPY_HEADER -#undef __THRUST_DEVICE_SYSTEM_UNINITIALIZED_COPY_HEADER - diff --git a/compat/thrust/system/detail/adl/uninitialized_fill.h b/compat/thrust/system/detail/adl/uninitialized_fill.h deleted file mode 100644 index 9f00b51622..0000000000 --- a/compat/thrust/system/detail/adl/uninitialized_fill.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the uninitialized_fill.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch uninitialized_fill - -#define __THRUST_HOST_SYSTEM_UNINITIALIZED_FILL_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/uninitialized_fill.h> -#include __THRUST_HOST_SYSTEM_UNINITIALIZED_FILL_HEADER -#undef __THRUST_HOST_SYSTEM_UNINITIALIZED_FILL_HEADER - -#define __THRUST_DEVICE_SYSTEM_UNINITIALIZED_FILL_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/uninitialized_fill.h> -#include __THRUST_DEVICE_SYSTEM_UNINITIALIZED_FILL_HEADER -#undef __THRUST_DEVICE_SYSTEM_UNINITIALIZED_FILL_HEADER - diff --git a/compat/thrust/system/detail/adl/unique.h b/compat/thrust/system/detail/adl/unique.h deleted file mode 100644 index 932ff58e07..0000000000 --- a/compat/thrust/system/detail/adl/unique.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the unique.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch unique - -#define __THRUST_HOST_SYSTEM_UNIQUE_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/unique.h> -#include __THRUST_HOST_SYSTEM_UNIQUE_HEADER -#undef __THRUST_HOST_SYSTEM_UNIQUE_HEADER - -#define __THRUST_DEVICE_SYSTEM_UNIQUE_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/unique.h> -#include __THRUST_DEVICE_SYSTEM_UNIQUE_HEADER -#undef __THRUST_DEVICE_SYSTEM_UNIQUE_HEADER - diff --git a/compat/thrust/system/detail/adl/unique_by_key.h b/compat/thrust/system/detail/adl/unique_by_key.h deleted file mode 100644 index 30e6f2f2d1..0000000000 --- a/compat/thrust/system/detail/adl/unique_by_key.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a fill of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// the purpose of this header is to #include the unique_by_key.h header -// of the host and device systems. 
It should be #included in any -// code which uses adl to dispatch unique_by_key - -#define __THRUST_HOST_SYSTEM_UNIQUE_BY_KEY_HEADER <__THRUST_HOST_SYSTEM_ROOT/detail/unique_by_key.h> -#include __THRUST_HOST_SYSTEM_UNIQUE_BY_KEY_HEADER -#undef __THRUST_HOST_SYSTEM_UNIQUE_BY_KEY_HEADER - -#define __THRUST_DEVICE_SYSTEM_UNIQUE_BY_KEY_HEADER <__THRUST_DEVICE_SYSTEM_ROOT/detail/unique_by_key.h> -#include __THRUST_DEVICE_SYSTEM_UNIQUE_BY_KEY_HEADER -#undef __THRUST_DEVICE_SYSTEM_UNIQUE_BY_KEY_HEADER - diff --git a/compat/thrust/system/detail/bad_alloc.h b/compat/thrust/system/detail/bad_alloc.h deleted file mode 100644 index bb73d1f006..0000000000 --- a/compat/thrust/system/detail/bad_alloc.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ - -// define our own bad_alloc so we can set its .what() -class bad_alloc - : public std::bad_alloc -{ - public: - inline bad_alloc(const std::string &w) - : std::bad_alloc(), m_what() - { - m_what = std::bad_alloc::what(); - m_what += ": "; - m_what += w; - } // end bad_alloc() - - inline virtual ~bad_alloc(void) throw () {}; - - inline virtual const char *what(void) const throw() - { - return m_what.c_str(); - } // end what() - - private: - std::string m_what; -}; // end bad_alloc - -} // end detail -} // end system -} // end thrust - diff --git a/compat/thrust/system/detail/errno.h b/compat/thrust/system/detail/errno.h deleted file mode 100644 index 34bc8cc568..0000000000 --- a/compat/thrust/system/detail/errno.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include - -// The rationale for the existence of these apparently redundant definitions is -// to provide them portably and to avoid bringing in system headers which might -// pollute the global namespace. These identifiers are in lowercase to avoid -// colliding with the real macros in errno.h. 
- -namespace thrust -{ - -namespace system -{ - -namespace detail -{ - -static const int eafnosupport = 9901; -static const int eaddrinuse = 9902; -static const int eaddrnotavail = 9903; -static const int eisconn = 9904; -static const int ebadmsg = 9905; -static const int econnaborted = 9906; -static const int ealready = 9907; -static const int econnrefused = 9908; -static const int econnreset = 9909; -static const int edestaddrreq = 9910; -static const int ehostunreach = 9911; -static const int eidrm = 9912; -static const int emsgsize = 9913; -static const int enetdown = 9914; -static const int enetreset = 9915; -static const int enetunreach = 9916; -static const int enobufs = 9917; -static const int enolink = 9918; -static const int enodata = 9919; -static const int enomsg = 9920; -static const int enoprotoopt = 9921; -static const int enosr = 9922; -static const int enotsock = 9923; -static const int enostr = 9924; -static const int enotconn = 9925; -static const int enotsup = 9926; -static const int ecanceled = 9927; -static const int einprogress = 9928; -static const int eopnotsupp = 9929; -static const int ewouldblock = 9930; -static const int eownerdead = 9931; -static const int eproto = 9932; -static const int eprotonosupport = 9933; -static const int enotrecoverable = 9934; -static const int etime = 9935; -static const int etxtbsy = 9936; -static const int etimedout = 9938; -static const int eloop = 9939; -static const int eoverflow = 9940; -static const int eprototype = 9941; -static const int enosys = 9942; -static const int einval = 9943; -static const int erange = 9944; -static const int eilseq = 9945; -static const int e2big = 9946; -static const int edom = 9947; -static const int efault = 9948; -static const int ebadf = 9949; -static const int epipe = 9950; -static const int exdev = 9951; -static const int ebusy = 9952; -static const int enotempty = 9953; -static const int enoexec = 9954; -static const int eexist = 9955; -static const int efbig = 
9956; -static const int enametoolong = 9957; -static const int enotty = 9958; -static const int eintr = 9959; -static const int espipe = 9960; -static const int eio = 9961; -static const int eisdir = 9962; -static const int echild = 9963; -static const int enolck = 9964; -static const int enospc = 9965; -static const int enxio = 9966; -static const int enodev = 9967; -static const int enoent = 9968; -static const int esrch = 9969; -static const int enotdir = 9970; -static const int enomem = 9971; -static const int eperm = 9972; -static const int eacces = 9973; -static const int erofs = 9974; -static const int edeadlk = 9975; -static const int eagain = 9976; -static const int enfile = 9977; -static const int emfile = 9978; -static const int emlink = 9979; - -} // end detail - -} // end system - -} // end thrust - diff --git a/compat/thrust/system/detail/error_category.inl b/compat/thrust/system/detail/error_category.inl deleted file mode 100644 index 8e19c89db5..0000000000 --- a/compat/thrust/system/detail/error_category.inl +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -namespace system -{ - -error_category - ::~error_category(void) -{ - ; -} // end error_category::~error_category() - - -error_condition error_category - ::default_error_condition(int ev) const -{ - return error_condition(ev, *this); -} // end error_category::default_error_condition() - - -bool error_category - ::equivalent(int code, const error_condition &condition) const -{ - return default_error_condition(code) == condition; -} // end error_condition::equivalent() - - -bool error_category - ::equivalent(const error_code &code, int condition) const -{ - bool result = (this->operator==(code.category())) && (code.value() == condition); - return result; -} // end error_code::equivalent() - - -bool error_category - ::operator==(const error_category &rhs) const -{ - return this == &rhs; -} // end error_category::operator==() - - -bool error_category - ::operator!=(const error_category &rhs) const -{ - return !this->operator==(rhs); -} // end error_category::operator!=() - - -bool error_category - ::operator<(const error_category &rhs) const -{ - return thrust::less()(this,&rhs); -} // end error_category::operator<() - - -namespace detail -{ - - -class generic_error_category - : public error_category -{ - public: - inline generic_error_category(void) {} - - inline virtual const char *name(void) const - { - return "generic"; - } - - inline virtual std::string message(int ev) const - { - static const std::string unknown_err("Unknown error"); - - // XXX strerror is not thread-safe: - // prefer strerror_r (which is not provided on windows) - const char *c_str = std::strerror(ev); - return c_str ? 
std::string(c_str) : unknown_err; - } -}; // end generic_category_result - - -class system_error_category - : public error_category -{ - public: - inline system_error_category(void) {} - - inline virtual const char *name(void) const - { - return "system"; - } - - inline virtual std::string message(int ev) const - { - return generic_category().message(ev); - } - - inline virtual error_condition default_error_condition(int ev) const - { - using namespace errc; - - switch(ev) - { - case eafnosupport: return make_error_condition(address_family_not_supported); - case eaddrinuse: return make_error_condition(address_in_use); - case eaddrnotavail: return make_error_condition(address_not_available); - case eisconn: return make_error_condition(already_connected); - case e2big: return make_error_condition(argument_list_too_long); - case edom: return make_error_condition(argument_out_of_domain); - case efault: return make_error_condition(bad_address); - case ebadf: return make_error_condition(bad_file_descriptor); - case ebadmsg: return make_error_condition(bad_message); - case epipe: return make_error_condition(broken_pipe); - case econnaborted: return make_error_condition(connection_aborted); - case ealready: return make_error_condition(connection_already_in_progress); - case econnrefused: return make_error_condition(connection_refused); - case econnreset: return make_error_condition(connection_reset); - case exdev: return make_error_condition(cross_device_link); - case edestaddrreq: return make_error_condition(destination_address_required); - case ebusy: return make_error_condition(device_or_resource_busy); - case enotempty: return make_error_condition(directory_not_empty); - case enoexec: return make_error_condition(executable_format_error); - case eexist: return make_error_condition(file_exists); - case efbig: return make_error_condition(file_too_large); - case enametoolong: return make_error_condition(filename_too_long); - case enosys: return 
make_error_condition(function_not_supported); - case ehostunreach: return make_error_condition(host_unreachable); - case eidrm: return make_error_condition(identifier_removed); - case eilseq: return make_error_condition(illegal_byte_sequence); - case enotty: return make_error_condition(inappropriate_io_control_operation); - case eintr: return make_error_condition(interrupted); - case einval: return make_error_condition(invalid_argument); - case espipe: return make_error_condition(invalid_seek); - case eio: return make_error_condition(io_error); - case eisdir: return make_error_condition(is_a_directory); - case emsgsize: return make_error_condition(message_size); - case enetdown: return make_error_condition(network_down); - case enetreset: return make_error_condition(network_reset); - case enetunreach: return make_error_condition(network_unreachable); - case enobufs: return make_error_condition(no_buffer_space); - case echild: return make_error_condition(no_child_process); - case enolink: return make_error_condition(no_link); - case enolck: return make_error_condition(no_lock_available); - case enodata: return make_error_condition(no_message_available); - case enomsg: return make_error_condition(no_message); - case enoprotoopt: return make_error_condition(no_protocol_option); - case enospc: return make_error_condition(no_space_on_device); - case enosr: return make_error_condition(no_stream_resources); - case enxio: return make_error_condition(no_such_device_or_address); - case enodev: return make_error_condition(no_such_device); - case enoent: return make_error_condition(no_such_file_or_directory); - case esrch: return make_error_condition(no_such_process); - case enotdir: return make_error_condition(not_a_directory); - case enotsock: return make_error_condition(not_a_socket); - case enostr: return make_error_condition(not_a_stream); - case enotconn: return make_error_condition(not_connected); - case enomem: return make_error_condition(not_enough_memory); - case 
enotsup: return make_error_condition(not_supported); - case ecanceled: return make_error_condition(operation_canceled); - case einprogress: return make_error_condition(operation_in_progress); - case eperm: return make_error_condition(operation_not_permitted); - case eopnotsupp: return make_error_condition(operation_not_supported); - case ewouldblock: return make_error_condition(operation_would_block); - case eownerdead: return make_error_condition(owner_dead); - case eacces: return make_error_condition(permission_denied); - case eproto: return make_error_condition(protocol_error); - case eprotonosupport: return make_error_condition(protocol_not_supported); - case erofs: return make_error_condition(read_only_file_system); - case edeadlk: return make_error_condition(resource_deadlock_would_occur); - case eagain: return make_error_condition(resource_unavailable_try_again); - case erange: return make_error_condition(result_out_of_range); - case enotrecoverable: return make_error_condition(state_not_recoverable); - case etime: return make_error_condition(stream_timeout); - case etxtbsy: return make_error_condition(text_file_busy); - case etimedout: return make_error_condition(timed_out); - case enfile: return make_error_condition(too_many_files_open_in_system); - case emfile: return make_error_condition(too_many_files_open); - case emlink: return make_error_condition(too_many_links); - case eloop: return make_error_condition(too_many_symbolic_link_levels); - case eoverflow: return make_error_condition(value_too_large); - case eprototype: return make_error_condition(wrong_protocol_type); - default: return error_condition(ev,system_category()); - } - } -}; // end system_category_result - - -} // end detail - - -const error_category &generic_category(void) -{ - static const detail::generic_error_category result; - return result; -} - - -const error_category &system_category(void) -{ - static const detail::system_error_category result; - return result; -} - - -} // end 
system - -} // end thrust - diff --git a/compat/thrust/system/detail/error_code.inl b/compat/thrust/system/detail/error_code.inl deleted file mode 100644 index 0cf86b4821..0000000000 --- a/compat/thrust/system/detail/error_code.inl +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include - -namespace thrust -{ - -namespace system -{ - -error_code - ::error_code(void) - :m_val(0),m_cat(&system_category()) -{ - ; -} // end error_code::error_code() - - -error_code - ::error_code(int val, const error_category &cat) - :m_val(val),m_cat(&cat) -{ - ; -} // end error_code::error_code() - - -template - error_code - ::error_code(ErrorCodeEnum e -// XXX WAR msvc's problem with enable_if -#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC - , typename thrust::detail::enable_if::value>::type * -#endif // THRUST_HOST_COMPILER_MSVC - ) -{ - *this = make_error_code(e); -} // end error_code::error_code() - - -void error_code - ::assign(int val, const error_category &cat) -{ - m_val = val; - m_cat = &cat; -} // end error_code::assign() - - -template -// XXX WAR msvc's problem with enable_if -#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC - typename thrust::detail::enable_if::value, error_code>::type & -#else - error_code & -#endif // THRUST_HOST_COMPILER_MSVC - error_code - ::operator=(ErrorCodeEnum e) -{ - *this = make_error_code(e); - return *this; -} // end 
error_code::operator=() - - -void error_code - ::clear(void) -{ - m_val = 0; - m_cat = &system_category(); -} // end error_code::clear() - - -int error_code - ::value(void) const -{ - return m_val; -} // end error_code::value() - - -const error_category &error_code - ::category(void) const -{ - return *m_cat; -} // end error_code::category() - - -error_condition error_code - ::default_error_condition(void) const -{ - return category().default_error_condition(value()); -} // end error_code::default_error_condition() - - -std::string error_code - ::message(void) const -{ - return category().message(value()); -} // end error_code::message() - - -error_code - ::operator bool (void) const -{ - return value() != 0; -} // end error_code::operator bool () - - -error_code make_error_code(errc::errc_t e) -{ - return error_code(static_cast(e), generic_category()); -} // end make_error_code() - - -bool operator<(const error_code &lhs, const error_code &rhs) -{ - bool result = lhs.category().operator<(rhs.category()); - result = result || lhs.category().operator==(rhs.category()); - result = result || lhs.value() < rhs.value(); - return result; -} // end operator==() - - -template - std::basic_ostream& - operator<<(std::basic_ostream &os, const error_code &ec) -{ - return os << ec.category().name() << ':' << ec.value(); -} // end operator<<() - - -bool operator==(const error_code &lhs, const error_code &rhs) -{ - return lhs.category().operator==(rhs.category()) && lhs.value() == rhs.value(); -} // end operator==() - - -bool operator==(const error_code &lhs, const error_condition &rhs) -{ - return lhs.category().equivalent(lhs.value(), rhs) || rhs.category().equivalent(lhs,rhs.value()); -} // end operator==() - - -bool operator==(const error_condition &lhs, const error_code &rhs) -{ - return rhs.category().equivalent(lhs.value(), lhs) || lhs.category().equivalent(rhs, lhs.value()); -} // end operator==() - - -bool operator==(const error_condition &lhs, const error_condition 
&rhs) -{ - return lhs.category().operator==(rhs.category()) && lhs.value() == rhs.value(); -} // end operator==() - - -bool operator!=(const error_code &lhs, const error_code &rhs) -{ - return !(lhs == rhs); -} // end operator!=() - - -bool operator!=(const error_code &lhs, const error_condition &rhs) -{ - return !(lhs == rhs); -} // end operator!=() - - -bool operator!=(const error_condition &lhs, const error_code &rhs) -{ - return !(lhs == rhs); -} // end operator!=() - - -bool operator!=(const error_condition &lhs, const error_condition &rhs) -{ - return !(lhs == rhs); -} // end operator!=() - - -} // end system - -} // end thrust - diff --git a/compat/thrust/system/detail/error_condition.inl b/compat/thrust/system/detail/error_condition.inl deleted file mode 100644 index 00fbaf091d..0000000000 --- a/compat/thrust/system/detail/error_condition.inl +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include -#include - -namespace thrust -{ - -namespace system -{ - -error_condition - ::error_condition(void) - :m_val(0),m_cat(&generic_category()) -{ - ; -} // end error_condition::error_condition() - - -error_condition - ::error_condition(int val, const error_category &cat) - :m_val(val),m_cat(&cat) -{ - ; -} // end error_condition::error_condition() - - -template - error_condition - ::error_condition(ErrorConditionEnum e -// XXX WAR msvc's problem with enable_if -#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC - , typename thrust::detail::enable_if::value>::type * -#endif // THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC - ) -{ - *this = make_error_condition(e); -} // end error_condition::error_condition() - - -void error_condition - ::assign(int val, const error_category &cat) -{ - m_val = val; - m_cat = &cat; -} // end error_category::assign() - - -template -// XXX WAR msvc's problem with enable_if -#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC - typename thrust::detail::enable_if::value, error_condition>::type & -#else - error_condition & -#endif // THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC - error_condition - ::operator=(ErrorConditionEnum e) -{ - *this = make_error_condition(e); - return *this; -} // end error_condition::operator=() - - -void error_condition - ::clear(void) -{ - m_val = 0; - m_cat = &generic_category(); -} // end error_condition::clear() - - -int error_condition - ::value(void) const -{ - return m_val; -} // end error_condition::value() - - -const error_category &error_condition - ::category(void) const -{ - return *m_cat; -} // end error_condition::category() - - -std::string error_condition - ::message(void) const -{ - return category().message(value()); -} // end error_condition::message() - - -error_condition - ::operator bool (void) const -{ - return value() != 0; -} // end error_condition::operator bool () - - -error_condition make_error_condition(errc::errc_t e) -{ - return 
error_condition(static_cast(e), generic_category()); -} // end make_error_condition() - - -bool operator<(const error_condition &lhs, - const error_condition &rhs) -{ - return lhs.category().operator<(rhs.category()) || (lhs.category().operator==(rhs.category()) && (lhs.value() < rhs.value())); -} // end operator<() - - -} // end system - -} // end thrust - diff --git a/compat/thrust/system/detail/generic/adjacent_difference.h b/compat/thrust/system/detail/generic/adjacent_difference.h deleted file mode 100644 index bb340df490..0000000000 --- a/compat/thrust/system/detail/generic/adjacent_difference.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file adjacent_difference.h - * \brief Generic implementation of adjacent_difference. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template -OutputIterator adjacent_difference(thrust::execution_policy &exec, - InputIterator first, InputIterator last, - OutputIterator result); - -template -OutputIterator adjacent_difference(thrust::execution_policy &exec, - InputIterator first, InputIterator last, - OutputIterator result, - BinaryFunction binary_op); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/adjacent_difference.inl b/compat/thrust/system/detail/generic/adjacent_difference.inl deleted file mode 100644 index 619b29f860..0000000000 --- a/compat/thrust/system/detail/generic/adjacent_difference.inl +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template -OutputIterator adjacent_difference(thrust::execution_policy &exec, - InputIterator first, InputIterator last, - OutputIterator result) -{ - typedef typename thrust::iterator_traits::value_type InputType; - thrust::minus binary_op; - - return thrust::adjacent_difference(exec, first, last, result, binary_op); -} // end adjacent_difference() - -template -OutputIterator adjacent_difference(thrust::execution_policy &exec, - InputIterator first, InputIterator last, - OutputIterator result, - BinaryFunction binary_op) -{ - typedef typename thrust::iterator_traits::value_type InputType; - - if(first == last) - { - // empty range, nothing to do - return result; - } - else - { - // an in-place operation is requested, copy the input and call the entry point - // XXX a special-purpose kernel would be faster here since - // only block boundaries need to be copied - thrust::detail::temporary_array input_copy(exec, first, last); - - *result = *first; - thrust::transform(exec, input_copy.begin() + 1, input_copy.end(), input_copy.begin(), result + 1, binary_op); - } - - return result + (last - first); -} - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/advance.h b/compat/thrust/system/detail/generic/advance.h deleted file mode 100644 index 249aac7e54..0000000000 --- a/compat/thrust/system/detail/generic/advance.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template -void advance(InputIterator& i, Distance n); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/advance.inl b/compat/thrust/system/detail/generic/advance.inl deleted file mode 100644 index b95737ad9e..0000000000 --- a/compat/thrust/system/detail/generic/advance.inl +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ -namespace detail -{ - -template -void advance(InputIterator& i, Distance n, thrust::incrementable_traversal_tag) -{ - while(n) - { - ++i; - --n; - } // end while -} // end advance() - -template -void advance(InputIterator& i, Distance n, thrust::random_access_traversal_tag) -{ - i += n; -} // end advance() - -} // end detail - -template -void advance(InputIterator& i, Distance n) -{ - // dispatch on iterator traversal - thrust::system::detail::generic::detail::advance(i, n, - typename thrust::iterator_traversal::type()); -} // end advance() - -} // end namespace detail -} // end namespace generic -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/binary_search.h b/compat/thrust/system/detail/generic/binary_search.h deleted file mode 100644 index 7fd6c506ee..0000000000 --- a/compat/thrust/system/detail/generic/binary_search.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file binary_search.h - * \brief Generic implementations of binary search functions. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template -ForwardIterator lower_bound(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - const T& value); - -template -ForwardIterator lower_bound(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - const T& value, - StrictWeakOrdering comp); - - -template -ForwardIterator upper_bound(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - const T& value); - -template -ForwardIterator upper_bound(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - const T& value, - StrictWeakOrdering comp); - - -template -bool binary_search(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - const T& value); - -template -bool binary_search(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - const T& value, - StrictWeakOrdering comp); - - -template -OutputIterator lower_bound(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - InputIterator values_begin, - InputIterator values_end, - OutputIterator output); - -template -OutputIterator lower_bound(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - InputIterator values_begin, - InputIterator values_end, - OutputIterator output, - StrictWeakOrdering comp); - - -template -OutputIterator upper_bound(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - InputIterator values_begin, - InputIterator values_end, - OutputIterator output); - -template -OutputIterator upper_bound(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - InputIterator values_begin, - InputIterator values_end, - OutputIterator output, - StrictWeakOrdering comp); - - -template -OutputIterator binary_search(thrust::execution_policy &exec, - 
ForwardIterator begin, - ForwardIterator end, - InputIterator values_begin, - InputIterator values_end, - OutputIterator output); - -template -OutputIterator binary_search(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - InputIterator values_begin, - InputIterator values_end, - OutputIterator output, - StrictWeakOrdering comp); - - -template -thrust::pair -equal_range(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - const LessThanComparable &value); - -template -thrust::pair -equal_range(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - const LessThanComparable &value, - StrictWeakOrdering comp); - - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/binary_search.inl b/compat/thrust/system/detail/generic/binary_search.inl deleted file mode 100644 index 151ac0ea35..0000000000 --- a/compat/thrust/system/detail/generic/binary_search.inl +++ /dev/null @@ -1,342 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file binary_search.inl - * \brief Inline file for binary_search.h - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include - -namespace thrust -{ -namespace detail -{ - -// XXX WAR circular #inclusion with this forward declaration -template class temporary_array; - -} // end detail -namespace system -{ -namespace detail -{ -namespace generic -{ -namespace detail -{ - - -// short names to avoid nvcc bug -struct lbf -{ - template - __host__ __device__ - typename thrust::iterator_traits::difference_type - operator()(RandomAccessIterator begin, RandomAccessIterator end, const T& value, StrictWeakOrdering comp) - { - return thrust::system::detail::generic::scalar::lower_bound(begin, end, value, comp) - begin; - } -}; - -struct ubf -{ - template - __host__ __device__ - typename thrust::iterator_traits::difference_type - operator()(RandomAccessIterator begin, RandomAccessIterator end, const T& value, StrictWeakOrdering comp){ - return thrust::system::detail::generic::scalar::upper_bound(begin, end, value, comp) - begin; - } -}; - -struct bsf -{ - template - __host__ __device__ - bool operator()(RandomAccessIterator begin, RandomAccessIterator end, const T& value, StrictWeakOrdering comp){ - RandomAccessIterator iter = thrust::system::detail::generic::scalar::lower_bound(begin, end, value, comp); - - thrust::detail::host_device_function wrapped_comp(comp); - - return iter != end && !wrapped_comp(value, *iter); - } -}; - - -template -struct binary_search_functor -{ - ForwardIterator begin; - ForwardIterator end; - StrictWeakOrdering comp; - BinarySearchFunction func; - - binary_search_functor(ForwardIterator begin, ForwardIterator end, StrictWeakOrdering comp, BinarySearchFunction func) - : begin(begin), end(end), comp(comp), func(func) {} - - template - __host__ __device__ - void operator()(Tuple t) - { - thrust::get<1>(t) = func(begin, end, thrust::get<0>(t), comp); - } -}; // 
binary_search_functor - - -// Vector Implementation -template -OutputIterator binary_search(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - InputIterator values_begin, - InputIterator values_end, - OutputIterator output, - StrictWeakOrdering comp, - BinarySearchFunction func) -{ - thrust::for_each(exec, - thrust::make_zip_iterator(thrust::make_tuple(values_begin, output)), - thrust::make_zip_iterator(thrust::make_tuple(values_end, output + thrust::distance(values_begin, values_end))), - detail::binary_search_functor(begin, end, comp, func)); - - return output + thrust::distance(values_begin, values_end); -} - - - -// Scalar Implementation -template -OutputType binary_search(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - const T& value, - StrictWeakOrdering comp, - BinarySearchFunction func) -{ - // use the vectorized path to implement the scalar version - - // allocate device buffers for value and output - thrust::detail::temporary_array d_value(exec,1); - thrust::detail::temporary_array d_output(exec,1); - - // copy value to device - d_value[0] = value; - - // perform the query - thrust::system::detail::generic::detail::binary_search(exec, begin, end, d_value.begin(), d_value.end(), d_output.begin(), comp, func); - - // copy result to host and return - return d_output[0]; -} - -} // end namespace detail - - -////////////////////// -// Scalar Functions // -////////////////////// - -template -ForwardIterator lower_bound(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - const T& value) -{ - return thrust::lower_bound(exec, begin, end, value, thrust::less()); -} - -template -ForwardIterator lower_bound(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - const T& value, - StrictWeakOrdering comp) -{ - typedef typename thrust::iterator_traits::difference_type difference_type; - - return begin + detail::binary_search(exec, begin, end, value, 
comp, detail::lbf()); -} - - -template -ForwardIterator upper_bound(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - const T& value) -{ - return thrust::upper_bound(exec, begin, end, value, thrust::less()); -} - -template -ForwardIterator upper_bound(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - const T& value, - StrictWeakOrdering comp) -{ - typedef typename thrust::iterator_traits::difference_type difference_type; - - return begin + detail::binary_search(exec, begin, end, value, comp, detail::ubf()); -} - - -template -bool binary_search(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - const T& value) -{ - return thrust::binary_search(exec, begin, end, value, thrust::less()); -} - -template -bool binary_search(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - const T& value, - StrictWeakOrdering comp) -{ - return detail::binary_search(exec, begin, end, value, comp, detail::bsf()); -} - - -////////////////////// -// Vector Functions // -////////////////////// - -template -OutputIterator lower_bound(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - InputIterator values_begin, - InputIterator values_end, - OutputIterator output) -{ - typedef typename thrust::iterator_value::type ValueType; - - return thrust::lower_bound(exec, begin, end, values_begin, values_end, output, thrust::less()); -} - -template -OutputIterator lower_bound(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - InputIterator values_begin, - InputIterator values_end, - OutputIterator output, - StrictWeakOrdering comp) -{ - return detail::binary_search(exec, begin, end, values_begin, values_end, output, comp, detail::lbf()); -} - - -template -OutputIterator upper_bound(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - InputIterator values_begin, - InputIterator values_end, - 
OutputIterator output) -{ - typedef typename thrust::iterator_value::type ValueType; - - return thrust::upper_bound(exec, begin, end, values_begin, values_end, output, thrust::less()); -} - -template -OutputIterator upper_bound(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - InputIterator values_begin, - InputIterator values_end, - OutputIterator output, - StrictWeakOrdering comp) -{ - return detail::binary_search(exec, begin, end, values_begin, values_end, output, comp, detail::ubf()); -} - - -template -OutputIterator binary_search(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - InputIterator values_begin, - InputIterator values_end, - OutputIterator output) -{ - typedef typename thrust::iterator_value::type ValueType; - - return thrust::binary_search(exec, begin, end, values_begin, values_end, output, thrust::less()); -} - -template -OutputIterator binary_search(thrust::execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - InputIterator values_begin, - InputIterator values_end, - OutputIterator output, - StrictWeakOrdering comp) -{ - return detail::binary_search(exec, begin, end, values_begin, values_end, output, comp, detail::bsf()); -} - - -template -thrust::pair -equal_range(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - const LessThanComparable &value) -{ - return thrust::equal_range(exec, first, last, value, thrust::less()); -} - - -template -thrust::pair -equal_range(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - const T &value, - StrictWeakOrdering comp) -{ - ForwardIterator lb = thrust::lower_bound(exec, first, last, value, comp); - ForwardIterator ub = thrust::upper_bound(exec, first, last, value, comp); - return thrust::make_pair(lb, ub); -} - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git 
a/compat/thrust/system/detail/generic/copy.h b/compat/thrust/system/detail/generic/copy.h deleted file mode 100644 index 8df98fe67f..0000000000 --- a/compat/thrust/system/detail/generic/copy.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - OutputIterator copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result); - - -template - OutputIterator copy_n(thrust::execution_policy &exec, - InputIterator first, - Size n, - OutputIterator result); - - -} // end generic -} // end detail -} // end system -} // end thrust - -#include - diff --git a/compat/thrust/system/detail/generic/copy.inl b/compat/thrust/system/detail/generic/copy.inl deleted file mode 100644 index e081015f8e..0000000000 --- a/compat/thrust/system/detail/generic/copy.inl +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - OutputIterator copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result) -{ - typedef typename thrust::iterator_value::type T; - return thrust::transform(exec, first, last, result, thrust::identity()); -} // end copy() - - -template - OutputIterator copy_n(thrust::execution_policy &exec, - InputIterator first, - Size n, - OutputIterator result) -{ - typedef typename thrust::iterator_value::type value_type; - typedef thrust::identity xfrm_type; - - // XXX why do we need to do this? 
figure out why, and then see if we can do without - typedef typename thrust::detail::unary_transform_functor::type functor_type; - - typedef thrust::tuple iterator_tuple; - typedef thrust::zip_iterator zip_iter; - - zip_iter zipped = thrust::make_zip_iterator(thrust::make_tuple(first,result)); - - return thrust::get<1>(thrust::for_each_n(exec, zipped, n, functor_type(xfrm_type())).get_iterator_tuple()); -} // end copy_n() - - -} // end generic -} // end detail -} // end system -} // end thrust - diff --git a/compat/thrust/system/detail/generic/copy_if.h b/compat/thrust/system/detail/generic/copy_if.h deleted file mode 100644 index 183f012a03..0000000000 --- a/compat/thrust/system/detail/generic/copy_if.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - OutputIterator copy_if(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred); - - -template - OutputIterator copy_if(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred); - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/copy_if.inl b/compat/thrust/system/detail/generic/copy_if.inl deleted file mode 100644 index 145561c9bc..0000000000 --- a/compat/thrust/system/detail/generic/copy_if.inl +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ -namespace detail -{ - -template -OutputIterator copy_if(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred) -{ - __THRUST_DISABLE_MSVC_POSSIBLE_LOSS_OF_DATA_WARNING(IndexType n = thrust::distance(first, last)); - - // compute {0,1} predicates - thrust::detail::temporary_array predicates(exec, n); - thrust::transform(exec, - stencil, - stencil + n, - predicates.begin(), - thrust::detail::predicate_to_integral(pred)); - - // scan {0,1} predicates - thrust::detail::temporary_array scatter_indices(exec, n); - thrust::exclusive_scan(exec, - predicates.begin(), - predicates.end(), - scatter_indices.begin(), - static_cast(0), - thrust::plus()); - - // scatter the true elements - thrust::scatter_if(exec, - first, - last, - scatter_indices.begin(), - predicates.begin(), - result, - thrust::identity()); - - // find the end of the new sequence - IndexType output_size = scatter_indices[n - 1] + predicates[n - 1]; - - return result + output_size; -} - -} // end namespace detail - - -template - OutputIterator copy_if(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred) -{ - // XXX it's potentially expensive to send [first,last) twice - // we should probably specialize this case for POD - // since we can safely keep the input in a temporary instead - // of doing two loads - return thrust::copy_if(exec, first, last, first, result, pred); -} // end copy_if() - - -template - OutputIterator copy_if(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred) -{ - typedef typename 
thrust::iterator_traits::difference_type difference_type; - - // empty sequence - if(first == last) - return result; - - difference_type n = thrust::distance(first, last); - - // create an unsigned version of n (we know n is positive from the comparison above) - // to avoid a warning in the compare below - typename thrust::detail::make_unsigned::type unsigned_n(n); - - // use 32-bit indices when possible (almost always) - if(sizeof(difference_type) > sizeof(unsigned int) && unsigned_n > (std::numeric_limits::max)()) - { - result = detail::copy_if(exec, first, last, stencil, result, pred); - } // end if - else - { - result = detail::copy_if(exec, first, last, stencil, result, pred); - } // end else - - return result; -} // end copy_if() - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/count.h b/compat/thrust/system/detail/generic/count.h deleted file mode 100644 index bc4899e6d0..0000000000 --- a/compat/thrust/system/detail/generic/count.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template -typename thrust::iterator_traits::difference_type -count(thrust::execution_policy &exec, InputIterator first, InputIterator last, const EqualityComparable& value); - -template -typename thrust::iterator_traits::difference_type -count_if(thrust::execution_policy &exec, InputIterator first, InputIterator last, Predicate pred); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/count.inl b/compat/thrust/system/detail/generic/count.inl deleted file mode 100644 index e3ab8714b7..0000000000 --- a/compat/thrust/system/detail/generic/count.inl +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template -struct count_if_transform -{ - __host__ __device__ - count_if_transform(Predicate _pred) : pred(_pred){} - - __host__ __device__ - CountType operator()(const InputType& val) - { - if(pred(val)) - return 1; - else - return 0; - } // end operator() - - Predicate pred; -}; // end count_if_transform - -template -typename thrust::iterator_traits::difference_type -count(thrust::execution_policy &exec, InputIterator first, InputIterator last, const EqualityComparable& value) -{ - typedef typename thrust::iterator_traits::value_type InputType; - - // XXX use placeholder expression here - return thrust::count_if(exec, first, last, thrust::detail::equal_to_value(value)); -} // end count() - -template -typename thrust::iterator_traits::difference_type -count_if(thrust::execution_policy &exec, InputIterator first, InputIterator last, Predicate pred) -{ - typedef typename thrust::iterator_traits::value_type InputType; - typedef typename thrust::iterator_traits::difference_type CountType; - - thrust::system::detail::generic::count_if_transform unary_op(pred); - thrust::plus binary_op; - return thrust::transform_reduce(exec, first, last, unary_op, CountType(0), binary_op); -} // end count_if() - -} // end generic -} // end detail -} // end system -} // end thrust - diff --git a/compat/thrust/system/detail/generic/distance.h b/compat/thrust/system/detail/generic/distance.h deleted file mode 100644 index 80f051ca53..0000000000 --- a/compat/thrust/system/detail/generic/distance.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - inline typename thrust::iterator_traits::difference_type - distance(InputIterator first, InputIterator last); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/distance.inl b/compat/thrust/system/detail/generic/distance.inl deleted file mode 100644 index a1fdf1458b..0000000000 --- a/compat/thrust/system/detail/generic/distance.inl +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ -namespace detail -{ - -template - inline typename thrust::iterator_traits::difference_type - distance(InputIterator first, InputIterator last, thrust::incrementable_traversal_tag) -{ - typename thrust::iterator_traits::difference_type result(0); - - while(first != last) - { - ++first; - ++result; - } // end while - - return result; -} // end advance() - -template - inline typename thrust::iterator_traits::difference_type - distance(InputIterator first, InputIterator last, thrust::random_access_traversal_tag) -{ - return last - first; -} // end distance() - -} // end detail - -template - inline typename thrust::iterator_traits::difference_type - distance(InputIterator first, InputIterator last) -{ - // dispatch on iterator traversal - return thrust::system::detail::generic::detail::distance(first, last, - typename thrust::iterator_traversal::type()); -} // end advance() - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/equal.h b/compat/thrust/system/detail/generic/equal.h deleted file mode 100644 index da7d105825..0000000000 --- a/compat/thrust/system/detail/generic/equal.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template -bool equal(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2); - -template -bool equal(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate binary_pred); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/equal.inl b/compat/thrust/system/detail/generic/equal.inl deleted file mode 100644 index 12b8005a2a..0000000000 --- a/compat/thrust/system/detail/generic/equal.inl +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template -bool equal(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2) -{ - typedef typename thrust::iterator_traits::value_type InputType1; - - return thrust::equal(exec, first1, last1, first2, thrust::detail::equal_to()); -} - -template -bool equal(thrust::execution_policy &exec, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate binary_pred) -{ - return thrust::mismatch(exec, first1, last1, first2, binary_pred).first == last1; -} - -} // end generic -} // end detail -} // end system -} // end thrust - diff --git a/compat/thrust/system/detail/generic/extrema.h b/compat/thrust/system/detail/generic/extrema.h deleted file mode 100644 index abb4ddc210..0000000000 --- a/compat/thrust/system/detail/generic/extrema.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file extrema.h - * \brief Generic device implementations of extrema functions. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template -ForwardIterator max_element(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last); - -template -ForwardIterator max_element(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp); - -template -ForwardIterator min_element(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last); - -template -ForwardIterator min_element(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp); - -template -thrust::pair minmax_element(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last); - -template -thrust::pair minmax_element(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/extrema.inl b/compat/thrust/system/detail/generic/extrema.inl deleted file mode 100644 index b5f92c3935..0000000000 --- a/compat/thrust/system/detail/generic/extrema.inl +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! 
\file distance.h - * \brief Device implementations for distance. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ -namespace detail -{ - -////////////// -// Functors // -////////////// - -// return the smaller/larger element making sure to prefer the -// first occurance of the minimum/maximum element -template -struct min_element_reduction -{ - BinaryPredicate comp; - - __host__ __device__ - min_element_reduction(BinaryPredicate comp) : comp(comp){} - - __host__ __device__ - thrust::tuple - operator()(const thrust::tuple& lhs, - const thrust::tuple& rhs ) - { - if(comp(thrust::get<0>(lhs), thrust::get<0>(rhs))) - return lhs; - if(comp(thrust::get<0>(rhs), thrust::get<0>(lhs))) - return rhs; - - // values are equivalent, prefer value with smaller index - if(thrust::get<1>(lhs) < thrust::get<1>(rhs)) - return lhs; - else - return rhs; - } // end operator()() - -}; // end min_element_reduction - - -template -struct max_element_reduction -{ - BinaryPredicate comp; - - __host__ __device__ - max_element_reduction(BinaryPredicate comp) : comp(comp){} - - __host__ __device__ - thrust::tuple - operator()(const thrust::tuple& lhs, - const thrust::tuple& rhs ) - { - if(comp(thrust::get<0>(lhs), thrust::get<0>(rhs))) - return rhs; - if(comp(thrust::get<0>(rhs), thrust::get<0>(lhs))) - return lhs; - - // values are equivalent, prefer value with smaller index - if(thrust::get<1>(lhs) < thrust::get<1>(rhs)) - return lhs; - else - return rhs; - } // end operator()() - -}; // end max_element_reduction - -// return the smaller & larger element making sure to prefer the -// first occurance of the minimum/maximum element -template -struct minmax_element_reduction -{ - BinaryPredicate comp; - - minmax_element_reduction(BinaryPredicate comp) : comp(comp){} - - __host__ __device__ - thrust::tuple< thrust::tuple, thrust::tuple > - 
operator()(const thrust::tuple< thrust::tuple, thrust::tuple >& lhs, - const thrust::tuple< thrust::tuple, thrust::tuple >& rhs ) - { - - return thrust::make_tuple(min_element_reduction(comp)(thrust::get<0>(lhs), thrust::get<0>(rhs)), - max_element_reduction(comp)(thrust::get<1>(lhs), thrust::get<1>(rhs))); - } // end operator()() -}; // end minmax_element_reduction - -template -struct duplicate_tuple -{ - __host__ __device__ - thrust::tuple< thrust::tuple, thrust::tuple > - operator()(const thrust::tuple& t) - { - return thrust::make_tuple(t, t); - } -}; // end duplicate_tuple - -} // end namespace detail - -template -ForwardIterator min_element(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last) -{ - typedef typename thrust::iterator_value::type value_type; - - return thrust::min_element(exec, first, last, thrust::less()); -} // end min_element() - -template -ForwardIterator min_element(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) -{ - if (first == last) - return last; - - typedef typename thrust::iterator_traits::value_type InputType; - typedef typename thrust::iterator_traits::difference_type IndexType; - - thrust::tuple result = - thrust::reduce - (exec, - thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator(0))), - thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator(0))) + (last - first), - thrust::tuple(*first, 0), - detail::min_element_reduction(comp)); - - return first + thrust::get<1>(result); -} // end min_element() - -template -ForwardIterator max_element(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last) -{ - typedef typename thrust::iterator_value::type value_type; - - return thrust::max_element(exec, first, last, thrust::less()); -} // end max_element() - -template -ForwardIterator max_element(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - 
BinaryPredicate comp) -{ - if (first == last) - return last; - - typedef typename thrust::iterator_traits::value_type InputType; - typedef typename thrust::iterator_traits::difference_type IndexType; - - thrust::tuple result = - thrust::reduce - (exec, - thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator(0))), - thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator(0))) + (last - first), - thrust::tuple(*first, 0), - detail::max_element_reduction(comp)); - - return first + thrust::get<1>(result); -} // end max_element() - -template -thrust::pair minmax_element(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last) -{ - typedef typename thrust::iterator_value::type value_type; - - return thrust::minmax_element(exec, first, last, thrust::less()); -} // end minmax_element() - -template -thrust::pair minmax_element(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) -{ - if (first == last) - return thrust::make_pair(last, last); - - typedef typename thrust::iterator_traits::value_type InputType; - typedef typename thrust::iterator_traits::difference_type IndexType; - - thrust::tuple< thrust::tuple, thrust::tuple > result = - thrust::transform_reduce - (exec, - thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator(0))), - thrust::make_zip_iterator(thrust::make_tuple(first, thrust::counting_iterator(0))) + (last - first), - detail::duplicate_tuple(), - detail::duplicate_tuple()(thrust::tuple(*first, 0)), - detail::minmax_element_reduction(comp)); - - return thrust::make_pair(first + thrust::get<1>(thrust::get<0>(result)), first + thrust::get<1>(thrust::get<1>(result))); -} // end minmax_element() - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/fill.h b/compat/thrust/system/detail/generic/fill.h deleted file 
mode 100644 index 9745b1cf57..0000000000 --- a/compat/thrust/system/detail/generic/fill.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file fill.h - * \brief Device implementation of fill. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - OutputIterator fill_n(thrust::execution_policy &exec, - OutputIterator first, - Size n, - const T &value) -{ - // XXX consider using the placeholder expression _1 = value - return thrust::generate_n(exec, first, n, thrust::detail::fill_functor(value)); -} - -template - void fill(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - const T &value) -{ - // XXX consider using the placeholder expression _1 = value - thrust::generate(exec, first, last, thrust::detail::fill_functor(value)); -} - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/find.h b/compat/thrust/system/detail/generic/find.h deleted file mode 100644 index 08888c5a7c..0000000000 --- a/compat/thrust/system/detail/generic/find.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in 
compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template -InputIterator find(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - const T& value); - -template -InputIterator find_if(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - Predicate pred); - -template -InputIterator find_if_not(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - Predicate pred); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/find.inl b/compat/thrust/system/detail/generic/find.inl deleted file mode 100644 index a3414e1c28..0000000000 --- a/compat/thrust/system/detail/generic/find.inl +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - - -// Contributed by Erich Elsen - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template -InputIterator find(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - const T& value) -{ - // XXX consider a placeholder expression here - return thrust::find_if(exec, first, last, thrust::detail::equal_to_value(value)); -} // end find() - - -template -struct find_if_functor -{ - __host__ __device__ - TupleType operator()(const TupleType& lhs, const TupleType& rhs) const - { - // select the smallest index among true results - if (thrust::get<0>(lhs) && thrust::get<0>(rhs)) - return TupleType(true, (thrust::min)(thrust::get<1>(lhs), thrust::get<1>(rhs))); - else if (thrust::get<0>(lhs)) - return lhs; - else - return rhs; - } -}; - - -template -InputIterator find_if(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - Predicate pred) -{ - typedef typename thrust::iterator_traits::difference_type difference_type; - typedef typename thrust::tuple result_type; - - // empty sequence - if (first == last) - return last; - - const difference_type n = thrust::distance(first, last); - - // this implementation breaks up the sequence into separate intervals - // in an attempt to early-out as soon as a value is found - - // TODO incorporate sizeof(InputType) into interval_threshold and round to multiple of 32 - const difference_type interval_threshold = 1 << 20; - const difference_type interval_size = (std::min)(interval_threshold, n); - - // force transform_iterator output to bool - typedef thrust::transform_iterator XfrmIterator; - typedef thrust::tuple > IteratorTuple; - typedef thrust::zip_iterator ZipIterator; - - IteratorTuple iter_tuple = thrust::make_tuple(XfrmIterator(first, pred), - thrust::counting_iterator(0)); - - ZipIterator begin = thrust::make_zip_iterator(iter_tuple); - 
ZipIterator end = begin + n; - - for(ZipIterator interval_begin = begin; interval_begin < end; interval_begin += interval_size) - { - ZipIterator interval_end = interval_begin + interval_size; - if(end < interval_end) - { - interval_end = end; - } // end if - - result_type result = thrust::reduce(exec, - interval_begin, interval_end, - result_type(false,interval_end - begin), - find_if_functor()); - - // see if we found something - if (thrust::get<0>(result)) - { - return first + thrust::get<1>(result); - } - } - - //nothing was found if we reach here... - return first + n; -} - - -template -InputIterator find_if_not(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - Predicate pred) -{ - return thrust::find_if(exec, first, last, thrust::detail::not1(pred)); -} // end find() - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/for_each.h b/compat/thrust/system/detail/generic/for_each.h deleted file mode 100644 index 61abe20b6f..0000000000 --- a/compat/thrust/system/detail/generic/for_each.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file for_each.h - * \brief Generic implementation of for_each & for_each_n. - * It is an error to call these functions; they have no implementation. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template -InputIterator for_each(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - UnaryFunction f) -{ - // unimplemented - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - return first; -} // end for_each() - - -template -InputIterator for_each_n(thrust::execution_policy &exec, - InputIterator first, - Size n, - UnaryFunction f) -{ - // unimplemented - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - return first; -} // end for_each_n() - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/gather.h b/compat/thrust/system/detail/generic/gather.h deleted file mode 100644 index cfb6f85ca5..0000000000 --- a/compat/thrust/system/detail/generic/gather.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - OutputIterator gather(thrust::execution_policy &exec, - InputIterator map_first, - InputIterator map_last, - RandomAccessIterator input_first, - OutputIterator result); - - -template - OutputIterator gather_if(thrust::execution_policy &exec, - InputIterator1 map_first, - InputIterator1 map_last, - InputIterator2 stencil, - RandomAccessIterator input_first, - OutputIterator result); - - -template - OutputIterator gather_if(thrust::execution_policy &exec, - InputIterator1 map_first, - InputIterator1 map_last, - InputIterator2 stencil, - RandomAccessIterator input_first, - OutputIterator result, - Predicate pred); - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/gather.inl b/compat/thrust/system/detail/generic/gather.inl deleted file mode 100644 index ab2cdd8116..0000000000 --- a/compat/thrust/system/detail/generic/gather.inl +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - OutputIterator gather(thrust::execution_policy &exec, - InputIterator map_first, - InputIterator map_last, - RandomAccessIterator input_first, - OutputIterator result) -{ - return thrust::transform(exec, - thrust::make_permutation_iterator(input_first, map_first), - thrust::make_permutation_iterator(input_first, map_last), - result, - thrust::identity::type>()); -} // end gather() - - -template - OutputIterator gather_if(thrust::execution_policy &exec, - InputIterator1 map_first, - InputIterator1 map_last, - InputIterator2 stencil, - RandomAccessIterator input_first, - OutputIterator result) -{ - typedef typename thrust::iterator_value::type StencilType; - return thrust::gather_if(exec, - map_first, - map_last, - stencil, - input_first, - result, - thrust::identity()); -} // end gather_if() - - -template - OutputIterator gather_if(thrust::execution_policy &exec, - InputIterator1 map_first, - InputIterator1 map_last, - InputIterator2 stencil, - RandomAccessIterator input_first, - OutputIterator result, - Predicate pred) -{ - typedef typename thrust::iterator_value::type InputType; - return thrust::transform_if(exec, - thrust::make_permutation_iterator(input_first, map_first), - thrust::make_permutation_iterator(input_first, map_last), - stencil, - result, - thrust::identity(), - pred); -} // end gather_if() - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/generate.h b/compat/thrust/system/detail/generic/generate.h deleted file mode 100644 index e7a8e00726..0000000000 --- a/compat/thrust/system/detail/generic/generate.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this 
file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - void generate(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Generator gen); - -template - OutputIterator generate_n(thrust::execution_policy &exec, - OutputIterator first, - Size n, - Generator gen); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/generate.inl b/compat/thrust/system/detail/generic/generate.inl deleted file mode 100644 index 4da5763f9c..0000000000 --- a/compat/thrust/system/detail/generic/generate.inl +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - void generate(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Generator gen) -{ - thrust::for_each(exec, first, last, typename thrust::detail::generate_functor::type(gen)); -} // end generate() - -template - OutputIterator generate_n(thrust::execution_policy &exec, - OutputIterator first, - Size n, - Generator gen) -{ - return thrust::for_each_n(exec, first, n, typename thrust::detail::generate_functor::type(gen)); -} // end generate() - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/inner_product.h b/compat/thrust/system/detail/generic/inner_product.h deleted file mode 100644 index 9ac5c69636..0000000000 --- a/compat/thrust/system/detail/generic/inner_product.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - OutputType inner_product(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputType init); - -template -OutputType inner_product(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputType init, - BinaryFunction1 binary_op1, - BinaryFunction2 binary_op2); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/inner_product.inl b/compat/thrust/system/detail/generic/inner_product.inl deleted file mode 100644 index b6a339ea15..0000000000 --- a/compat/thrust/system/detail/generic/inner_product.inl +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template -OutputType inner_product(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputType init) -{ - thrust::plus binary_op1; - thrust::multiplies binary_op2; - return thrust::inner_product(exec, first1, last1, first2, init, binary_op1, binary_op2); -} // end inner_product() - - -template -OutputType inner_product(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputType init, - BinaryFunction1 binary_op1, - BinaryFunction2 binary_op2) -{ - typedef thrust::zip_iterator > ZipIter; - - ZipIter first = thrust::make_zip_iterator(thrust::make_tuple(first1,first2)); - - // only the first iterator in the tuple is relevant for the purposes of last - ZipIter last = thrust::make_zip_iterator(thrust::make_tuple(last1, first2)); - - return thrust::transform_reduce(exec, first, last, thrust::detail::zipped_binary_op(binary_op2), init, binary_op1); -} // end inner_product() - - -} // end generic -} // end detail -} // end system -} // end thrust - diff --git a/compat/thrust/system/detail/generic/logical.h b/compat/thrust/system/detail/generic/logical.h deleted file mode 100644 index e0d01e30a5..0000000000 --- a/compat/thrust/system/detail/generic/logical.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template -bool all_of(thrust::execution_policy &exec, InputIterator first, InputIterator last, Predicate pred) -{ - return thrust::find_if(exec, first, last, thrust::detail::not1(pred)) == last; -} - -template -bool any_of(thrust::execution_policy &exec, InputIterator first, InputIterator last, Predicate pred) -{ - return thrust::find_if(exec, first, last, pred) != last; -} - -template -bool none_of(thrust::execution_policy &exec, InputIterator first, InputIterator last, Predicate pred) -{ - return !thrust::any_of(exec, first, last, pred); -} - -} // end generic -} // end detail -} // end system -} // end thrust - diff --git a/compat/thrust/system/detail/generic/memory.h b/compat/thrust/system/detail/generic/memory.h deleted file mode 100644 index c0fe623ac4..0000000000 --- a/compat/thrust/system/detail/generic/memory.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file generic/memory.h - * \brief Generic implementation of memory functions. - * Calling some of these is an error. They have no implementation. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template void malloc(thrust::execution_policy &, Size); - -template -thrust::pointer malloc(thrust::execution_policy &s, std::size_t n); - -template void free(thrust::execution_policy &, Pointer); - -template -__host__ __device__ -void assign_value(tag, Pointer1, Pointer2); - -template -__host__ __device__ -void get_value(thrust::execution_policy &, Pointer); - -template -__host__ __device__ -void iter_swap(tag, Pointer1, Pointer2); - -} // end generic -} // end detail -} // end system -} // end thrust - -#include - diff --git a/compat/thrust/system/detail/generic/memory.inl b/compat/thrust/system/detail/generic/memory.inl deleted file mode 100644 index f89a763a62..0000000000 --- a/compat/thrust/system/detail/generic/memory.inl +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - void malloc(thrust::execution_policy &, Size) -{ - // unimplemented - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); -} - - -template - thrust::pointer - malloc(thrust::execution_policy &exec, std::size_t n) -{ - thrust::pointer void_ptr = thrust::malloc(exec, sizeof(T) * n); - - return pointer(static_cast(void_ptr.get())); -} // end malloc() - - -template - void free(thrust::execution_policy &, Pointer) -{ - // unimplemented - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); -} - - -template -__host__ __device__ -void assign_value(thrust::execution_policy &, Pointer1, Pointer2) -{ - // unimplemented - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); -} - - -template -__host__ __device__ -void get_value(thrust::execution_policy &, Pointer) -{ - // unimplemented - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); -} - - -template -__host__ __device__ -void iter_swap(tag, Pointer1, Pointer2) -{ - // unimplemented - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); -} - - -} // end generic -} // end detail -} // end system -} // end thrust - diff --git a/compat/thrust/system/detail/generic/merge.h b/compat/thrust/system/detail/generic/merge.h deleted file mode 100644 index 5f0b99640d..0000000000 --- a/compat/thrust/system/detail/generic/merge.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -// XXX calling this function is an error; there is no implementation -template - OutputIterator merge(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp); - - -template - OutputIterator merge(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result); - - -template - thrust::pair - merge_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first1, InputIterator1 keys_last1, - InputIterator2 keys_first2, InputIterator2 keys_last2, - InputIterator3 values_first1, InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - Compare comp); - - -template - thrust::pair - merge_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first1, InputIterator1 keys_last1, - InputIterator2 keys_first2, InputIterator2 keys_last2, - InputIterator3 values_first1, InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/merge.inl b/compat/thrust/system/detail/generic/merge.inl deleted file mode 100644 index b913611168..0000000000 --- 
a/compat/thrust/system/detail/generic/merge.inl +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - OutputIterator merge(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - // unimplemented - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - return result; -} // end merge() - - -template - OutputIterator merge(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result) -{ - typedef typename thrust::iterator_value::type value_type; - return thrust::merge(exec,first1,last1,first2,last2,result,thrust::less()); -} // end merge() - - -template - thrust::pair - merge_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first1, InputIterator1 keys_last1, - InputIterator2 keys_first2, InputIterator2 keys_last2, - InputIterator3 values_first1, InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - Compare comp) -{ - typedef thrust::tuple iterator_tuple1; - typedef thrust::tuple iterator_tuple2; - 
typedef thrust::tuple iterator_tuple3; - - typedef thrust::zip_iterator zip_iterator1; - typedef thrust::zip_iterator zip_iterator2; - typedef thrust::zip_iterator zip_iterator3; - - zip_iterator1 zipped_first1 = thrust::make_zip_iterator(thrust::make_tuple(keys_first1, values_first1)); - zip_iterator1 zipped_last1 = thrust::make_zip_iterator(thrust::make_tuple(keys_last1, values_first1)); - - zip_iterator2 zipped_first2 = thrust::make_zip_iterator(thrust::make_tuple(keys_first2, values_first2)); - zip_iterator2 zipped_last2 = thrust::make_zip_iterator(thrust::make_tuple(keys_last2, values_first2)); - - zip_iterator3 zipped_result = thrust::make_zip_iterator(thrust::make_tuple(keys_result, values_result)); - - thrust::detail::compare_first comp_first(comp); - - iterator_tuple3 result = thrust::merge(exec, zipped_first1, zipped_last1, zipped_first2, zipped_last2, zipped_result, comp_first).get_iterator_tuple(); - - return thrust::make_pair(thrust::get<0>(result), thrust::get<1>(result)); -} // end merge_by_key() - - -template - thrust::pair - merge_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first1, InputIterator1 keys_last1, - InputIterator2 keys_first2, InputIterator2 keys_last2, - InputIterator3 values_first1, InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result) -{ - typedef typename thrust::iterator_value::type value_type; - return thrust::merge_by_key(exec, keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, thrust::less()); -} // end merge_by_key() - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/mismatch.h b/compat/thrust/system/detail/generic/mismatch.h deleted file mode 100644 index dc581ffbee..0000000000 --- a/compat/thrust/system/detail/generic/mismatch.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA 
Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - thrust::pair - mismatch(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2); - - -template - thrust::pair - mismatch(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - BinaryPredicate pred); - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/mismatch.inl b/compat/thrust/system/detail/generic/mismatch.inl deleted file mode 100644 index 923c27f71b..0000000000 --- a/compat/thrust/system/detail/generic/mismatch.inl +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - thrust::pair - mismatch(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2) -{ - typedef typename thrust::iterator_value::type InputType1; - - // XXX use a placeholder expression here - return thrust::mismatch(exec, first1, last1, first2, thrust::detail::equal_to()); -} // end mismatch() - -template - thrust::pair - mismatch(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - BinaryPredicate pred) -{ - // Contributed by Erich Elsen - typedef thrust::tuple IteratorTuple; - typedef thrust::zip_iterator ZipIterator; - - ZipIterator zipped_first = thrust::make_zip_iterator(thrust::make_tuple(first1,first2)); - ZipIterator zipped_last = thrust::make_zip_iterator(thrust::make_tuple(last1, first2)); - - ZipIterator result = thrust::find_if_not(exec, zipped_first, zipped_last, thrust::detail::tuple_binary_predicate(pred)); - - return thrust::make_pair(thrust::get<0>(result.get_iterator_tuple()), - thrust::get<1>(result.get_iterator_tuple())); -} // end mismatch() - -} // end generic -} // end detail -} // end system -} // end thrust - diff --git a/compat/thrust/system/detail/generic/partition.h b/compat/thrust/system/detail/generic/partition.h deleted file mode 100644 index 63daa1d1c1..0000000000 --- a/compat/thrust/system/detail/generic/partition.h +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file partition.h - * \brief Generic implementations of partition functions. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - ForwardIterator stable_partition(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred); - -template - ForwardIterator stable_partition(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred); - -template - thrust::pair - stable_partition_copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred); - -template - thrust::pair - stable_partition_copy(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred); - -template - ForwardIterator partition(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred); - -template - ForwardIterator partition(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred); - -template - thrust::pair - partition_copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred); - -template - thrust::pair - partition_copy(thrust::execution_policy &exec, - InputIterator1 
first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred); - -template - ForwardIterator partition_point(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred); - -template - bool is_partitioned(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - Predicate pred); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/partition.inl b/compat/thrust/system/detail/generic/partition.inl deleted file mode 100644 index 3298afc6f5..0000000000 --- a/compat/thrust/system/detail/generic/partition.inl +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - ForwardIterator stable_partition(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - typedef typename thrust::iterator_traits::value_type InputType; - - // copy input to temp buffer - thrust::detail::temporary_array temp(exec, first, last); - - // count the size of the true partition - typename thrust::iterator_difference::type num_true = thrust::count_if(exec, first,last,pred); - - // point to the beginning of the false partition - ForwardIterator out_false = first; - thrust::advance(out_false, num_true); - - return thrust::stable_partition_copy(exec, temp.begin(), temp.end(), first, out_false, pred).first; -} // end stable_partition() - - -template - ForwardIterator stable_partition(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred) -{ - typedef typename thrust::iterator_traits::value_type InputType; - - // copy input to temp buffer - thrust::detail::temporary_array temp(exec, first, last); - - // count the size of the true partition - InputIterator stencil_last = stencil; - thrust::advance(stencil_last, temp.size()); - typename thrust::iterator_difference::type num_true = thrust::count_if(exec, stencil, stencil_last, pred); - - // point to the beginning of the false partition - ForwardIterator out_false = first; - thrust::advance(out_false, num_true); - - return thrust::stable_partition_copy(exec, temp.begin(), temp.end(), stencil, first, out_false, pred).first; -} // end stable_partition() - - -template - thrust::pair - stable_partition_copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - 
thrust::detail::unary_negate not_pred(pred); - - // remove_copy_if the true partition to out_true - OutputIterator1 end_of_true_partition = thrust::remove_copy_if(exec, first, last, out_true, not_pred); - - // remove_copy_if the false partition to out_false - OutputIterator2 end_of_false_partition = thrust::remove_copy_if(exec, first, last, out_false, pred); - - return thrust::make_pair(end_of_true_partition, end_of_false_partition); -} // end stable_partition_copy() - - -template - thrust::pair - stable_partition_copy(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - thrust::detail::unary_negate not_pred(pred); - - // remove_copy_if the true partition to out_true - OutputIterator1 end_of_true_partition = thrust::remove_copy_if(exec, first, last, stencil, out_true, not_pred); - - // remove_copy_if the false partition to out_false - OutputIterator2 end_of_false_partition = thrust::remove_copy_if(exec, first, last, stencil, out_false, pred); - - return thrust::make_pair(end_of_true_partition, end_of_false_partition); -} // end stable_partition_copy() - - -template - ForwardIterator partition(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - return thrust::stable_partition(exec, first, last, pred); -} // end partition() - - -template - ForwardIterator partition(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred) -{ - return thrust::stable_partition(exec, first, last, stencil, pred); -} // end partition() - - -template - thrust::pair - partition_copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - return thrust::stable_partition_copy(exec,first,last,out_true,out_false,pred); -} // end partition_copy() - - 
-template - thrust::pair - partition_copy(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - return thrust::stable_partition_copy(exec,first,last,stencil,out_true,out_false,pred); -} // end partition_copy() - - -template - ForwardIterator partition_point(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - return thrust::find_if_not(exec, first, last, pred); -} // end partition_point() - - -template - bool is_partitioned(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - Predicate pred) -{ - return thrust::is_sorted(exec, - thrust::make_transform_iterator(first, thrust::detail::not1(pred)), - thrust::make_transform_iterator(last, thrust::detail::not1(pred))); -} // end is_partitioned() - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/reduce.h b/compat/thrust/system/detail/generic/reduce.h deleted file mode 100644 index 2811df164f..0000000000 --- a/compat/thrust/system/detail/generic/reduce.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - typename thrust::iterator_traits::value_type - reduce(thrust::execution_policy &exec, InputIterator first, InputIterator last); - -template - T reduce(thrust::execution_policy &exec, InputIterator first, InputIterator last, T init); - -template - T reduce(thrust::execution_policy &exec, InputIterator first, InputIterator last, T init, BinaryFunction binary_op); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/reduce.inl b/compat/thrust/system/detail/generic/reduce.inl deleted file mode 100644 index 8f52385163..0000000000 --- a/compat/thrust/system/detail/generic/reduce.inl +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - typename thrust::iterator_traits::value_type - reduce(thrust::execution_policy &exec, InputIterator first, InputIterator last) -{ - typedef typename thrust::iterator_value::type InputType; - - // use InputType(0) as init by default - return thrust::reduce(exec, first, last, InputType(0)); -} // end reduce() - - -template - T reduce(thrust::execution_policy &exec, InputIterator first, InputIterator last, T init) -{ - // use plus by default - return thrust::reduce(exec, first, last, init, thrust::plus()); -} // end reduce() - - -template - OutputType reduce(thrust::execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last, - OutputType init, - BinaryFunction binary_op) -{ - // unimplemented - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - return OutputType(); -} // end reduce() - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/reduce_by_key.h b/compat/thrust/system/detail/generic/reduce_by_key.h deleted file mode 100644 index c6064ab53e..0000000000 --- a/compat/thrust/system/detail/generic/reduce_by_key.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - thrust::pair - reduce_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output); - -template - thrust::pair - reduce_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred); - -template - thrust::pair - reduce_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred, - BinaryFunction binary_op); - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/reduce_by_key.inl b/compat/thrust/system/detail/generic/reduce_by_key.inl deleted file mode 100644 index 2ca21a5aab..0000000000 --- a/compat/thrust/system/detail/generic/reduce_by_key.inl +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file reduce_by_key.inl - * \brief Inline file for reduce_by_key.h. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ -namespace detail -{ - -template -struct reduce_by_key_functor -{ - AssociativeOperator binary_op; - - typedef typename thrust::tuple result_type; - - __host__ __device__ - reduce_by_key_functor(AssociativeOperator _binary_op) : binary_op(_binary_op) {} - - __host__ __device__ - result_type operator()(result_type a, result_type b) - { - return result_type(thrust::get<1>(b) ? thrust::get<0>(b) : binary_op(thrust::get<0>(a), thrust::get<0>(b)), - thrust::get<1>(a) | thrust::get<1>(b)); - } -}; - -} // end namespace detail - - -template - thrust::pair - reduce_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred, - BinaryFunction binary_op) -{ - typedef typename thrust::iterator_traits::difference_type difference_type; - typedef typename thrust::iterator_traits::value_type KeyType; - - typedef unsigned int FlagType; // TODO use difference_type - - // the pseudocode for deducing the type of the temporary used below: - // - // if BinaryFunction is AdaptableBinaryFunction - // TemporaryType = AdaptableBinaryFunction::result_type - // else if OutputIterator2 is a "pure" output iterator - // TemporaryType = InputIterator2::value_type - // else - // TemporaryType = OutputIterator2::value_type - // - // XXX upon c++0x, TemporaryType needs to be: - // result_of::type - - typedef typename thrust::detail::eval_if< - thrust::detail::has_result_type::value, - thrust::detail::result_type, - 
thrust::detail::eval_if< - thrust::detail::is_output_iterator::value, - thrust::iterator_value, - thrust::iterator_value - > - >::type ValueType; - - if (keys_first == keys_last) - return thrust::make_pair(keys_output, values_output); - - // input size - difference_type n = keys_last - keys_first; - - InputIterator2 values_last = values_first + n; - - // compute head flags - thrust::detail::temporary_array head_flags(exec, n); - thrust::transform(exec, keys_first, keys_last - 1, keys_first + 1, head_flags.begin() + 1, thrust::detail::not2(binary_pred)); - head_flags[0] = 1; - - // compute tail flags - thrust::detail::temporary_array tail_flags(exec, n); //COPY INSTEAD OF TRANSFORM - thrust::transform(exec, keys_first, keys_last - 1, keys_first + 1, tail_flags.begin(), thrust::detail::not2(binary_pred)); - tail_flags[n-1] = 1; - - // scan the values by flag - thrust::detail::temporary_array scanned_values(exec, n); - thrust::detail::temporary_array scanned_tail_flags(exec, n); - - thrust::inclusive_scan - (exec, - thrust::make_zip_iterator(thrust::make_tuple(values_first, head_flags.begin())), - thrust::make_zip_iterator(thrust::make_tuple(values_last, head_flags.end())), - thrust::make_zip_iterator(thrust::make_tuple(scanned_values.begin(), scanned_tail_flags.begin())), - detail::reduce_by_key_functor(binary_op)); - - thrust::exclusive_scan(exec, tail_flags.begin(), tail_flags.end(), scanned_tail_flags.begin(), FlagType(0), thrust::plus()); - - // number of unique keys - FlagType N = scanned_tail_flags[n - 1] + 1; - - // scatter the keys and accumulated values - thrust::scatter_if(exec, keys_first, keys_last, scanned_tail_flags.begin(), head_flags.begin(), keys_output); - thrust::scatter_if(exec, scanned_values.begin(), scanned_values.end(), scanned_tail_flags.begin(), tail_flags.begin(), values_output); - - return thrust::make_pair(keys_output + N, values_output + N); -} // end reduce_by_key() - - -template - thrust::pair - reduce_by_key(thrust::execution_policy 
&exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output) -{ - typedef typename thrust::iterator_value::type KeyType; - - // use equal_to as default BinaryPredicate - return thrust::reduce_by_key(exec, keys_first, keys_last, values_first, keys_output, values_output, thrust::equal_to()); -} // end reduce_by_key() - - -template - thrust::pair - reduce_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred) -{ - typedef typename thrust::detail::eval_if< - thrust::detail::is_output_iterator::value, - thrust::iterator_value, - thrust::iterator_value - >::type T; - - // use plus as default BinaryFunction - return thrust::reduce_by_key(exec, - keys_first, keys_last, - values_first, - keys_output, - values_output, - binary_pred, - thrust::plus()); -} // end reduce_by_key() - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/remove.h b/compat/thrust/system/detail/generic/remove.h deleted file mode 100644 index e23673574e..0000000000 --- a/compat/thrust/system/detail/generic/remove.h +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file remove.h - * \brief Generic implementations of remove functions. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - ForwardIterator remove(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - const T &value); - -template - OutputIterator remove_copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - const T &value); - -template - ForwardIterator remove_if(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred); - -template - ForwardIterator remove_if(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred); - -template - OutputIterator remove_copy_if(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred); - -template - OutputIterator remove_copy_if(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/remove.inl b/compat/thrust/system/detail/generic/remove.inl deleted file mode 100644 index 8a533e029a..0000000000 --- a/compat/thrust/system/detail/generic/remove.inl +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file remove.inl - * \brief Inline file for remove.h - */ - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - ForwardIterator remove(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - const T &value) -{ - thrust::detail::equal_to_value pred(value); - - // XXX consider using a placeholder here - return thrust::remove_if(exec, first, last, pred); -} // end remove() - - -template - OutputIterator remove_copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - const T &value) -{ - thrust::detail::equal_to_value pred(value); - - // XXX consider using a placeholder here - return thrust::remove_copy_if(exec, first, last, result, pred); -} // end remove_copy() - - -template - ForwardIterator remove_if(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - typedef typename thrust::iterator_traits::value_type InputType; - - // create temporary storage for an intermediate result - thrust::detail::temporary_array temp(exec, first, last); - - // remove into temp - return thrust::remove_copy_if(exec, temp.begin(), temp.end(), temp.begin(), first, pred); -} // end remove_if() - - -template - ForwardIterator remove_if(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred) -{ - typedef typename thrust::iterator_traits::value_type 
InputType; - - // create temporary storage for an intermediate result - thrust::detail::temporary_array temp(exec, first, last); - - // remove into temp - return thrust::remove_copy_if(exec, temp.begin(), temp.end(), stencil, first, pred); -} // end remove_if() - - -template - OutputIterator remove_copy_if(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred) -{ - return thrust::remove_copy_if(exec, first, last, first, result, pred); -} // end remove_copy_if() - - -template - OutputIterator remove_copy_if(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred) -{ - return thrust::copy_if(exec, first, last, stencil, result, thrust::detail::not1(pred)); -} // end remove_copy_if() - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/replace.h b/compat/thrust/system/detail/generic/replace.h deleted file mode 100644 index deb2e55bc8..0000000000 --- a/compat/thrust/system/detail/generic/replace.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - OutputIterator replace_copy_if(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred, - const T &new_value); - - -template - OutputIterator replace_copy_if(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred, - const T &new_value); - - -template - OutputIterator replace_copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - const T &old_value, - const T &new_value); - - -template - void replace_if(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred, - const T &new_value); - - -template - void replace_if(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred, - const T &new_value); - - -template - void replace(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - const T &old_value, - const T &new_value); - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/replace.inl b/compat/thrust/system/detail/generic/replace.inl deleted file mode 100644 index 52e7118ecc..0000000000 --- a/compat/thrust/system/detail/generic/replace.inl +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ -namespace detail -{ - -// this functor receives x, and returns a new_value if predicate(x) is true; otherwise, -// it returns x -template - struct new_value_if -{ - new_value_if(Predicate p, NewType nv):pred(p),new_value(nv){} - - template - __host__ __device__ - OutputType operator()(const InputType x) const - { - return pred(x) ? new_value : x; - } // end operator()() - - // this version of operator()() works like the previous but - // feeds its second argument to pred - template - __host__ __device__ - OutputType operator()(const InputType x, const PredicateArgumentType y) - { - return pred(y) ? 
new_value : x; - } // end operator()() - - Predicate pred; - NewType new_value; -}; // end new_value_if - -// this unary functor ignores its argument and returns a constant -template - struct constant_unary -{ - constant_unary(T _c):c(_c){} - - template - __host__ __device__ - T operator()(U &x) - { - return c; - } // end operator()() - - T c; -}; // end constant_unary - -} // end detail - -template - OutputIterator replace_copy_if(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred, - const T &new_value) -{ - typedef typename thrust::iterator_traits::value_type InputType; - typedef typename thrust::iterator_traits::value_type OutputType; - - detail::new_value_if op(pred,new_value); - return thrust::transform(exec, first, last, result, op); -} // end replace_copy_if() - -template - OutputIterator replace_copy_if(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred, - const T &new_value) -{ - typedef typename thrust::iterator_traits::value_type OutputType; - - detail::new_value_if op(pred,new_value); - return thrust::transform(exec, first, last, stencil, result, op); -} // end replace_copy_if() - - -template - OutputIterator replace_copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - const T &old_value, - const T &new_value) -{ - thrust::detail::equal_to_value pred(old_value); - return thrust::replace_copy_if(exec, first, last, result, pred, new_value); -} // end replace_copy() - -template - void replace_if(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred, - const T &new_value) -{ - detail::constant_unary f(new_value); - - // XXX replace this with generate_if: - // constant_nullary f(new_value); - // generate_if(first, last, first, f, pred); - thrust::transform_if(exec, first, last, first, first, f, 
pred); -} // end replace_if() - -template - void replace_if(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred, - const T &new_value) -{ - detail::constant_unary f(new_value); - - // XXX replace this with generate_if: - // constant_nullary f(new_value); - // generate_if(stencil, stencil + n, first, f, pred); - thrust::transform_if(exec, first, last, stencil, first, f, pred); -} // end replace_if() - -template - void replace(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - const T &old_value, - const T &new_value) -{ - thrust::detail::equal_to_value pred(old_value); - return thrust::replace_if(exec, first, last, pred, new_value); -} // end replace() - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/reverse.h b/compat/thrust/system/detail/generic/reverse.h deleted file mode 100644 index 327bf221b1..0000000000 --- a/compat/thrust/system/detail/generic/reverse.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - void reverse(thrust::execution_policy &exec, - BidirectionalIterator first, - BidirectionalIterator last); - -template - OutputIterator reverse_copy(thrust::execution_policy &exec, - BidirectionalIterator first, - BidirectionalIterator last, - OutputIterator result); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/reverse.inl b/compat/thrust/system/detail/generic/reverse.inl deleted file mode 100644 index 27c1bbf2fa..0000000000 --- a/compat/thrust/system/detail/generic/reverse.inl +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - void reverse(thrust::execution_policy &exec, - BidirectionalIterator first, - BidirectionalIterator last) -{ - typedef typename thrust::iterator_difference::type difference_type; - - // find the midpoint of [first,last) - difference_type N = thrust::distance(first, last); - BidirectionalIterator mid(first); - thrust::advance(mid, N / 2); - - // swap elements of [first,mid) with [last - 1, mid) - thrust::swap_ranges(exec, first, mid, thrust::make_reverse_iterator(last)); -} // end reverse() - -template - OutputIterator reverse_copy(thrust::execution_policy &exec, - BidirectionalIterator first, - BidirectionalIterator last, - OutputIterator result) -{ - return thrust::copy(exec, - thrust::make_reverse_iterator(last), - thrust::make_reverse_iterator(first), - result); -} // end reverse_copy() - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - - diff --git a/compat/thrust/system/detail/generic/scalar/binary_search.h b/compat/thrust/system/detail/generic/scalar/binary_search.h deleted file mode 100644 index 6ed9e8d9ee..0000000000 --- a/compat/thrust/system/detail/generic/scalar/binary_search.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ - -namespace system -{ - -namespace detail -{ - -namespace generic -{ - -namespace scalar -{ - -template -__host__ __device__ -RandomAccessIterator lower_bound_n(RandomAccessIterator first, - Size n, - const T &val, - BinaryPredicate comp); - -template -__host__ __device__ -RandomAccessIterator lower_bound(RandomAccessIterator first, RandomAccessIterator last, - const T &val, - BinaryPredicate comp); - -template -__host__ __device__ -RandomAccessIterator upper_bound_n(RandomAccessIterator first, - Size n, - const T &val, - BinaryPredicate comp); - -template -__host__ __device__ -RandomAccessIterator upper_bound(RandomAccessIterator first, RandomAccessIterator last, - const T &val, - BinaryPredicate comp); - -template -__host__ __device__ - pair - equal_range(RandomAccessIterator first, RandomAccessIterator last, - const T &val, - BinaryPredicate comp); - -template -__host__ __device__ -bool binary_search(RandomAccessIterator first, RandomAccessIterator last, const T &value, Compare comp); - -} // end scalar - -} // end generic - -} // end detail - -} // end system - -} // end thrust - -#include - diff --git a/compat/thrust/system/detail/generic/scalar/binary_search.inl b/compat/thrust/system/detail/generic/scalar/binary_search.inl deleted file mode 100644 index 5a9d379612..0000000000 --- a/compat/thrust/system/detail/generic/scalar/binary_search.inl +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -namespace system -{ - -namespace detail -{ - -namespace generic -{ - -namespace scalar -{ - -template -__host__ __device__ -RandomAccessIterator lower_bound_n(RandomAccessIterator first, - Size n, - const T &val, - BinaryPredicate comp) -{ - // wrap comp - thrust::detail::host_device_function< - BinaryPredicate, - bool - > wrapped_comp(comp); - - Size start = 0, i; - while(start < n) - { - i = (start + n) / 2; - if(wrapped_comp(first[i], val)) - { - start = i + 1; - } - else - { - n = i; - } - } // end while - - return first + start; -} - -// XXX generalize these upon implementation of scalar::distance & scalar::advance - -template -__host__ __device__ -RandomAccessIterator lower_bound(RandomAccessIterator first, RandomAccessIterator last, - const T &val, - BinaryPredicate comp) -{ - typename thrust::iterator_difference::type n = last - first; - return lower_bound_n(first, n, val, comp); -} - -template -__host__ __device__ -RandomAccessIterator upper_bound_n(RandomAccessIterator first, - Size n, - const T &val, - BinaryPredicate comp) -{ - // wrap comp - thrust::detail::host_device_function< - BinaryPredicate, - bool - > wrapped_comp(comp); - - Size start = 0, i; - while(start < n) - { - i = (start + n) / 2; - if(wrapped_comp(val, first[i])) - { - n = i; - } - else - { - start = i + 1; - } - } // end while - - return first + start; -} - -template -__host__ __device__ -RandomAccessIterator upper_bound(RandomAccessIterator first, RandomAccessIterator last, - const T &val, - BinaryPredicate comp) -{ - typename thrust::iterator_difference::type n = last - first; - return upper_bound_n(first, n, val, comp); -} - -template -__host__ __device__ - pair - equal_range(RandomAccessIterator first, RandomAccessIterator last, - const T &val, - BinaryPredicate comp) -{ - 
RandomAccessIterator lb = thrust::system::detail::generic::scalar::lower_bound(first, last, val, comp); - return thrust::make_pair(lb, thrust::system::detail::generic::scalar::upper_bound(lb, last, val, comp)); -} - - -template -__host__ __device__ -bool binary_search(RandomAccessIterator first, RandomAccessIterator last, const T &value, Compare comp) -{ - RandomAccessIterator iter = thrust::system::detail::generic::scalar::lower_bound(first, last, value, comp); - - // wrap comp - thrust::detail::host_device_function< - Compare, - bool - > wrapped_comp(comp); - - return iter != last && !wrapped_comp(value,*iter); -} - -} // end scalar - -} // end generic - -} // end detail - -} // end system - -} // end thrust - -#include - diff --git a/compat/thrust/system/detail/generic/scan.h b/compat/thrust/system/detail/generic/scan.h deleted file mode 100644 index 205f87ff9f..0000000000 --- a/compat/thrust/system/detail/generic/scan.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - OutputIterator inclusive_scan(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result); - - -// XXX it is an error to call this function; it has no implementation -template - OutputIterator inclusive_scan(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - BinaryFunction binary_op); - - -template - OutputIterator exclusive_scan(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result); - - -template - OutputIterator exclusive_scan(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - T init); - - -// XXX it is an error to call this function; it has no implementation -template - OutputIterator exclusive_scan(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - T init, - BinaryFunction binary_op); - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/scan.inl b/compat/thrust/system/detail/generic/scan.inl deleted file mode 100644 index 33e0803c70..0000000000 --- a/compat/thrust/system/detail/generic/scan.inl +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - OutputIterator inclusive_scan(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result) -{ - // the pseudocode for deducing the type of the temporary used below: - // - // if OutputIterator is a "pure" output iterator - // TemporaryType = InputIterator::value_type - // else - // TemporaryType = OutputIterator::value_type - - typedef typename thrust::detail::eval_if< - thrust::detail::is_output_iterator::value, - thrust::iterator_value, - thrust::iterator_value - >::type ValueType; - - // assume plus as the associative operator - return thrust::inclusive_scan(exec, first, last, result, thrust::plus()); -} // end inclusive_scan() - - -template - OutputIterator exclusive_scan(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result) -{ - // the pseudocode for deducing the type of the temporary used below: - // - // if OutputIterator is a "pure" output iterator - // TemporaryType = InputIterator::value_type - // else - // TemporaryType = OutputIterator::value_type - - typedef typename thrust::detail::eval_if< - thrust::detail::is_output_iterator::value, - thrust::iterator_value, - thrust::iterator_value - >::type ValueType; - - // assume 0 as the initialization value - return thrust::exclusive_scan(exec, first, last, result, ValueType(0)); -} // end exclusive_scan() - - -template - OutputIterator exclusive_scan(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - T init) -{ - // assume plus as the associative operator - return thrust::exclusive_scan(exec, first, last, result, init, thrust::plus()); -} // end 
exclusive_scan() - - -template - OutputIterator inclusive_scan(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - BinaryFunction binary_op) -{ - // unimplemented primitive - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - return result; -} // end inclusive_scan - - -template - OutputIterator exclusive_scan(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - T init, - BinaryFunction binary_op) -{ - // unimplemented primitive - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - return result; -} // end exclusive_scan() - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/scan_by_key.h b/compat/thrust/system/detail/generic/scan_by_key.h deleted file mode 100644 index 160121b58b..0000000000 --- a/compat/thrust/system/detail/generic/scan_by_key.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file scan_by_key.h - * \brief Generic implementations of key-value scans. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result); - - -template - OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryPredicate binary_pred); - - -template - OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryPredicate binary_pred, - AssociativeOperator binary_op); - - -template - OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result); - - -template - OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init); - - -template - OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init, - BinaryPredicate binary_pred); - - -template - OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init, - BinaryPredicate binary_pred, - AssociativeOperator binary_op); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/scan_by_key.inl b/compat/thrust/system/detail/generic/scan_by_key.inl deleted file mode 100644 index d866ddec15..0000000000 --- 
a/compat/thrust/system/detail/generic/scan_by_key.inl +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ -namespace detail -{ - -template -struct segmented_scan_functor -{ - AssociativeOperator binary_op; - - typedef typename thrust::tuple result_type; - - __host__ __device__ - segmented_scan_functor(AssociativeOperator _binary_op) : binary_op(_binary_op) {} - - __host__ __device__ - result_type operator()(result_type a, result_type b) - { - return result_type(thrust::get<1>(b) ? 
thrust::get<0>(b) : binary_op(thrust::get<0>(a), thrust::get<0>(b)), - thrust::get<1>(a) | thrust::get<1>(b)); - } -}; - -} // end namespace detail - - -template - OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result) -{ - typedef typename thrust::iterator_traits::value_type InputType1; - return thrust::inclusive_scan_by_key(exec, first1, last1, first2, result, thrust::equal_to()); -} - - -template - OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryPredicate binary_pred) -{ - typedef typename thrust::iterator_traits::value_type OutputType; - return thrust::inclusive_scan_by_key(exec, first1, last1, first2, result, binary_pred, thrust::plus()); -} - - -template - OutputIterator inclusive_scan_by_key(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryPredicate binary_pred, - AssociativeOperator binary_op) -{ - typedef typename thrust::iterator_traits::value_type OutputType; - typedef unsigned int HeadFlagType; - - const size_t n = last1 - first1; - - if(n != 0) - { - // compute head flags - thrust::detail::temporary_array flags(exec, n); - flags[0] = 1; thrust::transform(exec, first1, last1 - 1, first1 + 1, flags.begin() + 1, thrust::detail::not2(binary_pred)); - - // scan key-flag tuples, - // For additional details refer to Section 2 of the following paper - // S. Sengupta, M. Harris, and M. Garland. 
"Efficient parallel scan algorithms for GPUs" - // NVIDIA Technical Report NVR-2008-003, December 2008 - // http://mgarland.org/files/papers/nvr-2008-003.pdf - thrust::inclusive_scan - (exec, - thrust::make_zip_iterator(thrust::make_tuple(first2, flags.begin())), - thrust::make_zip_iterator(thrust::make_tuple(first2, flags.begin())) + n, - thrust::make_zip_iterator(thrust::make_tuple(result, flags.begin())), - detail::segmented_scan_functor(binary_op)); - } - - return result + n; -} - - -template - OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result) -{ - typedef typename thrust::iterator_traits::value_type OutputType; - return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, OutputType(0)); -} - - -template - OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init) -{ - typedef typename thrust::iterator_traits::value_type InputType1; - return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, init, thrust::equal_to()); -} - - -template - OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init, - BinaryPredicate binary_pred) -{ - typedef typename thrust::iterator_traits::value_type OutputType; - return thrust::exclusive_scan_by_key(exec, first1, last1, first2, result, init, binary_pred, thrust::plus()); -} - - -template - OutputIterator exclusive_scan_by_key(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init, - BinaryPredicate binary_pred, - AssociativeOperator binary_op) -{ - typedef typename thrust::iterator_traits::value_type OutputType; - typedef unsigned int HeadFlagType; - - const size_t 
n = last1 - first1; - - if(n != 0) - { - InputIterator2 last2 = first2 + n; - - // compute head flags - thrust::detail::temporary_array flags(exec, n); - flags[0] = 1; thrust::transform(exec, first1, last1 - 1, first1 + 1, flags.begin() + 1, thrust::detail::not2(binary_pred)); - - // shift input one to the right and initialize segments with init - thrust::detail::temporary_array temp(exec, n); - thrust::replace_copy_if(exec, first2, last2 - 1, flags.begin() + 1, temp.begin() + 1, thrust::negate(), init); - temp[0] = init; - - // scan key-flag tuples, - // For additional details refer to Section 2 of the following paper - // S. Sengupta, M. Harris, and M. Garland. "Efficient parallel scan algorithms for GPUs" - // NVIDIA Technical Report NVR-2008-003, December 2008 - // http://mgarland.org/files/papers/nvr-2008-003.pdf - thrust::inclusive_scan(exec, - thrust::make_zip_iterator(thrust::make_tuple(temp.begin(), flags.begin())), - thrust::make_zip_iterator(thrust::make_tuple(temp.begin(), flags.begin())) + n, - thrust::make_zip_iterator(thrust::make_tuple(result, flags.begin())), - detail::segmented_scan_functor(binary_op)); - } - - return result + n; -} - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/scatter.h b/compat/thrust/system/detail/generic/scatter.h deleted file mode 100644 index 858d11adc1..0000000000 --- a/compat/thrust/system/detail/generic/scatter.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - void scatter(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - RandomAccessIterator output); - - -template - void scatter_if(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - InputIterator3 stencil, - RandomAccessIterator output); - - -template - void scatter_if(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - InputIterator3 stencil, - RandomAccessIterator output, - Predicate pred); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/scatter.inl b/compat/thrust/system/detail/generic/scatter.inl deleted file mode 100644 index 8c40359844..0000000000 --- a/compat/thrust/system/detail/generic/scatter.inl +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - void scatter(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - RandomAccessIterator output) -{ - thrust::transform(exec, - first, - last, - thrust::make_permutation_iterator(output, map), - thrust::identity::type>()); -} // end scatter() - - -template - void scatter_if(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - InputIterator3 stencil, - RandomAccessIterator output) -{ - // default predicate is identity - typedef typename thrust::iterator_value::type StencilType; - thrust::scatter_if(exec, first, last, map, stencil, output, thrust::identity()); -} // end scatter_if() - - -template - void scatter_if(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 map, - InputIterator3 stencil, - RandomAccessIterator output, - Predicate pred) -{ - typedef typename thrust::iterator_value::type InputType; - thrust::transform_if(exec, first, last, stencil, thrust::make_permutation_iterator(output, map), thrust::identity(), pred); -} // end scatter_if() - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/select_system.h b/compat/thrust/system/detail/generic/select_system.h deleted file mode 100644 index 250a0bce44..0000000000 --- a/compat/thrust/system/detail/generic/select_system.h +++ /dev/null @@ -1,182 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ -namespace select_system_detail -{ - - -// min_system case 1: both systems have the same type, just return the first one -template -__host__ __device__ -System &min_system(thrust::execution_policy &system1, - thrust::execution_policy &) -{ - return thrust::detail::derived_cast(system1); -} // end min_system() - - -// min_system case 2: systems have differing type and the first type is considered the minimum -template -__host__ __device__ - typename thrust::detail::enable_if< - thrust::detail::is_same< - System1, - typename thrust::detail::minimum_system::type - >::value, - System1 & - >::type - min_system(thrust::execution_policy &system1, thrust::execution_policy &) -{ - return thrust::detail::derived_cast(system1); -} // end min_system() - - -// min_system case 3: systems have differing type and the second type is considered the minimum -template -__host__ __device__ - typename thrust::detail::enable_if< - thrust::detail::is_same< - System2, - typename thrust::detail::minimum_system::type - >::value, - System2 & - >::type - min_system(thrust::execution_policy &, thrust::execution_policy &system2) -{ - return thrust::detail::derived_cast(system2); -} // end min_system() - - -} // end select_system_detail - - -template -__host__ __device__ - typename thrust::detail::disable_if< - select_system1_exists::value, - System & - >::type - select_system(thrust::execution_policy 
&system) -{ - return thrust::detail::derived_cast(system); -} // end select_system() - - -template -__host__ __device__ - typename thrust::detail::enable_if_defined< - thrust::detail::minimum_system - >::type - &select_system(thrust::execution_policy &system1, - thrust::execution_policy &system2) -{ - return select_system_detail::min_system(system1,system2); -} // end select_system() - - -template -__host__ __device__ - typename thrust::detail::lazy_disable_if< - select_system3_exists::value, - thrust::detail::minimum_system - >::type - &select_system(thrust::execution_policy &system1, - thrust::execution_policy &system2, - thrust::execution_policy &system3) -{ - return select_system(select_system(system1,system2), system3); -} // end select_system() - - -template -__host__ __device__ - typename thrust::detail::lazy_disable_if< - select_system4_exists::value, - thrust::detail::minimum_system - >::type - &select_system(thrust::execution_policy &system1, - thrust::execution_policy &system2, - thrust::execution_policy &system3, - thrust::execution_policy &system4) -{ - return select_system(select_system(system1,system2,system3), system4); -} // end select_system() - - -template -__host__ __device__ - typename thrust::detail::lazy_disable_if< - select_system5_exists::value, - thrust::detail::minimum_system - >::type - &select_system(thrust::execution_policy &system1, - thrust::execution_policy &system2, - thrust::execution_policy &system3, - thrust::execution_policy &system4, - thrust::execution_policy &system5) -{ - return select_system(select_system(system1,system2,system3,system4), system5); -} // end select_system() - - -template -__host__ __device__ - typename thrust::detail::lazy_disable_if< - select_system6_exists::value, - thrust::detail::minimum_system - >::type - &select_system(thrust::execution_policy &system1, - thrust::execution_policy &system2, - thrust::execution_policy &system3, - thrust::execution_policy &system4, - thrust::execution_policy &system5, - 
thrust::execution_policy &system6) -{ - return select_system(select_system(system1,system2,system3,system4,system5), system6); -} // end select_system() - - -// map a single any_system_tag to device_system_tag -inline __host__ __device__ -thrust::device_system_tag select_system(thrust::any_system_tag) -{ - return thrust::device_system_tag(); -} // end select_system() - - -} // end generic -} // end detail -} // end system -} // end thrust - diff --git a/compat/thrust/system/detail/generic/sequence.h b/compat/thrust/system/detail/generic/sequence.h deleted file mode 100644 index b23a7b5d22..0000000000 --- a/compat/thrust/system/detail/generic/sequence.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - void sequence(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last); - - -template - void sequence(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - T init); - - -template - void sequence(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - T init, - T step); - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/sequence.inl b/compat/thrust/system/detail/generic/sequence.inl deleted file mode 100644 index 45aec69829..0000000000 --- a/compat/thrust/system/detail/generic/sequence.inl +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - void sequence(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last) -{ - typedef typename thrust::iterator_traits::value_type T; - - thrust::sequence(exec, first, last, T(0)); -} // end sequence() - - -template - void sequence(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - T init) -{ - thrust::sequence(exec, first, last, init, T(1)); -} // end sequence() - - -template - void sequence(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - T init, - T step) -{ - thrust::tabulate(exec, first, last, init + step * thrust::placeholders::_1); -} // end sequence() - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/set_operations.h b/compat/thrust/system/detail/generic/set_operations.h deleted file mode 100644 index 1ca8d391de..0000000000 --- a/compat/thrust/system/detail/generic/set_operations.h +++ /dev/null @@ -1,303 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - OutputIterator set_difference(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result); - - -// XXX it is an error to call this function; it has no implementation -template - OutputIterator set_difference(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp); - - -template - thrust::pair - set_difference_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -template - thrust::pair - set_difference_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp); - - -template - OutputIterator set_intersection(thrust::execution_policy &system, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result); - - -// XXX it is an error to call this function; it has no implementation -template - OutputIterator set_intersection(thrust::execution_policy &system, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp); - - -template - thrust::pair - set_intersection_by_key(thrust::execution_policy &system, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - 
InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -template - thrust::pair - set_intersection_by_key(thrust::execution_policy &system, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp); - - -template - OutputIterator set_symmetric_difference(thrust::execution_policy &system, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result); - - -// XXX it is an error to call this function; it has no implementation -template - OutputIterator set_symmetric_difference(thrust::execution_policy &system, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp); - - -template - thrust::pair - set_symmetric_difference_by_key(thrust::execution_policy &system, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -template - thrust::pair - set_symmetric_difference_by_key(thrust::execution_policy &system, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp); - - -template - OutputIterator set_union(thrust::execution_policy &system, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result); - - -// XXX it is an error to call this function; it has no implementation 
-template - OutputIterator set_union(thrust::execution_policy &system, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp); - - -template - thrust::pair - set_union_by_key(thrust::execution_policy &system, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -template - thrust::pair - set_union_by_key(thrust::execution_policy &system, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp); - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/set_operations.inl b/compat/thrust/system/detail/generic/set_operations.inl deleted file mode 100644 index bac9ccd671..0000000000 --- a/compat/thrust/system/detail/generic/set_operations.inl +++ /dev/null @@ -1,449 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - OutputIterator set_difference(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result) -{ - typedef typename thrust::iterator_value::type value_type; - return thrust::set_difference(exec, first1, last1, first2, last2, result, thrust::less()); -} // end set_difference() - - -template - thrust::pair - set_difference_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result) -{ - typedef typename thrust::iterator_value::type value_type; - return thrust::set_difference_by_key(exec, keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, thrust::less()); -} // end set_difference_by_key() - - -template - thrust::pair - set_difference_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp) -{ - typedef thrust::tuple iterator_tuple1; - typedef thrust::tuple iterator_tuple2; - typedef thrust::tuple iterator_tuple3; - - typedef thrust::zip_iterator zip_iterator1; - typedef thrust::zip_iterator zip_iterator2; - typedef thrust::zip_iterator zip_iterator3; - - zip_iterator1 zipped_first1 = thrust::make_zip_iterator(thrust::make_tuple(keys_first1, values_first1)); - zip_iterator1 zipped_last1 = thrust::make_zip_iterator(thrust::make_tuple(keys_last1, values_first1)); 
- - zip_iterator2 zipped_first2 = thrust::make_zip_iterator(thrust::make_tuple(keys_first2, values_first2)); - zip_iterator2 zipped_last2 = thrust::make_zip_iterator(thrust::make_tuple(keys_last2, values_first2)); - - zip_iterator3 zipped_result = thrust::make_zip_iterator(thrust::make_tuple(keys_result, values_result)); - - thrust::detail::compare_first comp_first(comp); - - iterator_tuple3 result = thrust::set_difference(exec, zipped_first1, zipped_last1, zipped_first2, zipped_last2, zipped_result, comp_first).get_iterator_tuple(); - - return thrust::make_pair(thrust::get<0>(result), thrust::get<1>(result)); -} // end set_difference_by_key() - - -template - OutputIterator set_intersection(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result) -{ - typedef typename thrust::iterator_value::type value_type; - return thrust::set_intersection(exec, first1, last1, first2, last2, result, thrust::less()); -} // end set_intersection() - - -template - thrust::pair - set_intersection_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - OutputIterator1 keys_result, - OutputIterator2 values_result) -{ - typedef typename thrust::iterator_value::type value_type; - return thrust::set_intersection_by_key(exec, keys_first1, keys_last1, keys_first2, keys_last2, values_first1, keys_result, values_result, thrust::less()); -} // end set_intersection_by_key() - - -template - thrust::pair - set_intersection_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp) -{ - typedef thrust::tuple iterator_tuple1; - typedef thrust::tuple 
iterator_tuple2; - typedef thrust::tuple iterator_tuple3; - - typedef thrust::zip_iterator zip_iterator1; - typedef thrust::zip_iterator zip_iterator2; - typedef thrust::zip_iterator zip_iterator3; - - // fabricate a values_first2 by "sending" keys twice - // it should never be dereferenced by set_intersection - InputIterator2 values_first2 = keys_first2; - - zip_iterator1 zipped_first1 = thrust::make_zip_iterator(thrust::make_tuple(keys_first1, values_first1)); - zip_iterator1 zipped_last1 = thrust::make_zip_iterator(thrust::make_tuple(keys_last1, values_first1)); - - zip_iterator2 zipped_first2 = thrust::make_zip_iterator(thrust::make_tuple(keys_first2, values_first2)); - zip_iterator2 zipped_last2 = thrust::make_zip_iterator(thrust::make_tuple(keys_last2, values_first2)); - - zip_iterator3 zipped_result = thrust::make_zip_iterator(thrust::make_tuple(keys_result, values_result)); - - thrust::detail::compare_first comp_first(comp); - - iterator_tuple3 result = thrust::set_intersection(exec, zipped_first1, zipped_last1, zipped_first2, zipped_last2, zipped_result, comp_first).get_iterator_tuple(); - - return thrust::make_pair(thrust::get<0>(result), thrust::get<1>(result)); -} // end set_intersection_by_key() - - -template - OutputIterator set_symmetric_difference(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result) -{ - typedef typename thrust::iterator_value::type value_type; - return thrust::set_symmetric_difference(exec, first1, last1, first2, last2, result, thrust::less()); -} // end set_symmetric_difference() - - -template - thrust::pair - set_symmetric_difference_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result) -{ - typedef 
typename thrust::iterator_value::type value_type; - return thrust::set_symmetric_difference_by_key(exec, keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, thrust::less()); -} // end set_symmetric_difference_by_key() - - -template - thrust::pair - set_symmetric_difference_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp) -{ - typedef thrust::tuple iterator_tuple1; - typedef thrust::tuple iterator_tuple2; - typedef thrust::tuple iterator_tuple3; - - typedef thrust::zip_iterator zip_iterator1; - typedef thrust::zip_iterator zip_iterator2; - typedef thrust::zip_iterator zip_iterator3; - - zip_iterator1 zipped_first1 = thrust::make_zip_iterator(thrust::make_tuple(keys_first1, values_first1)); - zip_iterator1 zipped_last1 = thrust::make_zip_iterator(thrust::make_tuple(keys_last1, values_first1)); - - zip_iterator2 zipped_first2 = thrust::make_zip_iterator(thrust::make_tuple(keys_first2, values_first2)); - zip_iterator2 zipped_last2 = thrust::make_zip_iterator(thrust::make_tuple(keys_last2, values_first2)); - - zip_iterator3 zipped_result = thrust::make_zip_iterator(thrust::make_tuple(keys_result, values_result)); - - thrust::detail::compare_first comp_first(comp); - - iterator_tuple3 result = thrust::set_symmetric_difference(exec, zipped_first1, zipped_last1, zipped_first2, zipped_last2, zipped_result, comp_first).get_iterator_tuple(); - - return thrust::make_pair(thrust::get<0>(result), thrust::get<1>(result)); -} // end set_symmetric_difference_by_key() - - -template - OutputIterator set_union(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result) -{ - 
typedef typename thrust::iterator_value::type value_type; - return thrust::set_union(exec, first1, last1, first2, last2, result, thrust::less()); -} // end set_union() - - -template - thrust::pair - set_union_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result) -{ - typedef typename thrust::iterator_value::type value_type; - return thrust::set_union_by_key(exec, keys_first1, keys_last1, keys_first2, keys_last2, values_first1, values_first2, keys_result, values_result, thrust::less()); -} // end set_union_by_key() - - -template - thrust::pair - set_union_by_key(thrust::execution_policy &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp) -{ - typedef thrust::tuple iterator_tuple1; - typedef thrust::tuple iterator_tuple2; - typedef thrust::tuple iterator_tuple3; - - typedef thrust::zip_iterator zip_iterator1; - typedef thrust::zip_iterator zip_iterator2; - typedef thrust::zip_iterator zip_iterator3; - - zip_iterator1 zipped_first1 = thrust::make_zip_iterator(thrust::make_tuple(keys_first1, values_first1)); - zip_iterator1 zipped_last1 = thrust::make_zip_iterator(thrust::make_tuple(keys_last1, values_first1)); - - zip_iterator2 zipped_first2 = thrust::make_zip_iterator(thrust::make_tuple(keys_first2, values_first2)); - zip_iterator2 zipped_last2 = thrust::make_zip_iterator(thrust::make_tuple(keys_last2, values_first2)); - - zip_iterator3 zipped_result = thrust::make_zip_iterator(thrust::make_tuple(keys_result, values_result)); - - thrust::detail::compare_first comp_first(comp); - - iterator_tuple3 result = 
thrust::set_union(exec, zipped_first1, zipped_last1, zipped_first2, zipped_last2, zipped_result, comp_first).get_iterator_tuple(); - - return thrust::make_pair(thrust::get<0>(result), thrust::get<1>(result)); -} // end set_union_by_key() - - -template - OutputIterator set_difference(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - // unimplemented primitive - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - return result; -} // end set_difference() - - -template - OutputIterator set_intersection(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - // unimplemented primitive - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - return result; -} // end set_intersection() - - -template - OutputIterator set_symmetric_difference(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - // unimplemented primitive - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - return result; -} // end set_symmetric_difference() - - -template - OutputIterator set_union(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - // unimplemented primitive - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - return result; -} // end set_union() - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/sort.h b/compat/thrust/system/detail/generic/sort.h deleted 
file mode 100644 index 5498708a3b..0000000000 --- a/compat/thrust/system/detail/generic/sort.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - void sort(thrust::execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last); - - -template - void sort(thrust::execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp); - - -template - void sort_by_key(thrust::execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first); - - -template - void sort_by_key(thrust::execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp); - - -template - void stable_sort(thrust::execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last); - - -// XXX it is an error to call this function; it has no implementation -template - void stable_sort(thrust::execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp); - - -template - void stable_sort_by_key(thrust::execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 
keys_last, - RandomAccessIterator2 values_first); - - -// XXX it is an error to call this function; it has no implementation -template - void stable_sort_by_key(thrust::execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp); - - -template - bool is_sorted(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last); - - -template - bool is_sorted(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Compare comp); - - -template - ForwardIterator is_sorted_until(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last); - - -template - ForwardIterator is_sorted_until(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Compare comp); - - -} // end generic -} // end detail -} // end system -} // end thrust - -#include - diff --git a/compat/thrust/system/detail/generic/sort.inl b/compat/thrust/system/detail/generic/sort.inl deleted file mode 100644 index aabb2eed81..0000000000 --- a/compat/thrust/system/detail/generic/sort.inl +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - void sort(thrust::execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last) -{ - typedef typename thrust::iterator_value::type value_type; - thrust::sort(exec, first, last, thrust::less()); -} // end sort() - - -template - void sort(thrust::execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - // implement with stable_sort - thrust::stable_sort(exec, first, last, comp); -} // end sort() - - -template - void sort_by_key(thrust::execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first) -{ - typedef typename thrust::iterator_value::type value_type; - thrust::sort_by_key(exec, keys_first, keys_last, values_first, thrust::less()); -} // end sort_by_key() - - -template - void sort_by_key(thrust::execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp) -{ - // implement with stable_sort_by_key - thrust::stable_sort_by_key(exec, keys_first, keys_last, values_first, comp); -} // end sort_by_key() - - -template - void stable_sort(thrust::execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last) -{ - typedef typename thrust::iterator_value::type value_type; - thrust::stable_sort(exec, first, last, thrust::less()); -} // end stable_sort() - - -template - void stable_sort_by_key(thrust::execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first) -{ - typedef typename iterator_value::type value_type; - thrust::stable_sort_by_key(exec, keys_first, keys_last, values_first, thrust::less()); -} // end 
stable_sort_by_key() - - -template - bool is_sorted(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last) -{ - return thrust::is_sorted_until(exec, first, last) == last; -} // end is_sorted() - - -template - bool is_sorted(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Compare comp) -{ - return thrust::is_sorted_until(exec, first, last, comp) == last; -} // end is_sorted() - - -template - ForwardIterator is_sorted_until(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last) -{ - typedef typename thrust::iterator_value::type InputType; - - return thrust::is_sorted_until(exec, first, last, thrust::less()); -} // end is_sorted_until() - - -template - ForwardIterator is_sorted_until(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Compare comp) -{ - if(thrust::distance(first,last) < 2) return last; - - typedef thrust::tuple IteratorTuple; - typedef thrust::zip_iterator ZipIterator; - - ForwardIterator first_plus_one = first; - thrust::advance(first_plus_one, 1); - - ZipIterator zipped_first = thrust::make_zip_iterator(thrust::make_tuple(first_plus_one, first)); - ZipIterator zipped_last = thrust::make_zip_iterator(thrust::make_tuple(last, first)); - - return thrust::get<0>(thrust::find_if(exec, zipped_first, zipped_last, thrust::detail::tuple_binary_predicate(comp)).get_iterator_tuple()); -} // end is_sorted_until() - - -template - void stable_sort(tag, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - // unimplemented primitive - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); -} // end stable_sort() - - -template - void stable_sort_by_key(tag, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp) -{ - // unimplemented primitive - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) 
); -} // end stable_sort_by_key() - - -} // end generic -} // end detail -} // end system -} // end thrust - diff --git a/compat/thrust/system/detail/generic/swap_ranges.h b/compat/thrust/system/detail/generic/swap_ranges.h deleted file mode 100644 index 5d640d3feb..0000000000 --- a/compat/thrust/system/detail/generic/swap_ranges.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - ForwardIterator2 swap_ranges(thrust::execution_policy &exec, - ForwardIterator1 first1, - ForwardIterator1 last1, - ForwardIterator2 first2); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/swap_ranges.inl b/compat/thrust/system/detail/generic/swap_ranges.inl deleted file mode 100644 index 0e12d07627..0000000000 --- a/compat/thrust/system/detail/generic/swap_ranges.inl +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ -namespace detail -{ - -// XXX define this here rather than in internal_functional.h -// to avoid circular dependence between swap.h & internal_functional.h -struct swap_pair_elements -{ - template - __host__ __device__ - void operator()(Tuple t) - { - // use unqualified swap to allow ADL to catch any user-defined swap - using thrust::swap; - swap(thrust::get<0>(t), thrust::get<1>(t)); - } -}; // end swap_pair_elements - -} // end detail - -template - ForwardIterator2 swap_ranges(thrust::execution_policy &exec, - ForwardIterator1 first1, - ForwardIterator1 last1, - ForwardIterator2 first2) -{ - typedef thrust::tuple IteratorTuple; - typedef thrust::zip_iterator ZipIterator; - - ZipIterator result = thrust::for_each(exec, - thrust::make_zip_iterator(thrust::make_tuple(first1, first2)), - thrust::make_zip_iterator(thrust::make_tuple(last1, first2)), - detail::swap_pair_elements()); - return thrust::get<1>(result.get_iterator_tuple()); -} // end swap_ranges() - -} // end generic -} // end detail -} // end system -} // end thrust - diff --git a/compat/thrust/system/detail/generic/tabulate.h b/compat/thrust/system/detail/generic/tabulate.h deleted file mode 100644 index e5911b14ec..0000000000 --- a/compat/thrust/system/detail/generic/tabulate.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you 
may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - void tabulate(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - UnaryOperation unary_op); - -template - OutputIterator tabulate_n(thrust::execution_policy &exec, - OutputIterator first, - Size n, - UnaryOperation unary_op); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/tabulate.inl b/compat/thrust/system/detail/generic/tabulate.inl deleted file mode 100644 index d2ffc26df6..0000000000 --- a/compat/thrust/system/detail/generic/tabulate.inl +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - void tabulate(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - UnaryOperation unary_op) -{ - typedef typename iterator_difference::type difference_type; - - // by default, counting_iterator uses a 64b difference_type on 32b platforms to avoid overflowing its counter. - // this causes problems when a zip_iterator is created in transform's implementation -- ForwardIterator is - // incremented by a 64b difference_type and some compilers warn - // to avoid this, specify the counting_iterator's difference_type to be the same as ForwardIterator's. - thrust::counting_iterator iter(0); - - thrust::transform(exec, iter, iter + thrust::distance(first, last), first, unary_op); -} // end tabulate() - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - - diff --git a/compat/thrust/system/detail/generic/tag.h b/compat/thrust/system/detail/generic/tag.h deleted file mode 100644 index 577d6a37a7..0000000000 --- a/compat/thrust/system/detail/generic/tag.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file generic/tag.h - * \brief Implementation of the generic backend's tag. 
- */ - -#pragma once - -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -// tag exists only to make the generic entry points the least priority match -// during ADL. tag should not be derived from and is constructible from anything -struct tag -{ - template - __host__ __device__ inline - tag(const T &) {} -}; - -} // end generic -} // end detail -} // end system -} // end thrust - diff --git a/compat/thrust/system/detail/generic/temporary_buffer.h b/compat/thrust/system/detail/generic/temporary_buffer.h deleted file mode 100644 index 8cb08b06ae..0000000000 --- a/compat/thrust/system/detail/generic/temporary_buffer.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - thrust::pair, typename thrust::pointer::difference_type> - get_temporary_buffer(thrust::execution_policy &exec, typename thrust::pointer::difference_type n); - - -template - void return_temporary_buffer(thrust::execution_policy &exec, Pointer p); - - -} // end generic -} // end detail -} // end system -} // end thrust - -#include - diff --git a/compat/thrust/system/detail/generic/temporary_buffer.inl b/compat/thrust/system/detail/generic/temporary_buffer.inl deleted file mode 100644 index 0a6be7ee08..0000000000 --- a/compat/thrust/system/detail/generic/temporary_buffer.inl +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - thrust::pair, typename thrust::pointer::difference_type> - get_temporary_buffer(thrust::execution_policy &exec, typename thrust::pointer::difference_type n) -{ - thrust::pointer ptr = thrust::malloc(exec, n); - - // check for a failed malloc - if(!ptr.get()) - { - n = 0; - } // end if - - return thrust::make_pair(ptr, n); -} // end get_temporary_buffer() - - -template - void return_temporary_buffer(thrust::execution_policy &exec, Pointer p) -{ - thrust::free(exec, p); -} // end return_temporary_buffer() - - -} // end generic -} // end detail -} // end system -} // end thrust - diff --git a/compat/thrust/system/detail/generic/transform.h b/compat/thrust/system/detail/generic/transform.h deleted file mode 100644 index e98d40291e..0000000000 --- a/compat/thrust/system/detail/generic/transform.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - OutputIterator transform(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - UnaryFunction op); - -template - OutputIterator transform(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryFunction op); - -template - ForwardIterator transform_if(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - ForwardIterator result, - UnaryFunction unary_op, - Predicate pred); - -template - ForwardIterator transform_if(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - ForwardIterator result, - UnaryFunction unary_op, - Predicate pred); - -template - ForwardIterator transform_if(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator3 stencil, - ForwardIterator result, - BinaryFunction binary_op, - Predicate pred); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/transform.inl b/compat/thrust/system/detail/generic/transform.inl deleted file mode 100644 index 8f0995328c..0000000000 --- a/compat/thrust/system/detail/generic/transform.inl +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - OutputIterator transform(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - UnaryFunction op) -{ - // XXX WAR the problem of a generic __host__ __device__ functor's inability to invoke - // a function which is only __host__ or __device__ by selecting a generic functor - // which is one or the other - // when nvcc is able to deal with this, remove this WAR - - // given the minimal system, determine the unary transform functor we need - typedef typename thrust::detail::unary_transform_functor::type UnaryTransformFunctor; - - // make an iterator tuple - typedef thrust::tuple IteratorTuple; - typedef thrust::zip_iterator ZipIterator; - - ZipIterator zipped_result = - thrust::for_each(exec, - thrust::make_zip_iterator(thrust::make_tuple(first,result)), - thrust::make_zip_iterator(thrust::make_tuple(last,result)), - UnaryTransformFunctor(op)); - - return thrust::get<1>(zipped_result.get_iterator_tuple()); -} // end transform() - - -template - OutputIterator transform(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryFunction op) -{ - // XXX WAR the problem of a generic __host__ __device__ functor's inability to invoke - // a function which is only __host__ or __device__ by selecting a generic functor - // which is one or the 
other - // when nvcc is able to deal with this, remove this WAR - - // given the minimal system, determine the binary transform functor we need - typedef typename thrust::detail::binary_transform_functor::type BinaryTransformFunctor; - - // make an iterator tuple - typedef thrust::tuple IteratorTuple; - typedef thrust::zip_iterator ZipIterator; - - ZipIterator zipped_result = - thrust::for_each(exec, - thrust::make_zip_iterator(thrust::make_tuple(first1,first2,result)), - thrust::make_zip_iterator(thrust::make_tuple(last1,first2,result)), - BinaryTransformFunctor(op)); - - return thrust::get<2>(zipped_result.get_iterator_tuple()); -} // end transform() - - -template - ForwardIterator transform_if(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - ForwardIterator result, - UnaryFunction unary_op, - Predicate pred) -{ - // XXX WAR the problem of a generic __host__ __device__ functor's inability to invoke - // a function which is only __host__ or __device__ by selecting a generic functor - // which is one or the other - // when nvcc is able to deal with this, remove this WAR - - // given the minimal system, determine the unary transform_if functor we need - typedef typename thrust::detail::unary_transform_if_functor::type UnaryTransformIfFunctor; - - // make an iterator tuple - typedef thrust::tuple IteratorTuple; - typedef thrust::zip_iterator ZipIterator; - - ZipIterator zipped_result = - thrust::for_each(exec, - thrust::make_zip_iterator(thrust::make_tuple(first,result)), - thrust::make_zip_iterator(thrust::make_tuple(last,result)), - UnaryTransformIfFunctor(unary_op,pred)); - - return thrust::get<1>(zipped_result.get_iterator_tuple()); -} // end transform_if() - - -template - ForwardIterator transform_if(thrust::execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - ForwardIterator result, - UnaryFunction unary_op, - Predicate pred) -{ - // XXX WAR the problem of a generic __host__ __device__ 
functor's inability to invoke - // a function which is only __host__ or __device__ by selecting a generic functor - // which is one or the other - // when nvcc is able to deal with this, remove this WAR - - // given the minimal system, determine the unary transform_if functor we need - typedef typename thrust::detail::unary_transform_if_with_stencil_functor::type UnaryTransformIfFunctor; - - // make an iterator tuple - typedef thrust::tuple IteratorTuple; - typedef thrust::zip_iterator ZipIterator; - - ZipIterator zipped_result = - thrust::for_each(exec, - thrust::make_zip_iterator(thrust::make_tuple(first,stencil,result)), - thrust::make_zip_iterator(thrust::make_tuple(last,stencil,result)), - UnaryTransformIfFunctor(unary_op,pred)); - - return thrust::get<2>(zipped_result.get_iterator_tuple()); -} // end transform_if() - - -template - ForwardIterator transform_if(thrust::execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator3 stencil, - ForwardIterator result, - BinaryFunction binary_op, - Predicate pred) -{ - // XXX WAR the problem of a generic __host__ __device__ functor's inability to invoke - // a function which is only __host__ or __device__ by selecting a generic functor - // which is one or the other - // when nvcc is able to deal with this, remove this WAR - - // given the minimal system, determine the binary transform_if functor we need - typedef typename thrust::detail::binary_transform_if_functor::type BinaryTransformIfFunctor; - - // make an iterator tuple - typedef thrust::tuple IteratorTuple; - typedef thrust::zip_iterator ZipIterator; - - ZipIterator zipped_result = - thrust::for_each(exec, - thrust::make_zip_iterator(thrust::make_tuple(first1,first2,stencil,result)), - thrust::make_zip_iterator(thrust::make_tuple(last1,first2,stencil,result)), - BinaryTransformIfFunctor(binary_op,pred)); - - return thrust::get<3>(zipped_result.get_iterator_tuple()); -} // end transform_if() - - -} // end 
generic -} // end detail -} // end system -} // end thrust - diff --git a/compat/thrust/system/detail/generic/transform_reduce.h b/compat/thrust/system/detail/generic/transform_reduce.h deleted file mode 100644 index c1f098f50f..0000000000 --- a/compat/thrust/system/detail/generic/transform_reduce.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - OutputType transform_reduce(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - UnaryFunction unary_op, - OutputType init, - BinaryFunction binary_op); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/transform_reduce.inl b/compat/thrust/system/detail/generic/transform_reduce.inl deleted file mode 100644 index ce8b6a1213..0000000000 --- a/compat/thrust/system/detail/generic/transform_reduce.inl +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - OutputType transform_reduce(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - UnaryFunction unary_op, - OutputType init, - BinaryFunction binary_op) -{ - thrust::transform_iterator xfrm_first(first, unary_op); - thrust::transform_iterator xfrm_last(last, unary_op); - - return thrust::reduce(exec, xfrm_first, xfrm_last, init, binary_op); -} // end transform_reduce() - -} // end generic -} // end detail -} // end system -} // end thrust - diff --git a/compat/thrust/system/detail/generic/transform_scan.h b/compat/thrust/system/detail/generic/transform_scan.h deleted file mode 100644 index 99db86e4d9..0000000000 --- a/compat/thrust/system/detail/generic/transform_scan.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - OutputIterator transform_inclusive_scan(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - UnaryFunction unary_op, - BinaryFunction binary_op); - -template - OutputIterator transform_exclusive_scan(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - UnaryFunction unary_op, - T init, - AssociativeOperator binary_op); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/transform_scan.inl b/compat/thrust/system/detail/generic/transform_scan.inl deleted file mode 100644 index a95ec20e67..0000000000 --- a/compat/thrust/system/detail/generic/transform_scan.inl +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - OutputIterator transform_inclusive_scan(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - UnaryFunction unary_op, - BinaryFunction binary_op) -{ - // the pseudocode for deducing the type of the temporary used below: - // - // if UnaryFunction is AdaptableUnaryFunction - // TemporaryType = AdaptableUnaryFunction::result_type - // else if OutputIterator is a "pure" output iterator - // TemporaryType = InputIterator::value_type - // else - // TemporaryType = OutputIterator::value_type - // - // XXX upon c++0x, TemporaryType needs to be: - // result_of::type - - typedef typename thrust::detail::eval_if< - thrust::detail::has_result_type::value, - thrust::detail::result_type, - thrust::detail::eval_if< - thrust::detail::is_output_iterator::value, - thrust::iterator_value, - thrust::iterator_value - > - >::type ValueType; - - thrust::transform_iterator _first(first, unary_op); - thrust::transform_iterator _last(last, unary_op); - - return thrust::inclusive_scan(exec, _first, _last, result, binary_op); -} // end transform_inclusive_scan() - -template - OutputIterator transform_exclusive_scan(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - UnaryFunction unary_op, - T init, - AssociativeOperator binary_op) -{ - // the pseudocode for deducing the type of the temporary used below: - // - // if UnaryFunction is AdaptableUnaryFunction - // TemporaryType = AdaptableUnaryFunction::result_type - // else if OutputIterator is a "pure" output iterator - // TemporaryType = InputIterator::value_type - // else - // TemporaryType = OutputIterator::value_type - // - // XXX upon c++0x, TemporaryType needs to be: - // result_of::type - - typedef typename thrust::detail::eval_if< - 
thrust::detail::has_result_type::value, - thrust::detail::result_type, - thrust::detail::eval_if< - thrust::detail::is_output_iterator::value, - thrust::iterator_value, - thrust::iterator_value - > - >::type ValueType; - - thrust::transform_iterator _first(first, unary_op); - thrust::transform_iterator _last(last, unary_op); - - return thrust::exclusive_scan(exec, _first, _last, result, init, binary_op); -} // end transform_exclusive_scan() - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - - diff --git a/compat/thrust/system/detail/generic/type_traits.h b/compat/thrust/system/detail/generic/type_traits.h deleted file mode 100644 index 40113525f3..0000000000 --- a/compat/thrust/system/detail/generic/type_traits.h +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file generic/type_traits.h - * \brief Introspection for free functions defined in generic. 
- */ - -#pragma once - -#include - -namespace thrust -{ - -// forward declaration of any_system_tag for any_conversion below -struct any_system_tag; - -namespace system -{ -namespace detail -{ - -// we must define these traits outside of generic's namespace -namespace generic_type_traits_ns -{ - -typedef char yes; -typedef char (&no)[2]; - -struct any_conversion -{ - template any_conversion(const T &); - - // add this extra constructor to disambiguate conversion from any_system_tag - any_conversion(const any_system_tag &); -}; - -namespace select_system_exists_ns -{ - no select_system(const any_conversion &); - no select_system(const any_conversion &, const any_conversion &); - no select_system(const any_conversion &, const any_conversion &, const any_conversion &); - no select_system(const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &); - no select_system(const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &); - no select_system(const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &, const any_conversion &); - - template yes check(const T &); - - no check(no); - - template - struct select_system1_exists - { - static Tag &tag; - - static const bool value = sizeof(check(select_system(tag))) == sizeof(yes); - }; - - template - struct select_system2_exists - { - static Tag1 &tag1; - static Tag2 &tag2; - - static const bool value = sizeof(check(select_system(tag1,tag2))) == sizeof(yes); - }; - - template - struct select_system3_exists - { - static Tag1 &tag1; - static Tag2 &tag2; - static Tag3 &tag3; - - static const bool value = sizeof(check(select_system(tag1,tag2,tag3))) == sizeof(yes); - }; - - template - struct select_system4_exists - { - static Tag1 &tag1; - static Tag2 &tag2; - static Tag3 &tag3; - static Tag4 &tag4; - - static const bool value = 
sizeof(check(select_system(tag1,tag2,tag3,tag4))) == sizeof(yes); - }; - - template - struct select_system5_exists - { - static Tag1 &tag1; - static Tag2 &tag2; - static Tag3 &tag3; - static Tag4 &tag4; - static Tag5 &tag5; - - static const bool value = sizeof(check(select_system(tag1,tag2,tag3,tag4,tag5))) == sizeof(yes); - }; - - template - struct select_system6_exists - { - static Tag1 &tag1; - static Tag2 &tag2; - static Tag3 &tag3; - static Tag4 &tag4; - static Tag5 &tag5; - static Tag6 &tag6; - - static const bool value = sizeof(check(select_system(tag1,tag2,tag3,tag4,tag5,tag6))) == sizeof(yes); - }; -} // end select_system_exists_ns - -} // end generic_type_traits_ns - -namespace generic -{ - -template - struct select_system1_exists - : generic_type_traits_ns::select_system_exists_ns::select_system1_exists -{}; - -template - struct select_system2_exists - : generic_type_traits_ns::select_system_exists_ns::select_system2_exists -{}; - -template - struct select_system3_exists - : generic_type_traits_ns::select_system_exists_ns::select_system3_exists -{}; - -template - struct select_system4_exists - : generic_type_traits_ns::select_system_exists_ns::select_system4_exists -{}; - -template - struct select_system5_exists - : generic_type_traits_ns::select_system_exists_ns::select_system5_exists -{}; - -template - struct select_system6_exists - : generic_type_traits_ns::select_system_exists_ns::select_system6_exists -{}; - -} // end generic -} // end detail -} // end system -} // end thrust - diff --git a/compat/thrust/system/detail/generic/uninitialized_copy.h b/compat/thrust/system/detail/generic/uninitialized_copy.h deleted file mode 100644 index 67e3e68328..0000000000 --- a/compat/thrust/system/detail/generic/uninitialized_copy.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - ForwardIterator uninitialized_copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - ForwardIterator result); - -template - ForwardIterator uninitialized_copy_n(thrust::execution_policy &exec, - InputIterator first, - Size n, - ForwardIterator result); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/uninitialized_copy.inl b/compat/thrust/system/detail/generic/uninitialized_copy.inl deleted file mode 100644 index 414e6e48fe..0000000000 --- a/compat/thrust/system/detail/generic/uninitialized_copy.inl +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ -namespace detail -{ - -template - struct uninitialized_copy_functor -{ - template - __host__ __device__ - void operator()(Tuple t) - { - const InputType &in = thrust::get<0>(t); - OutputType &out = thrust::get<1>(t); - - ::new(static_cast(&out)) OutputType(in); - } // end operator()() -}; // end uninitialized_copy_functor - - -// non-trivial copy constructor path -template - ForwardIterator uninitialized_copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - ForwardIterator result, - thrust::detail::false_type) // has_trivial_copy_constructor -{ - // zip up the iterators - typedef thrust::tuple IteratorTuple; - typedef thrust::zip_iterator ZipIterator; - - ZipIterator begin = thrust::make_zip_iterator(thrust::make_tuple(first,result)); - ZipIterator end = begin; - - // get a zip_iterator pointing to the end - const typename thrust::iterator_difference::type n = thrust::distance(first,last); - thrust::advance(end, n); - - // create a functor - typedef typename iterator_traits::value_type InputType; - typedef typename iterator_traits::value_type OutputType; - - detail::uninitialized_copy_functor f; - - // do the for_each - thrust::for_each(exec, begin, end, f); - - // return the end of the output range - return thrust::get<1>(end.get_iterator_tuple()); -} // end uninitialized_copy() - - -// trivial copy constructor path -template - ForwardIterator uninitialized_copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - ForwardIterator result, - thrust::detail::true_type) // has_trivial_copy_constructor -{ - return thrust::copy(exec, first, last, result); -} // end uninitialized_copy() - - -// non-trivial copy constructor path -template - ForwardIterator uninitialized_copy_n(thrust::execution_policy &exec, - InputIterator first, - Size n, - 
ForwardIterator result, - thrust::detail::false_type) // has_trivial_copy_constructor -{ - // zip up the iterators - typedef thrust::tuple IteratorTuple; - typedef thrust::zip_iterator ZipIterator; - - ZipIterator zipped_first = thrust::make_zip_iterator(thrust::make_tuple(first,result)); - - // create a functor - typedef typename iterator_traits::value_type InputType; - typedef typename iterator_traits::value_type OutputType; - - detail::uninitialized_copy_functor f; - - // do the for_each_n - ZipIterator zipped_last = thrust::for_each_n(exec, zipped_first, n, f); - - // return the end of the output range - return thrust::get<1>(zipped_last.get_iterator_tuple()); -} // end uninitialized_copy_n() - - -// trivial copy constructor path -template - ForwardIterator uninitialized_copy_n(thrust::execution_policy &exec, - InputIterator first, - Size n, - ForwardIterator result, - thrust::detail::true_type) // has_trivial_copy_constructor -{ - return thrust::copy_n(exec, first, n, result); -} // end uninitialized_copy_n() - - -} // end detail - - -template - ForwardIterator uninitialized_copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - ForwardIterator result) -{ - typedef typename iterator_traits::value_type ResultType; - - typedef typename thrust::detail::has_trivial_copy_constructor::type ResultTypeHasTrivialCopyConstructor; - - return thrust::system::detail::generic::detail::uninitialized_copy(exec, first, last, result, ResultTypeHasTrivialCopyConstructor()); -} // end uninitialized_copy() - - -template - ForwardIterator uninitialized_copy_n(thrust::execution_policy &exec, - InputIterator first, - Size n, - ForwardIterator result) -{ - typedef typename iterator_traits::value_type ResultType; - - typedef typename thrust::detail::has_trivial_copy_constructor::type ResultTypeHasTrivialCopyConstructor; - - return thrust::system::detail::generic::detail::uninitialized_copy_n(exec, first, n, result, ResultTypeHasTrivialCopyConstructor()); -} 
// end uninitialized_copy_n() - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/uninitialized_fill.h b/compat/thrust/system/detail/generic/uninitialized_fill.h deleted file mode 100644 index c1df694c02..0000000000 --- a/compat/thrust/system/detail/generic/uninitialized_fill.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - -template - void uninitialized_fill(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - const T &x); - -template - ForwardIterator uninitialized_fill_n(thrust::execution_policy &exec, - ForwardIterator first, - Size n, - const T &x); - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/uninitialized_fill.inl b/compat/thrust/system/detail/generic/uninitialized_fill.inl deleted file mode 100644 index bb30b24fd7..0000000000 --- a/compat/thrust/system/detail/generic/uninitialized_fill.inl +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ -namespace detail -{ - -template - void uninitialized_fill(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - const T &x, - thrust::detail::true_type) // has_trivial_copy_constructor -{ - thrust::fill(exec, first, last, x); -} // end uninitialized_fill() - -template - void uninitialized_fill(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - const T &x, - thrust::detail::false_type) // has_trivial_copy_constructor -{ - typedef typename iterator_traits::value_type ValueType; - - thrust::for_each(exec, first, last, thrust::detail::uninitialized_fill_functor(x)); -} // end uninitialized_fill() - -template - ForwardIterator uninitialized_fill_n(thrust::execution_policy &exec, - ForwardIterator first, - Size n, - const T &x, - thrust::detail::true_type) // has_trivial_copy_constructor -{ - return thrust::fill_n(exec, first, n, x); -} // end uninitialized_fill() - -template - ForwardIterator uninitialized_fill_n(thrust::execution_policy &exec, - ForwardIterator first, - Size n, - const T &x, - thrust::detail::false_type) // has_trivial_copy_constructor -{ - typedef typename iterator_traits::value_type ValueType; - - return thrust::for_each_n(exec, first, n, thrust::detail::uninitialized_fill_functor(x)); -} // end uninitialized_fill() - -} // end detail - -template - void uninitialized_fill(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - const T &x) -{ - typedef typename iterator_traits::value_type ValueType; - - typedef thrust::detail::has_trivial_copy_constructor ValueTypeHasTrivialCopyConstructor; - - thrust::system::detail::generic::detail::uninitialized_fill(exec, first, last, x, - ValueTypeHasTrivialCopyConstructor()); -} // end uninitialized_fill() - -template - ForwardIterator 
uninitialized_fill_n(thrust::execution_policy &exec, - ForwardIterator first, - Size n, - const T &x) -{ - typedef typename iterator_traits::value_type ValueType; - - typedef thrust::detail::has_trivial_copy_constructor ValueTypeHasTrivialCopyConstructor; - - return thrust::system::detail::generic::detail::uninitialized_fill_n(exec, first, n, x, - ValueTypeHasTrivialCopyConstructor()); -} // end uninitialized_fill() - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/unique.h b/compat/thrust/system/detail/generic/unique.h deleted file mode 100644 index 57e17cafa4..0000000000 --- a/compat/thrust/system/detail/generic/unique.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template -ForwardIterator unique(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last); - - -template -ForwardIterator unique(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate binary_pred); - - -template -OutputIterator unique_copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator output); - - -template -OutputIterator unique_copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator output, - BinaryPredicate binary_pred); - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/generic/unique.inl b/compat/thrust/system/detail/generic/unique.inl deleted file mode 100644 index 42d6b15e82..0000000000 --- a/compat/thrust/system/detail/generic/unique.inl +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file unique.inl - * \brief Inline file for unique.h. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - ForwardIterator unique(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last) -{ - typedef typename thrust::iterator_traits::value_type InputType; - - return thrust::unique(exec, first, last, thrust::equal_to()); -} // end unique() - - -template - ForwardIterator unique(thrust::execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate binary_pred) -{ - typedef typename thrust::iterator_traits::value_type InputType; - - thrust::detail::temporary_array input(exec, first, last); - - return thrust::unique_copy(exec, input.begin(), input.end(), first, binary_pred); -} // end unique() - - -template - OutputIterator unique_copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator output) -{ - typedef typename thrust::iterator_value::type value_type; - return thrust::unique_copy(exec, first,last,output,thrust::equal_to()); -} // end unique_copy() - - -template - OutputIterator unique_copy(thrust::execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator output, - BinaryPredicate binary_pred) -{ - // empty sequence - if(first == last) - return output; - - thrust::detail::temporary_array stencil(exec, thrust::distance(first, last)); - - // mark first element in each group - stencil[0] = 1; - thrust::transform(exec, first, last - 1, first + 1, stencil.begin() + 1, thrust::detail::not2(binary_pred)); - - return thrust::copy_if(exec, first, last, stencil.begin(), output, thrust::identity()); -} // end unique_copy() - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/generic/unique_by_key.h 
b/compat/thrust/system/detail/generic/unique_by_key.h deleted file mode 100644 index aa62f73e51..0000000000 --- a/compat/thrust/system/detail/generic/unique_by_key.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - thrust::pair - unique_by_key(thrust::execution_policy &exec, - ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first); - - -template - thrust::pair - unique_by_key(thrust::execution_policy &exec, - ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first, - BinaryPredicate binary_pred); - - -template - thrust::pair - unique_by_key_copy(thrust::execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output); - - -template - thrust::pair - unique_by_key_copy(thrust::execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred); - - -} // end namespace generic -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git 
a/compat/thrust/system/detail/generic/unique_by_key.inl b/compat/thrust/system/detail/generic/unique_by_key.inl deleted file mode 100644 index c780fa71d1..0000000000 --- a/compat/thrust/system/detail/generic/unique_by_key.inl +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace generic -{ - - -template - thrust::pair - unique_by_key(thrust::execution_policy &exec, - ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first) -{ - typedef typename thrust::iterator_traits::value_type KeyType; - return thrust::unique_by_key(exec, keys_first, keys_last, values_first, thrust::equal_to()); -} // end unique_by_key() - - -template - thrust::pair - unique_by_key(thrust::execution_policy &exec, - ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first, - BinaryPredicate binary_pred) -{ - typedef typename thrust::iterator_traits::value_type InputType1; - typedef typename thrust::iterator_traits::value_type InputType2; - - ForwardIterator2 values_last = values_first + (keys_last - keys_first); - - thrust::detail::temporary_array keys(exec, keys_first, keys_last); - thrust::detail::temporary_array vals(exec, values_first, values_last); 
- - return thrust::unique_by_key_copy(exec, keys.begin(), keys.end(), vals.begin(), keys_first, values_first, binary_pred); -} // end unique_by_key() - - -template - thrust::pair - unique_by_key_copy(thrust::execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output) -{ - typedef typename thrust::iterator_traits::value_type KeyType; - return thrust::unique_by_key_copy(exec, keys_first, keys_last, values_first, keys_output, values_output, thrust::equal_to()); -} // end unique_by_key_copy() - - -template - thrust::pair - unique_by_key_copy(thrust::execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred) -{ - typedef typename thrust::iterator_traits::difference_type difference_type; - - // empty sequence - if(keys_first == keys_last) - return thrust::make_pair(keys_output, values_output); - - difference_type n = thrust::distance(keys_first, keys_last); - - thrust::detail::temporary_array stencil(exec,n); - - // mark first element in each group - stencil[0] = 1; - thrust::transform(exec, keys_first, keys_last - 1, keys_first + 1, stencil.begin() + 1, thrust::detail::not2(binary_pred)); - - thrust::zip_iterator< thrust::tuple > result = - thrust::copy_if(exec, - thrust::make_zip_iterator(thrust::make_tuple(keys_first, values_first)), - thrust::make_zip_iterator(thrust::make_tuple(keys_first, values_first)) + n, - stencil.begin(), - thrust::make_zip_iterator(thrust::make_tuple(keys_output, values_output)), - thrust::identity()); - - difference_type output_size = result - thrust::make_zip_iterator(thrust::make_tuple(keys_output, values_output)); - - return thrust::make_pair(keys_output + output_size, values_output + output_size); -} // end unique_by_key_copy() - - -} // end namespace generic -} // end namespace 
detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/decompose.h b/compat/thrust/system/detail/internal/decompose.h deleted file mode 100644 index dea806d69c..0000000000 --- a/compat/thrust/system/detail/internal/decompose.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ - - template - class index_range - { - public: - typedef IndexType index_type; - - __host__ __device__ - index_range(index_type begin, index_type end) : m_begin(begin), m_end(end) {} - - __host__ __device__ - index_type begin(void) const { return m_begin; } - - __host__ __device__ - index_type end(void) const { return m_end; } - - __host__ __device__ - index_type size(void) const { return m_end - m_begin; } - - private: - index_type m_begin; - index_type m_end; - }; - - template - class uniform_decomposition - { - public: - typedef IndexType index_type; - typedef index_range range_type; - - uniform_decomposition(index_type N, index_type granularity, index_type max_intervals) - : m_N(N), - m_intervals((N + granularity - 1) / granularity), - m_threshold(0), - m_small_interval(granularity), - m_large_interval(0) - { - if(m_intervals > max_intervals) - { - m_small_interval = granularity * (m_intervals / max_intervals); - m_large_interval 
= m_small_interval + granularity; - m_threshold = m_intervals % max_intervals; - m_intervals = max_intervals; - } - } - - __host__ __device__ - index_range operator[](const index_type& i) const - { - if (i < m_threshold) - { - index_type begin = m_large_interval * i; - index_type end = begin + m_large_interval; - return range_type(begin, end); - } - else - { - index_type begin = m_large_interval * m_threshold + m_small_interval * (i - m_threshold); - index_type end = (begin + m_small_interval < m_N) ? begin + m_small_interval : m_N; - return range_type(begin, end); - } - } - - __host__ __device__ - index_type size(void) const - { - return m_intervals; - } - - private: - - index_type m_N; - index_type m_intervals; - index_type m_threshold; - index_type m_small_interval; - index_type m_large_interval; - }; - - -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/adjacent_difference.h b/compat/thrust/system/detail/internal/scalar/adjacent_difference.h deleted file mode 100644 index d1a95aeec4..0000000000 --- a/compat/thrust/system/detail/internal/scalar/adjacent_difference.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file adjacent_difference.h - * \brief Sequential implementation of adjacent_difference. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template -OutputIterator adjacent_difference(InputIterator first, - InputIterator last, - OutputIterator result, - BinaryFunction binary_op) -{ - typedef typename thrust::iterator_traits::value_type InputType; - - if (first == last) - return result; - - InputType curr = *first; - - *result = curr; - - while (++first != last) - { - InputType next = *first; - *(++result) = binary_op(next, curr); - curr = next; - } - - return ++result; -} - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/binary_search.h b/compat/thrust/system/detail/internal/scalar/binary_search.h deleted file mode 100644 index c3ac49fbd3..0000000000 --- a/compat/thrust/system/detail/internal/scalar/binary_search.h +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file binary_search.h - * \brief Sequential implementation of binary search algorithms. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template -ForwardIterator lower_bound(ForwardIterator first, - ForwardIterator last, - const T& val, - StrictWeakOrdering comp) -{ - // wrap comp - thrust::detail::host_function< - StrictWeakOrdering, - bool - > wrapped_comp(comp); - - typedef typename thrust::iterator_difference::type difference_type; - - difference_type len = thrust::distance(first, last); - - while(len > 0) - { - difference_type half = len >> 1; - ForwardIterator middle = first; - - thrust::advance(middle, half); - - if(wrapped_comp(*middle, val)) - { - first = middle; - ++first; - len = len - half - 1; - } - else - { - len = half; - } - } - - return first; -} - - -template -ForwardIterator upper_bound(ForwardIterator first, - ForwardIterator last, - const T& val, - StrictWeakOrdering comp) -{ - // wrap comp - thrust::detail::host_function< - StrictWeakOrdering, - bool - > wrapped_comp(comp); - - typedef typename thrust::iterator_difference::type difference_type; - - difference_type len = thrust::distance(first, last); - - while(len > 0) - { - difference_type half = len >> 1; - ForwardIterator middle = first; - - thrust::advance(middle, half); - - if(wrapped_comp(val, *middle)) - { - len = half; - } - else - { - first = middle; - ++first; - len = len - half - 1; - } - } - - return first; -} - -template -bool binary_search(ForwardIterator first, - ForwardIterator last, - const T& val, - StrictWeakOrdering comp) -{ - ForwardIterator iter = thrust::system::detail::internal::scalar::lower_bound(first, last, val, comp); - - // wrap comp - thrust::detail::host_function< - StrictWeakOrdering, - bool - > wrapped_comp(comp); - - return iter != last && !wrapped_comp(val,*iter); -} - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git 
a/compat/thrust/system/detail/internal/scalar/copy.h b/compat/thrust/system/detail/internal/scalar/copy.h deleted file mode 100644 index 42cb385402..0000000000 --- a/compat/thrust/system/detail/internal/scalar/copy.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file copy.h - * \brief Sequential implementations of copy algorithms. - */ - -#pragma once - -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template - OutputIterator copy(InputIterator first, - InputIterator last, - OutputIterator result); - -template - OutputIterator copy_n(InputIterator first, - Size n, - OutputIterator result); - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/internal/scalar/copy.inl b/compat/thrust/system/detail/internal/scalar/copy.inl deleted file mode 100644 index 8c9f5c2825..0000000000 --- a/compat/thrust/system/detail/internal/scalar/copy.inl +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ -namespace copy_detail -{ - - -// returns the raw pointer associated with a Pointer-like thing -template - typename thrust::detail::pointer_traits::raw_pointer - get(Pointer ptr) -{ - return thrust::detail::pointer_traits::get(ptr); -} - - -template - OutputIterator copy(InputIterator first, - InputIterator last, - OutputIterator result, - thrust::detail::true_type) // is_trivial_copy -{ - typedef typename thrust::iterator_difference::type Size; - - const Size n = last - first; - thrust::system::detail::internal::scalar::trivial_copy_n(get(&*first), n, get(&*result)); - return result + n; -} // end copy() - - -template - OutputIterator copy(InputIterator first, - InputIterator last, - OutputIterator result, - thrust::detail::false_type) // is_trivial_copy -{ - return thrust::system::detail::internal::scalar::general_copy(first,last,result); -} // end copy() - - -template - OutputIterator copy_n(InputIterator first, - Size n, - OutputIterator result, - thrust::detail::true_type) // is_trivial_copy -{ - thrust::system::detail::internal::scalar::trivial_copy_n(get(&*first), n, get(&*result)); - return result + n; -} // end copy_n() - - -template - OutputIterator copy_n(InputIterator first, - Size n, - OutputIterator result, - thrust::detail::false_type) // is_trivial_copy -{ - return thrust::system::detail::internal::scalar::general_copy_n(first,n,result); -} // end copy_n() - 
-} // end namespace copy_detail - - -template - OutputIterator copy(InputIterator first, - InputIterator last, - OutputIterator result) -{ - return thrust::system::detail::internal::scalar::copy_detail::copy(first, last, result, - typename thrust::detail::dispatch::is_trivial_copy::type()); -} // end copy() - - -template - OutputIterator copy_n(InputIterator first, - Size n, - OutputIterator result) -{ - return thrust::system::detail::internal::scalar::copy_detail::copy_n(first, n, result, - typename thrust::detail::dispatch::is_trivial_copy::type()); -} // end copy_n() - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/copy_backward.h b/compat/thrust/system/detail/internal/scalar/copy_backward.h deleted file mode 100644 index 36f8f66e44..0000000000 --- a/compat/thrust/system/detail/internal/scalar/copy_backward.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template -BidirectionalIterator2 copy_backward(BidirectionalIterator1 first, - BidirectionalIterator1 last, - BidirectionalIterator2 result) -{ - while (first != last) - { - --last; - --result; - *result = *last; - } - - return result; -} - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/copy_if.h b/compat/thrust/system/detail/internal/scalar/copy_if.h deleted file mode 100644 index 67f9402335..0000000000 --- a/compat/thrust/system/detail/internal/scalar/copy_if.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file copy_if.h - * \brief Sequential implementation of copy_if. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template - OutputIterator copy_if(InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred) -{ - thrust::detail::host_function wrapped_pred(pred); - - while(first != last) - { - if(wrapped_pred(*stencil)) - { - *result = *first; - ++result; - } // end if - - ++first; - ++stencil; - } // end while - - return result; -} // end copy_if() - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/extrema.h b/compat/thrust/system/detail/internal/scalar/extrema.h deleted file mode 100644 index ebea756d8c..0000000000 --- a/compat/thrust/system/detail/internal/scalar/extrema.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file extrema.h - * \brief Sequential implementations of extrema functions. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template -ForwardIterator min_element(ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) -{ - // wrap comp - thrust::detail::host_function< - BinaryPredicate, - bool - > wrapped_comp(comp); - - ForwardIterator imin = first; - - for (; first != last; first++) - { - if (wrapped_comp(*first, *imin)) - { - imin = first; - } - } - - return imin; -} - - -template -ForwardIterator max_element(ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) -{ - // wrap comp - thrust::detail::host_function< - BinaryPredicate, - bool - > wrapped_comp(comp); - - ForwardIterator imax = first; - - for (; first != last; first++) - { - if (wrapped_comp(*imax, *first)) - { - imax = first; - } - } - - return imax; -} - - -template -thrust::pair minmax_element(ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) -{ - // wrap comp - thrust::detail::host_function< - BinaryPredicate, - bool - > wrapped_comp(comp); - - ForwardIterator imin = first; - ForwardIterator imax = first; - - for (; first != last; first++) - { - if (wrapped_comp(*first, *imin)) - { - imin = first; - } - - if (wrapped_comp(*imax, *first)) - { - imax = first; - } - } - - return thrust::make_pair(imin, imax); -} - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/find.h b/compat/thrust/system/detail/internal/scalar/find.h deleted file mode 100644 index 6b2502199e..0000000000 --- a/compat/thrust/system/detail/internal/scalar/find.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file find.h - * \brief Sequential implementation of find_if. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template -InputIterator find_if(InputIterator first, - InputIterator last, - Predicate pred) -{ - // wrap pred - thrust::detail::host_function< - Predicate, - bool - > wrapped_pred(pred); - - while(first != last) - { - if (wrapped_pred(*first)) - return first; - - ++first; - } - - // return first so zip_iterator works correctly - return first; -} - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/for_each.h b/compat/thrust/system/detail/internal/scalar/for_each.h deleted file mode 100644 index 4e31d9183b..0000000000 --- a/compat/thrust/system/detail/internal/scalar/for_each.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file for_each.h - * \brief Sequential implementations of for_each functions. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template -InputIterator for_each(InputIterator first, - InputIterator last, - UnaryFunction f) -{ - // wrap f - thrust::detail::host_function< - UnaryFunction, - void - > wrapped_f(f); - - for(; first != last; ++first) - { - wrapped_f(*first); - } - - return first; -} // end for_each() - -template -InputIterator for_each_n(InputIterator first, - Size n, - UnaryFunction f) -{ - // wrap f - thrust::detail::host_function< - UnaryFunction, - void - > wrapped_f(f); - - for(Size i = 0; i != n; i++) - { - // we can dereference an OutputIterator if f does not - // try to use the reference for anything besides assignment - wrapped_f(*first); - ++first; - } - - return first; -} // end for_each_n() - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/general_copy.h b/compat/thrust/system/detail/internal/scalar/general_copy.h deleted file mode 100644 index aae061d31d..0000000000 --- a/compat/thrust/system/detail/internal/scalar/general_copy.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file general_copy.h - * \brief Sequential copy algorithms for general iterators. - */ - -#pragma once - -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template - OutputIterator general_copy(InputIterator first, - InputIterator last, - OutputIterator result) -{ - for(; first != last; ++first, ++result) - *result = *first; - return result; -} // end general_copy() - - -template - OutputIterator general_copy_n(InputIterator first, - Size n, - OutputIterator result) -{ - for(; n > Size(0); ++first, ++result, --n) - *result = *first; - return result; -} // end general_copy_n() - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/insertion_sort.h b/compat/thrust/system/detail/internal/scalar/insertion_sort.h deleted file mode 100644 index 5949ce7a65..0000000000 --- a/compat/thrust/system/detail/internal/scalar/insertion_sort.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template -void insertion_sort(RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - typedef typename thrust::iterator_value::type value_type; - - if (first == last) return; - - // wrap comp - thrust::detail::host_function< - StrictWeakOrdering, - bool - > wrapped_comp(comp); - - for(RandomAccessIterator i = first + 1; i != last; ++i) - { - value_type tmp = *i; - - if (wrapped_comp(tmp, *first)) - { - // tmp is the smallest value encountered so far - thrust::system::detail::internal::scalar::copy_backward(first, i, i + 1); - - *first = tmp; - } - else - { - // tmp is not the smallest value, can avoid checking for j == first - RandomAccessIterator j = i; - RandomAccessIterator k = i - 1; - - while(wrapped_comp(tmp, *k)) - { - *j = *k; - j = k; - --k; - } - - *j = tmp; - } - } -} - -template -void insertion_sort_by_key(RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - StrictWeakOrdering comp) -{ - typedef typename thrust::iterator_value::type value_type1; - typedef typename thrust::iterator_value::type value_type2; - - if (first1 == last1) return; - - // wrap comp - thrust::detail::host_function< - StrictWeakOrdering, - bool - > wrapped_comp(comp); - - RandomAccessIterator1 i1 = first1 + 1; - RandomAccessIterator2 i2 = first2 + 1; - - for(; i1 != last1; ++i1, ++i2) - { - value_type1 tmp1 = *i1; - value_type2 tmp2 = *i2; - - if (wrapped_comp(tmp1, *first1)) - { - // tmp is the smallest value encountered so far - thrust::system::detail::internal::scalar::copy_backward(first1, i1, i1 + 1); - thrust::system::detail::internal::scalar::copy_backward(first2, i2, i2 + 1); - - *first1 = tmp1; - *first2 = tmp2; - } - else - { - // tmp is not the smallest value, can avoid checking for j == first - RandomAccessIterator1 j1 = 
i1; - RandomAccessIterator1 k1 = i1 - 1; - - RandomAccessIterator2 j2 = i2; - RandomAccessIterator2 k2 = i2 - 1; - - while(wrapped_comp(tmp1, *k1)) - { - *j1 = *k1; - *j2 = *k2; - - j1 = k1; - j2 = k2; - - --k1; - --k2; - } - - *j1 = tmp1; - *j2 = tmp2; - } - } -} - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/merge.h b/compat/thrust/system/detail/internal/scalar/merge.h deleted file mode 100644 index c02fca44b6..0000000000 --- a/compat/thrust/system/detail/internal/scalar/merge.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file merge.h - * \brief Sequential implementation of merge algorithms. 
- */ - -#pragma once - -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template -OutputIterator merge(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp); - -template -thrust::pair - merge_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp); - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/internal/scalar/merge.inl b/compat/thrust/system/detail/internal/scalar/merge.inl deleted file mode 100644 index a7c2a393c4..0000000000 --- a/compat/thrust/system/detail/internal/scalar/merge.inl +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template -OutputIterator merge(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - // wrap comp - thrust::detail::host_function< - StrictWeakOrdering, - bool - > wrapped_comp(comp); - - while(first1 != last1 && first2 != last2) - { - if(wrapped_comp(*first2, *first1)) - { - *result = *first2; - ++first2; - } // end if - else - { - *result = *first1; - ++first1; - } // end else - - ++result; - } // end while - - return thrust::system::detail::internal::scalar::copy(first2, last2, thrust::system::detail::internal::scalar::copy(first1, last1, result)); -} // end merge() - - -template -thrust::pair - merge_by_key(InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp) -{ - // wrap comp - thrust::detail::host_function< - StrictWeakOrdering, - bool - > wrapped_comp(comp); - - while(keys_first1 != keys_last1 && keys_first2 != keys_last2) - { - if(!wrapped_comp(*keys_first2, *keys_first1)) - { - // *keys_first1 <= *keys_first2 - *keys_result = *keys_first1; - *values_result = *values_first1; - ++keys_first1; - ++values_first1; - } - else - { - // *keys_first1 > keys_first2 - *keys_result = *keys_first2; - *values_result = *values_first2; - ++keys_first2; - ++values_first2; - } - - ++keys_result; - ++values_result; - } - - while(keys_first1 != keys_last1) - { - *keys_result = *keys_first1; - *values_result = *values_first1; - ++keys_first1; - ++values_first1; - ++keys_result; - ++values_result; - } - - while(keys_first2 != keys_last2) - { - *keys_result = *keys_first2; - *values_result = *values_first2; - 
++keys_first2; - ++values_first2; - ++keys_result; - ++values_result; - } - - return thrust::make_pair(keys_result, values_result); -} - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/partition.h b/compat/thrust/system/detail/internal/scalar/partition.h deleted file mode 100644 index 7ba677ef22..0000000000 --- a/compat/thrust/system/detail/internal/scalar/partition.h +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file partition.h - * \brief Sequential implementations of partition functions. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template -void iter_swap(ForwardIterator1 iter1, ForwardIterator2 iter2) -{ - // XXX this isn't correct because it doesn't use thrust::swap - using namespace thrust::detail; - - typedef typename thrust::iterator_value::type T; - - T temp = *iter1; - *iter1 = *iter2; - *iter2 = temp; -} - -template - ForwardIterator partition(ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - if (first == last) - return first; - - // wrap pred - thrust::detail::host_function< - Predicate, - bool - > wrapped_pred(pred); - - while (wrapped_pred(*first)) - { - if (++first == last) - return first; - } - - ForwardIterator next = first; - - while (++next != last) - { - if (wrapped_pred(*next)) - { - iter_swap(first, next); - ++first; - } - } - - return first; -} - -template - ForwardIterator stable_partition(ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - // wrap pred - thrust::detail::host_function< - Predicate, - bool - > wrapped_pred(pred); - - // XXX the type of exec should be: - // typedef decltype(select_system(first, last)) system; - typedef typename thrust::iterator_system::type ExecutionPolicy; - typedef typename thrust::iterator_value::type T; - - typedef thrust::detail::temporary_array TempRange; - typedef typename TempRange::iterator TempIterator; - - // XXX presumes ExecutionPolicy is default constructible - ExecutionPolicy exec; - TempRange temp(exec, first, last); - - for(TempIterator iter = temp.begin(); iter != temp.end(); ++iter) - { - if (wrapped_pred(*iter)) - { - *first = *iter; - ++first; - } - } - - ForwardIterator middle = first; - - for(TempIterator iter = temp.begin(); iter != temp.end(); ++iter) - { - if (!wrapped_pred(*iter)) - { - *first = *iter; - ++first; - } - } - - return middle; -} - -template - ForwardIterator stable_partition(ForwardIterator 
first, - ForwardIterator last, - InputIterator stencil, - Predicate pred) -{ - // wrap pred - thrust::detail::host_function< - Predicate, - bool - > wrapped_pred(pred); - - // XXX the type of exec should be: - // typedef decltype(select_system(first, stencil)) system; - typedef typename thrust::iterator_system::type ExecutionPolicy; - typedef typename thrust::iterator_value::type T; - - typedef thrust::detail::temporary_array TempRange; - typedef typename TempRange::iterator TempIterator; - - // XXX presumes ExecutionPolicy is default constructible - ExecutionPolicy exec; - TempRange temp(exec, first, last); - - InputIterator stencil_iter = stencil; - for(TempIterator iter = temp.begin(); iter != temp.end(); ++iter, ++stencil_iter) - { - if (wrapped_pred(*stencil_iter)) - { - *first = *iter; - ++first; - } - } - - ForwardIterator middle = first; - stencil_iter = stencil; - - for(TempIterator iter = temp.begin(); iter != temp.end(); ++iter, ++stencil_iter) - { - if (!wrapped_pred(*stencil_iter)) - { - *first = *iter; - ++first; - } - } - - return middle; -} - -template - thrust::pair - stable_partition_copy(InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - // wrap pred - thrust::detail::host_function< - Predicate, - bool - > wrapped_pred(pred); - - for(; first != last; ++first) - { - if(wrapped_pred(*first)) - { - *out_true = *first; - ++out_true; - } // end if - else - { - *out_false = *first; - ++out_false; - } // end else - } - - return thrust::make_pair(out_true, out_false); -} - -template - thrust::pair - stable_partition_copy(InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - // wrap pred - thrust::detail::host_function< - Predicate, - bool - > wrapped_pred(pred); - - for(; first != last; ++first, ++stencil) - { - if(wrapped_pred(*stencil)) - { - *out_true = *first; - ++out_true; - } // end 
if - else - { - *out_false = *first; - ++out_false; - } // end else - } - - return thrust::make_pair(out_true, out_false); -} - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/reduce.h b/compat/thrust/system/detail/internal/scalar/reduce.h deleted file mode 100644 index 7ad430ea50..0000000000 --- a/compat/thrust/system/detail/internal/scalar/reduce.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file reduce.h - * \brief Sequential implementation of reduce algorithm. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template - OutputType reduce(InputIterator begin, - InputIterator end, - OutputType init, - BinaryFunction binary_op) -{ - // wrap binary_op - thrust::detail::host_function< - BinaryFunction, - OutputType - > wrapped_binary_op(binary_op); - - // initialize the result - OutputType result = init; - - while(begin != end) - { - result = wrapped_binary_op(result, *begin); - ++begin; - } // end while - - return result; -} - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/reduce_by_key.h b/compat/thrust/system/detail/internal/scalar/reduce_by_key.h deleted file mode 100644 index eeacb9dd49..0000000000 --- a/compat/thrust/system/detail/internal/scalar/reduce_by_key.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template - thrust::pair - reduce_by_key(InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred, - BinaryFunction binary_op) -{ - typedef typename thrust::iterator_traits::value_type InputKeyType; - typedef typename thrust::iterator_traits::value_type InputValueType; - - typedef typename thrust::detail::intermediate_type_from_function_and_iterators< - InputIterator2, - OutputIterator2, - BinaryFunction - >::type TemporaryType; - - if(keys_first != keys_last) - { - InputKeyType temp_key = *keys_first; - TemporaryType temp_value = *values_first; - - for(++keys_first, ++values_first; - keys_first != keys_last; - ++keys_first, ++values_first) - { - InputKeyType key = *keys_first; - InputValueType value = *values_first; - - if (binary_pred(temp_key, key)) - { - temp_value = binary_op(temp_value, value); - } - else - { - *keys_output = temp_key; - *values_output = temp_value; - - ++keys_output; - ++values_output; - - temp_key = key; - temp_value = value; - } - } - - *keys_output = temp_key; - *values_output = temp_value; - - ++keys_output; - ++values_output; - } - - return thrust::make_pair(keys_output, values_output); -} - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/remove.h b/compat/thrust/system/detail/internal/scalar/remove.h deleted file mode 100644 index 2360019f11..0000000000 --- a/compat/thrust/system/detail/internal/scalar/remove.h +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in 
compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file remove.h - * \brief Sequential implementations of remove functions. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template - ForwardIterator remove_if(ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - // wrap pred - thrust::detail::host_function< - Predicate, - bool - > wrapped_pred(pred); - - // advance iterators until wrapped_pred(*first) is true or we reach the end of input - while(first != last && !wrapped_pred(*first)) - ++first; - - if(first == last) - return first; - - // result always trails first - ForwardIterator result = first; - - ++first; - - while(first != last) - { - if(!wrapped_pred(*first)) - { - *result = *first; - ++result; - } - ++first; - } - - return result; -} - - -template - ForwardIterator remove_if(ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred) -{ - // wrap pred - thrust::detail::host_function< - Predicate, - bool - > wrapped_pred(pred); - - // advance iterators until wrapped_pred(*stencil) is true or we reach the end of input - while(first != last && !wrapped_pred(*stencil)) - { - ++first; - ++stencil; - } - - if(first == last) - return first; - - // result always trails first - ForwardIterator result = first; - - ++first; - ++stencil; - - while(first != last) - { - if(!wrapped_pred(*stencil)) - { - *result = *first; - ++result; - } - ++first; - ++stencil; - } - - return result; -} - - -template - 
OutputIterator remove_copy_if(InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred) -{ - // wrap pred - thrust::detail::host_function< - Predicate, - bool - > wrapped_pred(pred); - - while (first != last) - { - if (!wrapped_pred(*first)) - { - *result = *first; - ++result; - } - - ++first; - } - - return result; -} - -template - OutputIterator remove_copy_if(InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred) -{ - // wrap pred - thrust::detail::host_function< - Predicate, - bool - > wrapped_pred(pred); - - while (first != last) - { - if (!wrapped_pred(*stencil)) - { - *result = *first; - ++result; - } - - ++first; - ++stencil; - } - - return result; -} - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/scan.h b/compat/thrust/system/detail/internal/scalar/scan.h deleted file mode 100644 index 8f41150247..0000000000 --- a/compat/thrust/system/detail/internal/scalar/scan.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file scan.h - * \brief Sequential implementations of scan functions. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template - OutputIterator inclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result, - BinaryFunction binary_op) -{ - // the pseudocode for deducing the type of the temporary used below: - // - // if BinaryFunction is AdaptableBinaryFunction - // TemporaryType = AdaptableBinaryFunction::result_type - // else if OutputIterator is a "pure" output iterator - // TemporaryType = InputIterator::value_type - // else - // TemporaryType = OutputIterator::value_type - // - // XXX upon c++0x, TemporaryType needs to be: - // result_of::type - - using namespace thrust::detail; - - typedef typename eval_if< - has_result_type::value, - result_type, - eval_if< - is_output_iterator::value, - thrust::iterator_value, - thrust::iterator_value - > - >::type ValueType; - - // wrap binary_op - thrust::detail::host_function< - BinaryFunction, - ValueType - > wrapped_binary_op(binary_op); - - if(first != last) - { - ValueType sum = *first; - - *result = sum; - - for(++first, ++result; first != last; ++first, ++result) - *result = sum = wrapped_binary_op(sum,*first); - } - - return result; -} - - -template - OutputIterator exclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result, - T init, - BinaryFunction binary_op) -{ - // the pseudocode for deducing the type of the temporary used below: - // - // if BinaryFunction is AdaptableBinaryFunction - // TemporaryType = AdaptableBinaryFunction::result_type - // else if OutputIterator is a "pure" output iterator - // TemporaryType = InputIterator::value_type - // else - // TemporaryType = OutputIterator::value_type - // - // XXX upon c++0x, TemporaryType needs to be: - // result_of::type - - using namespace thrust::detail; - - typedef typename eval_if< - has_result_type::value, - result_type, - 
eval_if< - is_output_iterator::value, - thrust::iterator_value, - thrust::iterator_value - > - >::type ValueType; - - if(first != last) - { - ValueType tmp = *first; // temporary value allows in-situ scan - ValueType sum = init; - - *result = sum; - sum = binary_op(sum, tmp); - - for(++first, ++result; first != last; ++first, ++result) - { - tmp = *first; - *result = sum; - sum = binary_op(sum, tmp); - } - } - - return result; -} - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/scan_by_key.h b/compat/thrust/system/detail/internal/scalar/scan_by_key.h deleted file mode 100644 index a31fc60bab..0000000000 --- a/compat/thrust/system/detail/internal/scalar/scan_by_key.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file scan_by_key.h - * \brief Sequential implementation of scan_by_key functions. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template - OutputIterator inclusive_scan_by_key(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryPredicate binary_pred, - BinaryFunction binary_op) -{ - using namespace thrust::detail; - - typedef typename thrust::iterator_traits::value_type KeyType; - typedef typename thrust::iterator_traits::value_type ValueType; - - // wrap binary_op - thrust::detail::host_function< - BinaryFunction, - ValueType - > wrapped_binary_op(binary_op); - - if(first1 != last1) - { - KeyType prev_key = *first1; - ValueType prev_value = *first2; - - *result = prev_value; - - for(++first1, ++first2, ++result; - first1 != last1; - ++first1, ++first2, ++result) - { - KeyType key = *first1; - - if (binary_pred(prev_key, key)) - *result = prev_value = wrapped_binary_op(prev_value,*first2); - else - *result = prev_value = *first2; - - prev_key = key; - } - } - - return result; -} - - -template - OutputIterator exclusive_scan_by_key(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - T init, - BinaryPredicate binary_pred, - BinaryFunction binary_op) -{ - using namespace thrust::detail; - - typedef typename thrust::iterator_traits::value_type KeyType; - typedef typename thrust::iterator_traits::value_type ValueType; - - if(first1 != last1) - { - KeyType temp_key = *first1; - ValueType temp_value = *first2; - - ValueType next = init; - - // first one is init - *result = next; - - next = binary_op(next, temp_value); - - for(++first1, ++first2, ++result; - first1 != last1; - ++first1, ++first2, ++result) - { - KeyType key = *first1; - - // use temp to permit in-place scans - temp_value = *first2; - - if (!binary_pred(temp_key, key)) - next = init; // reset sum - - *result = next; - next = binary_op(next, temp_value); - - 
temp_key = key; - } - } - - return result; -} - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/set_operations.h b/compat/thrust/system/detail/internal/scalar/set_operations.h deleted file mode 100644 index f85b5108af..0000000000 --- a/compat/thrust/system/detail/internal/scalar/set_operations.h +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file set_operations.h - * \brief Sequential implementation of set operation functions. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template - OutputIterator set_difference(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - // wrap comp - thrust::detail::host_function< - StrictWeakOrdering, - bool - > wrapped_comp(comp); - - while(first1 != last1 && first2 != last2) - { - if(wrapped_comp(*first1,*first2)) - { - *result = *first1; - ++first1; - ++result; - } // end if - else if(wrapped_comp(*first2,*first1)) - { - ++first2; - } // end else if - else - { - ++first1; - ++first2; - } // end else - } // end while - - return scalar::copy(first1, last1, result); -} // end set_difference() - - -template - OutputIterator set_intersection(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - // wrap comp - thrust::detail::host_function< - StrictWeakOrdering, - bool - > wrapped_comp(comp); - - while(first1 != last1 && first2 != last2) - { - if(wrapped_comp(*first1,*first2)) - { - ++first1; - } // end if - else if(wrapped_comp(*first2,*first1)) - { - ++first2; - } // end else if - else - { - *result = *first1; - ++first1; - ++first2; - ++result; - } // end else - } // end while - - return result; -} // end set_intersection() - - -template - OutputIterator set_symmetric_difference(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - // wrap comp - thrust::detail::host_function< - StrictWeakOrdering, - bool - > wrapped_comp(comp); - - while(first1 != last1 && first2 != last2) - { - if(wrapped_comp(*first1,*first2)) - { - *result = *first1; - ++first1; - ++result; - } // end if - else if(wrapped_comp(*first2,*first1)) - { - *result = *first2; - 
++first2; - ++result; - } // end else if - else - { - ++first1; - ++first2; - } // end else - } // end while - - return scalar::copy(first2, last2, scalar::copy(first1, last1, result)); -} // end set_symmetric_difference() - - -template - OutputIterator set_union(InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - // wrap comp - thrust::detail::host_function< - StrictWeakOrdering, - bool - > wrapped_comp(comp); - - while(first1 != last1 && first2 != last2) - { - if(wrapped_comp(*first1,*first2)) - { - *result = *first1; - ++first1; - } // end if - else if(wrapped_comp(*first2,*first1)) - { - *result = *first2; - ++first2; - } // end else if - else - { - *result = *first1; - ++first1; - ++first2; - } // end else - - ++result; - } // end while - - return scalar::copy(first2, last2, scalar::copy(first1, last1, result)); -} // end set_union() - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/sort.h b/compat/thrust/system/detail/internal/scalar/sort.h deleted file mode 100644 index 9e465c8ca0..0000000000 --- a/compat/thrust/system/detail/internal/scalar/sort.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! 
\file sort.h - * \brief Sequential implementations of sort algorithms. - */ - -#pragma once - -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template -void stable_sort(RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp); - -template -void stable_sort_by_key(RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - StrictWeakOrdering comp); - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/internal/scalar/sort.inl b/compat/thrust/system/detail/internal/scalar/sort.inl deleted file mode 100644 index c6ed27324c..0000000000 --- a/compat/thrust/system/detail/internal/scalar/sort.inl +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ -namespace sort_detail -{ - -//////////////////// -// Primitive Sort // -//////////////////// - -template -void stable_sort(RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp, - thrust::detail::true_type) -{ - thrust::system::detail::internal::scalar::stable_primitive_sort(first, last); - - // if comp is greater then reverse the keys - typedef typename thrust::iterator_traits::value_type KeyType; - const static bool reverse = thrust::detail::is_same >::value; - - if (reverse) - thrust::reverse(first, last); -} - -template -void stable_sort_by_key(RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - StrictWeakOrdering comp, - thrust::detail::true_type) -{ - // if comp is greater then reverse the keys and values - typedef typename thrust::iterator_traits::value_type KeyType; - const static bool reverse = thrust::detail::is_same >::value; - - // note, we also have to reverse the (unordered) input to preserve stability - if (reverse) - { - thrust::reverse(first1, last1); - thrust::reverse(first2, first2 + (last1 - first1)); - } - - thrust::system::detail::internal::scalar::stable_primitive_sort_by_key(first1, last1, first2); - - if (reverse) - { - thrust::reverse(first1, last1); - thrust::reverse(first2, first2 + (last1 - first1)); - } -} - -//////////////// -// Merge Sort // -//////////////// - -template -void stable_sort(RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp, - thrust::detail::false_type) -{ - thrust::system::detail::internal::scalar::stable_merge_sort(first, last, comp); -} - -template -void stable_sort_by_key(RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - StrictWeakOrdering comp, - thrust::detail::false_type) -{ - 
thrust::system::detail::internal::scalar::stable_merge_sort_by_key(first1, last1, first2, comp); -} - - -} // end namespace sort_detail - -template -void stable_sort(RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - typedef typename thrust::iterator_traits::value_type KeyType; - static const bool use_primitive_sort = thrust::detail::is_arithmetic::value && - (thrust::detail::is_same >::value || - thrust::detail::is_same >::value); - - // supress unused variable warning - (void) use_primitive_sort; - - thrust::system::detail::internal::scalar::sort_detail::stable_sort - (first, last, comp, - thrust::detail::integral_constant()); -} - -template -void stable_sort_by_key(RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - StrictWeakOrdering comp) -{ - typedef typename thrust::iterator_traits::value_type KeyType; - static const bool use_primitive_sort = thrust::detail::is_arithmetic::value && - (thrust::detail::is_same >::value || - thrust::detail::is_same >::value); - - // supress unused variable warning - (void) use_primitive_sort; - - thrust::system::detail::internal::scalar::sort_detail::stable_sort_by_key - (first1, last1, first2, comp, - thrust::detail::integral_constant()); -} - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/stable_merge_sort.h b/compat/thrust/system/detail/internal/scalar/stable_merge_sort.h deleted file mode 100644 index f68242cc5a..0000000000 --- a/compat/thrust/system/detail/internal/scalar/stable_merge_sort.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file stable_merge_sort.h - * \brief Sequential implementation of merge sort. - */ - -#pragma once - -#include - -namespace thrust -{ -namespace system -{ -namespace cpp -{ -namespace detail -{ - -template -void stable_merge_sort(RandomAccessIterator begin, - RandomAccessIterator end, - StrictWeakOrdering comp); - -template -void stable_merge_sort_by_key(RandomAccessIterator1 keys_begin, - RandomAccessIterator1 keys_end, - RandomAccessIterator2 values_begin, - StrictWeakOrdering comp); - -} // end namespace detail -} // end namespace cpp -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/internal/scalar/stable_merge_sort.inl b/compat/thrust/system/detail/internal/scalar/stable_merge_sort.inl deleted file mode 100644 index 41d320cb08..0000000000 --- a/compat/thrust/system/detail/internal/scalar/stable_merge_sort.inl +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ -namespace detail -{ - -template -void inplace_merge(RandomAccessIterator first, - RandomAccessIterator middle, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - // XXX the type of exec should be: - // typedef decltype(select_system(first, middle, last)) DerivedPolicy; - typedef typename thrust::iterator_system::type DerivedPolicy; - typedef typename thrust::iterator_value::type value_type; - - // XXX assumes DerivedPolicy is default constructible - // XXX find a way to get a stateful execution policy into this function - // or simply pass scratch space - DerivedPolicy exec; - thrust::detail::temporary_array a(exec, first, middle); - thrust::detail::temporary_array b(exec, middle, last); - - thrust::system::detail::internal::scalar::merge(a.begin(), a.end(), b.begin(), b.end(), first, comp); -} - -template -void inplace_merge_by_key(RandomAccessIterator1 first1, - RandomAccessIterator1 middle1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - StrictWeakOrdering comp) -{ - // XXX the type of exec should be: - // typedef decltype(select_system(first1, middle1, last1, first2)) DerivedPolicy; - typedef typename thrust::iterator_system::type DerivedPolicy; - typedef typename thrust::iterator_value::type value_type1; - typedef typename thrust::iterator_value::type value_type2; - - RandomAccessIterator2 middle2 = first2 + (middle1 - first1); - RandomAccessIterator2 last2 = first2 + (last1 - first1); - - // XXX assumes DerivedPolicy is default constructible - // XXX find a way to get a stateful exec into this function - // or simply pass scratch space - DerivedPolicy exec; - thrust::detail::temporary_array lhs1(exec, first1, middle1); - thrust::detail::temporary_array rhs1(exec, middle1, last1); - thrust::detail::temporary_array lhs2(exec, first2, middle2); - thrust::detail::temporary_array 
rhs2(exec, middle2, last2); - - thrust::system::detail::internal::scalar::merge_by_key - (lhs1.begin(), lhs1.end(), rhs1.begin(), rhs1.end(), - lhs2.begin(), rhs2.begin(), - first1, first2, comp); -} - -} // end namespace detail - -////////////// -// Key Sort // -////////////// - -template -void stable_merge_sort(RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - if (last - first < 32) - { - thrust::system::detail::internal::scalar::insertion_sort(first, last, comp); - } - else - { - RandomAccessIterator middle = first + (last - first) / 2; - - thrust::system::detail::internal::scalar::stable_merge_sort(first, middle, comp); - thrust::system::detail::internal::scalar::stable_merge_sort(middle, last, comp); - detail::inplace_merge(first, middle, last, comp); - } -} - - -//////////////////// -// Key-Value Sort // -//////////////////// - -template -void stable_merge_sort_by_key(RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - StrictWeakOrdering comp) -{ - if (last1 - first1 <= 32) - { - thrust::system::detail::internal::scalar::insertion_sort_by_key(first1, last1, first2, comp); - } - else - { - RandomAccessIterator1 middle1 = first1 + (last1 - first1) / 2; - RandomAccessIterator2 middle2 = first2 + (last1 - first1) / 2; - - thrust::system::detail::internal::scalar::stable_merge_sort_by_key(first1, middle1, first2, comp); - thrust::system::detail::internal::scalar::stable_merge_sort_by_key(middle1, last1, middle2, comp); - detail::inplace_merge_by_key(first1, middle1, last1, first2, comp); - } -} - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.h b/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.h deleted file mode 100644 index f37bf27cd1..0000000000 --- 
a/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template -void stable_primitive_sort(RandomAccessIterator first, - RandomAccessIterator last); - -template -void stable_primitive_sort_by_key(RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first); - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.inl b/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.inl deleted file mode 100644 index c22b15c0e4..0000000000 --- a/compat/thrust/system/detail/internal/scalar/stable_primitive_sort.inl +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ -namespace stable_primitive_sort_detail -{ - - -template - struct enable_if_bool_sort - : thrust::detail::enable_if< - thrust::detail::is_same< - bool, - typename thrust::iterator_value::type - >::value - > -{}; - - -template - struct disable_if_bool_sort - : thrust::detail::disable_if< - thrust::detail::is_same< - bool, - typename thrust::iterator_value::type - >::value - > -{}; - - - -template - typename enable_if_bool_sort::type - stable_primitive_sort(RandomAccessIterator first, RandomAccessIterator last) -{ - // use stable_partition if we're sorting bool - // stable_partition puts true values first, so we need to logical_not - scalar::stable_partition(first, last, thrust::logical_not()); -} - - -template - typename disable_if_bool_sort::type - stable_primitive_sort(RandomAccessIterator first, RandomAccessIterator last) -{ - // call stable_radix_sort - scalar::stable_radix_sort(first,last); -} - - -struct logical_not_first -{ - template - __host__ __device__ - bool operator()(Tuple t) - { - return !thrust::get<0>(t); - } -}; - - -template - typename enable_if_bool_sort::type - stable_primitive_sort_by_key(RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first) -{ - // use stable_partition if we're sorting bool - // stable_partition puts true values first, so we need to logical_not - 
scalar::stable_partition(thrust::make_zip_iterator(thrust::make_tuple(keys_first, values_first)), - thrust::make_zip_iterator(thrust::make_tuple(keys_last, values_first)), - logical_not_first()); -} - - -template - typename disable_if_bool_sort::type - stable_primitive_sort_by_key(RandomAccessIterator1 keys_first, RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first) -{ - // call stable_radix_sort_by_key - scalar::stable_radix_sort_by_key(keys_first, keys_last, values_first); -} - - -} - -template -void stable_primitive_sort(RandomAccessIterator first, - RandomAccessIterator last) -{ - scalar::stable_primitive_sort_detail::stable_primitive_sort(first,last); -} - -template -void stable_primitive_sort_by_key(RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first) -{ - scalar::stable_primitive_sort_detail::stable_primitive_sort_by_key(keys_first, keys_last, values_first); -} - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/internal/scalar/stable_radix_sort.h b/compat/thrust/system/detail/internal/scalar/stable_radix_sort.h deleted file mode 100644 index f2af22263a..0000000000 --- a/compat/thrust/system/detail/internal/scalar/stable_radix_sort.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file stable_radix_sort.h - * \brief Sequential implementation of radix sort. - */ - -#pragma once - -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template -void stable_radix_sort(RandomAccessIterator begin, - RandomAccessIterator end); - -template -void stable_radix_sort_by_key(RandomAccessIterator1 keys_begin, - RandomAccessIterator1 keys_end, - RandomAccessIterator2 values_begin); - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/detail/internal/scalar/stable_radix_sort.inl b/compat/thrust/system/detail/internal/scalar/stable_radix_sort.inl deleted file mode 100644 index 98846ab101..0000000000 --- a/compat/thrust/system/detail/internal/scalar/stable_radix_sort.inl +++ /dev/null @@ -1,434 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#include - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ -namespace detail -{ - -template -struct RadixEncoder : public thrust::identity -{}; - -template <> -struct RadixEncoder : public thrust::unary_function -{ - unsigned char operator()(char x) const - { - if(std::numeric_limits::is_signed) - return x ^ static_cast(1) << (8 * sizeof(unsigned char) - 1); - else - return x; - } -}; - -template <> -struct RadixEncoder : public thrust::unary_function -{ - unsigned char operator()(signed char x) const - { - return x ^ static_cast(1) << (8 * sizeof(unsigned char) - 1); - } -}; - -template <> -struct RadixEncoder : public thrust::unary_function -{ - unsigned short operator()(short x) const - { - return x ^ static_cast(1) << (8 * sizeof(unsigned short) - 1); - } -}; - -template <> -struct RadixEncoder : public thrust::unary_function -{ - unsigned long operator()(long x) const - { - return x ^ static_cast(1) << (8 * sizeof(unsigned int) - 1); - } -}; - -template <> -struct RadixEncoder : public thrust::unary_function -{ - unsigned long operator()(long x) const - { - return x ^ static_cast(1) << (8 * sizeof(unsigned long) - 1); - } -}; - -template <> -struct RadixEncoder : public thrust::unary_function -{ - unsigned long long operator()(long long x) const - { - return x ^ static_cast(1) << (8 * sizeof(unsigned long long) - 1); - } -}; - -// ideally we'd use uint32 here and uint64 below -template <> -struct RadixEncoder : public thrust::unary_function -{ - thrust::detail::uint32_t operator()(float x) const - { - union { float f; thrust::detail::uint32_t i; } u; - u.f = x; - thrust::detail::uint32_t mask = -static_cast(u.i >> 31) | (static_cast(1) << 31); - return u.i ^ mask; - } -}; - -template <> -struct RadixEncoder : public thrust::unary_function -{ - thrust::detail::uint64_t operator()(double x) const - { - union { double f; 
thrust::detail::uint64_t i; } u; - u.f = x; - thrust::detail::uint64_t mask = -static_cast(u.i >> 63) | (static_cast(1) << 63); - return u.i ^ mask; - } -}; - - -template -void radix_sort(RandomAccessIterator1 keys1, - RandomAccessIterator2 keys2, - RandomAccessIterator3 vals1, - RandomAccessIterator4 vals2, - const size_t N) -{ - typedef typename thrust::iterator_value::type KeyType; - - typedef RadixEncoder Encoder; - typedef typename Encoder::result_type EncodedType; - - static const unsigned int NumHistograms = (8 * sizeof(EncodedType) + (RadixBits - 1)) / RadixBits; - static const unsigned int HistogramSize = 1 << RadixBits; - - static const EncodedType BitMask = static_cast((1 << RadixBits) - 1); - - Encoder encode; - - // storage for histograms - size_t histograms[NumHistograms][HistogramSize] = {{0}}; - - // see which passes can be eliminated - bool skip_shuffle[NumHistograms] = {false}; - - // false if most recent data is stored in (keys1,vals1) - bool flip = false; - - // compute histograms - for (size_t i = 0; i < N; i++) - { - const EncodedType x = encode(keys1[i]); - - for (unsigned int j = 0; j < NumHistograms; j++) - { - const EncodedType BitShift = RadixBits * j; - histograms[j][(x >> BitShift) & BitMask]++; - } - } - - // scan histograms - for (unsigned int i = 0; i < NumHistograms; i++) - { - size_t sum = 0; - - for (unsigned int j = 0; j < HistogramSize; j++) - { - size_t bin = histograms[i][j]; - - if (bin == N) - skip_shuffle[i] = true; - - histograms[i][j] = sum; - - sum = sum + bin; - } - } - - // shuffle keys and (optionally) values - for (unsigned int i = 0; i < NumHistograms; i++) - { - const EncodedType BitShift = static_cast(RadixBits * i); - - if (!skip_shuffle[i]) - { - if (flip) - { - for (size_t j = 0; j < N; j++) - { - const EncodedType x = encode(keys2[j]); - size_t position = histograms[i][(x >> BitShift) & BitMask]++; - - RandomAccessIterator1 temp_keys1 = keys1; - temp_keys1 += position; - - RandomAccessIterator2 temp_keys2 = 
keys2; - temp_keys2 += j; - - // keys1[position] = keys2[j] - *temp_keys1 = *temp_keys2; - - if (HasValues) - { - RandomAccessIterator3 temp_vals1 = vals1; - temp_vals1 += position; - - RandomAccessIterator4 temp_vals2 = vals2; - temp_vals2 += j; - - // vals1[position] = vals2[j] - *temp_vals1 = *temp_vals2; - } - } - } - else - { - for (size_t j = 0; j < N; j++) - { - const EncodedType x = encode(keys1[j]); - size_t position = histograms[i][(x >> BitShift) & BitMask]++; - - RandomAccessIterator1 temp_keys1 = keys1; - temp_keys1 += j; - - RandomAccessIterator2 temp_keys2 = keys2; - temp_keys2 += position; - - // keys2[position] = keys1[j]; - *temp_keys2 = *temp_keys1; - - if (HasValues) - { - RandomAccessIterator3 temp_vals1 = vals1; - temp_vals1 += j; - - RandomAccessIterator4 temp_vals2 = vals2; - temp_vals2 += position; - - // vals2[position] = vals1[j] - *temp_vals2 = *temp_vals1; - } - } - } - - flip = (flip) ? false : true; - } - } - - // ensure final values are in (keys1,vals1) - if (flip) - { - thrust::copy(keys2, keys2 + N, keys1); - if (HasValues) - thrust::copy(vals2, vals2 + N, vals1); - } -} - - -// Select best radix sort parameters based on sizeof(T) and input size -// These particular values were determined through empirical testing on a Core i7 950 CPU -template -struct radix_sort_dispatcher -{ -}; - -template <> -struct radix_sort_dispatcher<1> -{ - template - void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, const size_t N) - { - detail::radix_sort<8,false>(keys1, keys2, static_cast(0), static_cast(0), N); - } - template - void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, RandomAccessIterator3 vals1, RandomAccessIterator4 vals2, const size_t N) - { - detail::radix_sort<8,true>(keys1, keys2, vals1, vals2, N); - } -}; - -template <> -struct radix_sort_dispatcher<2> -{ - template - void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, const size_t N) - { - if (N < (1 << 16)) - 
detail::radix_sort<8,false>(keys1, keys2, static_cast(0), static_cast(0), N); - else - detail::radix_sort<16,false>(keys1, keys2, static_cast(0), static_cast(0), N); - } - template - void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, RandomAccessIterator3 vals1, RandomAccessIterator4 vals2, const size_t N) - { - if (N < (1 << 15)) - detail::radix_sort<8,true>(keys1, keys2, vals1, vals2, N); - else - detail::radix_sort<16,true>(keys1, keys2, vals1, vals2, N); - } -}; - -template <> -struct radix_sort_dispatcher<4> -{ - template - void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, const size_t N) - { - if (N < (1 << 22)) - detail::radix_sort<8,false>(keys1, keys2, static_cast(0), static_cast(0), N); - else - detail::radix_sort<4,false>(keys1, keys2, static_cast(0), static_cast(0), N); - } - template - void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, RandomAccessIterator3 vals1, RandomAccessIterator4 vals2, const size_t N) - { - if (N < (1 << 22)) - detail::radix_sort<8,true>(keys1, keys2, vals1, vals2, N); - else - detail::radix_sort<3,true>(keys1, keys2, vals1, vals2, N); - } -}; - -template <> -struct radix_sort_dispatcher<8> -{ - template - void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, const size_t N) - { - if (N < (1 << 21)) - detail::radix_sort<8,false>(keys1, keys2, static_cast(0), static_cast(0), N); - else - detail::radix_sort<4,false>(keys1, keys2, static_cast(0), static_cast(0), N); - } - template - void operator()(RandomAccessIterator1 keys1, RandomAccessIterator2 keys2, RandomAccessIterator3 vals1, RandomAccessIterator4 vals2, const size_t N) - { - if (N < (1 << 21)) - detail::radix_sort<8,true>(keys1, keys2, vals1, vals2, N); - else - detail::radix_sort<3,true>(keys1, keys2, vals1, vals2, N); - } -}; - -template -void radix_sort(RandomAccessIterator1 keys1, - RandomAccessIterator2 keys2, - const size_t N) -{ - typedef typename thrust::iterator_value::type 
KeyType; - radix_sort_dispatcher()(keys1, keys2, N); -} - -template -void radix_sort(RandomAccessIterator1 keys1, - RandomAccessIterator2 keys2, - RandomAccessIterator3 vals1, - RandomAccessIterator4 vals2, - const size_t N) -{ - typedef typename thrust::iterator_value::type KeyType; - radix_sort_dispatcher()(keys1, keys2, vals1, vals2, N); -} - -} // namespace detail - -////////////// -// Key Sort // -////////////// - -template -void stable_radix_sort(RandomAccessIterator first, - RandomAccessIterator last) -{ - typedef typename thrust::iterator_system::type ExecutionPolicy; - typedef typename thrust::iterator_value::type KeyType; - - size_t N = last - first; - - // XXX assumes ExecutionPolicy is default constructible - // XXX consider how to get stateful systems into this function - ExecutionPolicy exec; - thrust::detail::temporary_array temp(exec, N); - - detail::radix_sort(first, temp.begin(), N); -} - - -//////////////////// -// Key-Value Sort // -//////////////////// - -template -void stable_radix_sort_by_key(RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2) -{ - // XXX the type of exec should be - // typedef decltype(select_system(first1,last1,first2)) system; - typedef typename thrust::iterator_system::type ExecutionPolicy; - typedef typename thrust::iterator_value::type KeyType; - typedef typename thrust::iterator_value::type ValueType; - - size_t N = last1 - first1; - - // XXX assumes ExecutionPolicy is default constructible - // XXX consider how to get stateful systems into this function - ExecutionPolicy exec; - thrust::detail::temporary_array temp1(exec, N); - thrust::detail::temporary_array temp2(exec, N); - - detail::radix_sort(first1, temp1.begin(), first2, temp2.begin(), N); -} - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/trivial_copy.h 
b/compat/thrust/system/detail/internal/scalar/trivial_copy.h deleted file mode 100644 index 8f008b54e4..0000000000 --- a/compat/thrust/system/detail/internal/scalar/trivial_copy.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file trivial_copy.h - * \brief Sequential copy algorithms for plain-old-data. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template - T *trivial_copy_n(const T *first, - std::ptrdiff_t n, - T *result) -{ - std::memmove(result, first, n * sizeof(T)); - return result + n; -} // end trivial_copy_n() - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/unique.h b/compat/thrust/system/detail/internal/scalar/unique.h deleted file mode 100644 index cfc60c948a..0000000000 --- a/compat/thrust/system/detail/internal/scalar/unique.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file unique.h - * \brief Sequential implementations of unique algorithms. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template - OutputIterator unique_copy(InputIterator first, - InputIterator last, - OutputIterator output, - BinaryPredicate binary_pred) -{ - typedef typename thrust::iterator_traits::value_type T; - - if(first != last) - { - T prev = *first; - - for(++first; first != last; ++first) - { - T temp = *first; - - if (!binary_pred(prev, temp)) - { - *output = prev; - - ++output; - - prev = temp; - } - } - - *output = prev; - ++output; - } - - return output; -} // end unique_copy() - - -template - ForwardIterator unique(ForwardIterator first, - ForwardIterator last, - BinaryPredicate binary_pred) -{ - // unique_copy() permits in-situ operation - return thrust::system::detail::internal::scalar::unique_copy(first, last, first, binary_pred); -} // end unique() - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/internal/scalar/unique_by_key.h b/compat/thrust/system/detail/internal/scalar/unique_by_key.h deleted file mode 100644 index b0be2663e7..0000000000 --- a/compat/thrust/system/detail/internal/scalar/unique_by_key.h +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may 
not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file unique_by_key.h - * \brief Sequential implementations of unique_by_key algorithms. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace detail -{ -namespace internal -{ -namespace scalar -{ - -template - thrust::pair - unique_by_key_copy(InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred) -{ - typedef typename thrust::iterator_traits::value_type InputKeyType; - typedef typename thrust::iterator_traits::value_type OutputValueType; - - if(keys_first != keys_last) - { - InputKeyType temp_key = *keys_first; - OutputValueType temp_value = *values_first; - - for(++keys_first, ++values_first; - keys_first != keys_last; - ++keys_first, ++values_first) - { - InputKeyType key = *keys_first; - OutputValueType value = *values_first; - - if(!binary_pred(temp_key, key)) - { - *keys_output = temp_key; - *values_output = temp_value; - - ++keys_output; - ++values_output; - - temp_key = key; - temp_value = value; - } - } - - *keys_output = temp_key; - *values_output = temp_value; - - ++keys_output; - ++values_output; - } - - return thrust::make_pair(keys_output, values_output); -} // end unique_by_key_copy() - - -template - thrust::pair - unique_by_key(ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first, - BinaryPredicate binary_pred) -{ - // unique_by_key_copy() permits 
in-situ operation - return thrust::system::detail::internal::scalar::unique_by_key_copy(keys_first, keys_last, values_first, keys_first, values_first, binary_pred); -} // end unique_by_key() - -} // end namespace scalar -} // end namespace internal -} // end namespace detail -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/detail/system_error.inl b/compat/thrust/system/detail/system_error.inl deleted file mode 100644 index 74909bee8a..0000000000 --- a/compat/thrust/system/detail/system_error.inl +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#pragma once - -#include - -namespace thrust -{ - -namespace system -{ - - -system_error - ::system_error(error_code ec, const std::string &what_arg) - : std::runtime_error(what_arg), m_error_code(ec) -{ - -} // end system_error::system_error() - - -system_error - ::system_error(error_code ec, const char *what_arg) - : std::runtime_error(what_arg), m_error_code(ec) -{ - ; -} // end system_error::system_error() - - -system_error - ::system_error(error_code ec) - : std::runtime_error(""), m_error_code(ec) -{ - ; -} // end system_error::system_error() - - -system_error - ::system_error(int ev, const error_category &ecat, const std::string &what_arg) - : std::runtime_error(what_arg), m_error_code(ev,ecat) -{ - ; -} // end system_error::system_error() - - -system_error - ::system_error(int ev, const error_category &ecat, const char *what_arg) - : std::runtime_error(what_arg), m_error_code(ev,ecat) -{ - ; -} // end system_error::system_error() - - -system_error - ::system_error(int ev, const error_category &ecat) - : std::runtime_error(""), m_error_code(ev,ecat) -{ - ; -} // end system_error::system_error() - - -const error_code &system_error - ::code(void) const throw() -{ - return m_error_code; -} // end system_error::code() - - -const char *system_error - ::what(void) const throw() -{ - if(m_what.empty()) - { - try - { - m_what = this->std::runtime_error::what(); - if(m_error_code) - { - if(!m_what.empty()) m_what += ": "; - m_what += m_error_code.message(); - } - } - catch(...) 
- { - return std::runtime_error::what(); - } - } - - return m_what.c_str(); -} // end system_error::what() - - -} // end system - -} // end thrust - diff --git a/compat/thrust/system/error_code.h b/compat/thrust/system/error_code.h deleted file mode 100644 index 2b6582c937..0000000000 --- a/compat/thrust/system/error_code.h +++ /dev/null @@ -1,521 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file error_code.h - * \brief An object used to hold error values, such as those originating from the - * operating system or other low-level application program interfaces. - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -namespace system -{ - - -/*! \addtogroup system_diagnostics - * \{ - */ - -class error_condition; -class error_code; - -/*! A metafunction returning whether or not the parameter is an \p error_code enum. - */ -template struct is_error_code_enum : public thrust::detail::false_type {}; - -/*! A metafunction returning whether or not the parameter is an \p error_condition enum. - */ -template struct is_error_condition_enum : public thrust::detail::false_type {}; - - -// XXX N3092 prefers enum class errc { ... 
} -namespace errc -{ - -enum errc_t -{ - address_family_not_supported = detail::eafnosupport, - address_in_use = detail::eaddrinuse, - address_not_available = detail::eaddrnotavail, - already_connected = detail::eisconn, - argument_list_too_long = detail::e2big, - argument_out_of_domain = detail::edom, - bad_address = detail::efault, - bad_file_descriptor = detail::ebadf, - bad_message = detail::ebadmsg, - broken_pipe = detail::epipe, - connection_aborted = detail::econnaborted, - connection_already_in_progress = detail::ealready, - connection_refused = detail::econnrefused, - connection_reset = detail::econnreset, - cross_device_link = detail::exdev, - destination_address_required = detail::edestaddrreq, - device_or_resource_busy = detail::ebusy, - directory_not_empty = detail::enotempty, - executable_format_error = detail::enoexec, - file_exists = detail::eexist, - file_too_large = detail::efbig, - filename_too_long = detail::enametoolong, - function_not_supported = detail::enosys, - host_unreachable = detail::ehostunreach, - identifier_removed = detail::eidrm, - illegal_byte_sequence = detail::eilseq, - inappropriate_io_control_operation = detail::enotty, - interrupted = detail::eintr, - invalid_argument = detail::einval, - invalid_seek = detail::espipe, - io_error = detail::eio, - is_a_directory = detail::eisdir, - message_size = detail::emsgsize, - network_down = detail::enetdown, - network_reset = detail::enetreset, - network_unreachable = detail::enetunreach, - no_buffer_space = detail::enobufs, - no_child_process = detail::echild, - no_link = detail::enolink, - no_lock_available = detail::enolck, - no_message_available = detail::enodata, - no_message = detail::enomsg, - no_protocol_option = detail::enoprotoopt, - no_space_on_device = detail::enospc, - no_stream_resources = detail::enosr, - no_such_device_or_address = detail::enxio, - no_such_device = detail::enodev, - no_such_file_or_directory = detail::enoent, - no_such_process = detail::esrch, - 
not_a_directory = detail::enotdir, - not_a_socket = detail::enotsock, - not_a_stream = detail::enostr, - not_connected = detail::enotconn, - not_enough_memory = detail::enomem, - not_supported = detail::enotsup, - operation_canceled = detail::ecanceled, - operation_in_progress = detail::einprogress, - operation_not_permitted = detail::eperm, - operation_not_supported = detail::eopnotsupp, - operation_would_block = detail::ewouldblock, - owner_dead = detail::eownerdead, - permission_denied = detail::eacces, - protocol_error = detail::eproto, - protocol_not_supported = detail::eprotonosupport, - read_only_file_system = detail::erofs, - resource_deadlock_would_occur = detail::edeadlk, - resource_unavailable_try_again = detail::eagain, - result_out_of_range = detail::erange, - state_not_recoverable = detail::enotrecoverable, - stream_timeout = detail::etime, - text_file_busy = detail::etxtbsy, - timed_out = detail::etimedout, - too_many_files_open_in_system = detail::enfile, - too_many_files_open = detail::emfile, - too_many_links = detail::emlink, - too_many_symbolic_link_levels = detail::eloop, - value_too_large = detail::eoverflow, - wrong_protocol_type = detail::eprototype -}; // end errc_t - -} // end namespace errc - - -/*! Specialization of \p is_error_condition_enum for \p errc::errc_t - */ -template<> struct is_error_condition_enum : public thrust::detail::true_type {}; - - -// [19.5.1.1] class error_category - -/*! \brief The class \p error_category serves as a base class for types used to identify the - * source and encoding of a particular category of error code. Classes may be derived - * from \p error_category to support categories of errors in addition to those defined - * in the C++ International Standard. - */ -class error_category -{ - public: - /*! Destructor does nothing. 
- */ - inline virtual ~error_category(void); - - // XXX enable upon c++0x - // error_category(const error_category &) = delete; - // error_category &operator=(const error_category &) = delete; - - /*! \return A string naming the error category. - */ - inline virtual const char *name(void) const = 0; - - /*! \return \p error_condition(ev, *this). - */ - inline virtual error_condition default_error_condition(int ev) const; - - /*! \return default_error_condition(code) == condition - */ - inline virtual bool equivalent(int code, const error_condition &condition) const; - - /*! \return *this == code.category() && code.value() == condition - */ - inline virtual bool equivalent(const error_code &code, int condition) const; - - /*! \return A string that describes the error condition denoted by \p ev. - */ - virtual std::string message(int ev) const = 0; - - /*! \return *this == &rhs - */ - inline bool operator==(const error_category &rhs) const; - - /*! \return !(*this == rhs) - */ - inline bool operator!=(const error_category &rhs) const; - - /*! \return less()(this, &rhs) - * \note \c less provides a total ordering for pointers. - */ - inline bool operator<(const error_category &rhs) const; -}; // end error_category - - -// [19.5.1.5] error_category objects - - -/*! \return A reference to an object of a type derived from class \p error_category. - * \note The object's \p default_error_condition and \p equivalent virtual functions - * shall behave as specified for the class \p error_category. The object's - * \p name virtual function shall return a pointer to the string "generic". - */ -inline const error_category &generic_category(void); - - -/*! \return A reference to an object of a type derived from class \p error_category. - * \note The object's \p equivalent virtual functions shall behave as specified for - * class \p error_category. The object's \p name virtual function shall return - * a pointer to the string "system". 
The object's \p default_error_condition - * virtual function shall behave as follows: - * - * If the argument ev corresponds to a POSIX errno value - * \c posv, the function shall return error_condition(ev,generic_category()). - * Otherwise, the function shall return error_condition(ev,system_category()). - * What constitutes correspondence for any given operating system is unspecified. - */ -inline const error_category &system_category(void); - - -// [19.5.2] Class error_code - - -/*! \brief The class \p error_code describes an object used to hold error code values, such as - * those originating from the operating system or other low-level application program - * interfaces. - */ -class error_code -{ - public: - // [19.5.2.2] constructors: - - /*! Effects: Constructs an object of type \p error_code. - * \post value() == 0 and category() == &system_category(). - */ - inline error_code(void); - - /*! Effects: Constructs an object of type \p error_code. - * \post value() == val and category() == &cat. - */ - inline error_code(int val, const error_category &cat); - - /*! Effects: Constructs an object of type \p error_code. - * \post *this == make_error_code(e). - */ - template - error_code(ErrorCodeEnum e -// XXX WAR msvc's problem with enable_if -#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC - , typename thrust::detail::enable_if::value>::type * = 0 -#endif // THRUST_HOST_COMPILER_MSVC - ); - - // [19.5.2.3] modifiers: - - /*! \post value() == val and category() == &cat. - */ - inline void assign(int val, const error_category &cat); - - /*! \post *this == make_error_code(e). - */ - template -// XXX WAR msvc's problem with enable_if -#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC - typename thrust::detail::enable_if::value, error_code>::type & -#else - error_code & -#endif // THRUST_HOST_COMPILER_MSVC - operator=(ErrorCodeEnum e); - - /*! \post value() == 0 and category() == system_category(). 
- */ - inline void clear(void); - - // [19.5.2.4] observers: - - /*! \return An integral value of this \p error_code object. - */ - inline int value(void) const; - - /*! \return An \p error_category describing the category of this \p error_code object. - */ - inline const error_category &category(void) const; - - /*! \return category().default_error_condition(). - */ - inline error_condition default_error_condition(void) const; - - /*! \return category().message(value()). - */ - inline std::string message(void) const; - - // XXX replace the below upon c++0x - // inline explicit operator bool (void) const; - - /*! \return value() != 0. - */ - inline operator bool (void) const; - - /*! \cond - */ - private: - int m_val; - const error_category *m_cat; - /*! \endcond - */ -}; // end error_code - - -// [19.5.2.5] Class error_code non-member functions - - -// XXX replace errc::errc_t with errc upon c++0x -/*! \return error_code(static_cast(e), generic_category()) - */ -inline error_code make_error_code(errc::errc_t e); - - -/*! \return lhs.category() < rhs.category() || lhs.category() == rhs.category() && lhs.value() < rhs.value(). - */ -inline bool operator<(const error_code &lhs, const error_code &rhs); - - -/*! Effects: os << ec.category().name() << ':' << ec.value(). - */ -template - std::basic_ostream& - operator<<(std::basic_ostream& os, const error_code &ec); - - -// [19.5.3] class error_condition - - -/*! \brief The class \p error_condition describes an object used to hold values identifying - * error conditions. - * - * \note \p error_condition values are portable abstractions, while \p error_code values - * are implementation specific. - */ -class error_condition -{ - public: - // [19.5.3.2] constructors - - /*! Constructs an object of type \p error_condition. - * \post value() == 0. - * \post category() == generic_category(). - */ - inline error_condition(void); - - /*! Constructs an object of type \p error_condition. - * \post value() == val. 
- * \post category() == cat. - */ - inline error_condition(int val, const error_category &cat); - - /*! Constructs an object of type \p error_condition. - * \post *this == make_error_condition(e). - * \note This constructor shall not participate in overload resolution unless - * is_error_condition_enum::value is true. - */ - template - error_condition(ErrorConditionEnum e -// XXX WAR msvc's problem with enable_if -#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC - , typename thrust::detail::enable_if::value>::type * = 0 -#endif // THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC - ); - - // [19.5.3.3] modifiers - - /*! Assigns to this \p error_code object from an error value and an \p error_category. - * \param val The new value to return from value(). - * \param cat The new \p error_category to return from category(). - * \post value() == val. - * \post category() == cat. - */ - inline void assign(int val, const error_category &cat); - - /*! Assigns to this \p error_code object from an error condition enumeration. - * \return *this - * \post *this == make_error_condition(e). - * \note This operator shall not participate in overload resolution unless - * is_error_condition_enum::value is true. - */ - template -// XXX WAR msvc's problem with enable_if -#if THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC - typename thrust::detail::enable_if::value, error_condition>::type & -#else - error_condition & -#endif // THRUST_HOST_COMPILER != THRUST_HOST_COMPILER_MSVC - operator=(ErrorConditionEnum e); - - /*! Clears this \p error_code object. - * \post value == 0 - * \post category() == generic_category(). - */ - inline void clear(void); - - // [19.5.3.4] observers - - /*! \return The value encoded by this \p error_condition. - */ - inline int value(void) const; - - /*! \return A const reference to the \p error_category encoded by this \p error_condition. - */ - inline const error_category &category(void) const; - - /*! \return category().message(value()). 
- */ - inline std::string message(void) const; - - // XXX replace below with this upon c++0x - //explicit operator bool (void) const; - - /*! \return value() != 0. - */ - inline operator bool (void) const; - - /*! \cond - */ - - private: - int m_val; - const error_category *m_cat; - - /*! \endcond - */ -}; // end error_condition - - - -// [19.5.3.5] Class error_condition non-member functions - -// XXX replace errc::errc_t with errc upon c++0x -/*! \return error_condition(static_cast(e), generic_category()). - */ -inline error_condition make_error_condition(errc::errc_t e); - - -/*! \return lhs.category() < rhs.category() || lhs.category() == rhs.category() && lhs.value() < rhs.value(). - */ -inline bool operator<(const error_condition &lhs, const error_condition &rhs); - - -// [19.5.4] Comparison operators - - -/*! \return lhs.category() == rhs.category() && lhs.value() == rhs.value(). - */ -inline bool operator==(const error_code &lhs, const error_code &rhs); - - -/*! \return lhs.category().equivalent(lhs.value(), rhs) || rhs.category().equivalent(lhs,rhs.value()). - */ -inline bool operator==(const error_code &lhs, const error_condition &rhs); - - -/*! \return rhs.category().equivalent(lhs.value(), lhs) || lhs.category().equivalent(rhs, lhs.value()). - */ -inline bool operator==(const error_condition &lhs, const error_code &rhs); - - -/*! \return lhs.category() == rhs.category() && lhs.value() == rhs.value() - */ -inline bool operator==(const error_condition &lhs, const error_condition &rhs); - - -/*! \return !(lhs == rhs) - */ -inline bool operator!=(const error_code &lhs, const error_code &rhs); - - -/*! \return !(lhs == rhs) - */ -inline bool operator!=(const error_code &lhs, const error_condition &rhs); - - -/*! \return !(lhs == rhs) - */ -inline bool operator!=(const error_condition &lhs, const error_code &rhs); - - -/*! \return !(lhs == rhs) - */ -inline bool operator!=(const error_condition &lhs, const error_condition &rhs); - -/*! 
\} // end system_diagnostics - */ - - -} // end system - - -// import names into thrust:: -using system::error_category; -using system::error_code; -using system::error_condition; -using system::is_error_code_enum; -using system::is_error_condition_enum; -using system::make_error_code; -using system::make_error_condition; - -// XXX replace with using system::errc upon c++0x -namespace errc = system::errc; - -using system::generic_category; -using system::system_category; - -} // end thrust - -#include -#include -#include - diff --git a/compat/thrust/system/omp/detail/adjacent_difference.h b/compat/thrust/system/omp/detail/adjacent_difference.h deleted file mode 100644 index 0bbc1884cc..0000000000 --- a/compat/thrust/system/omp/detail/adjacent_difference.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - -template - OutputIterator adjacent_difference(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - BinaryFunction binary_op) -{ - // omp prefers generic::adjacent_difference to cpp::adjacent_difference - return thrust::system::detail::generic::adjacent_difference(exec, first, last, result, binary_op); -} // end adjacent_difference() - -} // end detail -} // end omp -} // end system -} // end thrust - diff --git a/compat/thrust/system/omp/detail/assign_value.h b/compat/thrust/system/omp/detail/assign_value.h deleted file mode 100644 index eda3b977b8..0000000000 --- a/compat/thrust/system/omp/detail/assign_value.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits assign_value -#include - diff --git a/compat/thrust/system/omp/detail/binary_search.h b/compat/thrust/system/omp/detail/binary_search.h deleted file mode 100644 index 254e6fd445..0000000000 --- a/compat/thrust/system/omp/detail/binary_search.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - - -template -ForwardIterator lower_bound(execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - const T& value, - StrictWeakOrdering comp) -{ - // omp prefers generic::lower_bound to cpp::lower_bound - return thrust::system::detail::generic::lower_bound(exec, begin, end, value, comp); -} - - -template -ForwardIterator upper_bound(execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - const T& value, - StrictWeakOrdering comp) -{ - // omp prefers generic::upper_bound to cpp::upper_bound - return thrust::system::detail::generic::upper_bound(exec, begin, end, value, comp); -} - - -template -bool binary_search(execution_policy &exec, - ForwardIterator begin, - ForwardIterator end, - const T& value, - StrictWeakOrdering comp) -{ - // omp prefers generic::binary_search to cpp::binary_search - return thrust::system::detail::generic::binary_search(exec, begin, end, value, comp); -} - - -} // end detail -} // end omp -} // end system -} // end thrust - diff --git a/compat/thrust/system/omp/detail/copy.h b/compat/thrust/system/omp/detail/copy.h deleted file mode 100644 index b23ac18801..0000000000 --- a/compat/thrust/system/omp/detail/copy.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - - -template -OutputIterator copy(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result); - - -template -OutputIterator copy_n(execution_policy &exec, - InputIterator first, - Size n, - OutputIterator result); - - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/omp/detail/copy.inl b/compat/thrust/system/omp/detail/copy.inl deleted file mode 100644 index 915ff92d70..0000000000 --- a/compat/thrust/system/omp/detail/copy.inl +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ -namespace dispatch -{ - -template - OutputIterator copy(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - thrust::incrementable_traversal_tag) -{ - return thrust::system::cpp::detail::copy(exec, first, last, result); -} // end copy() - - -template - OutputIterator copy(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - thrust::random_access_traversal_tag) -{ - // XXX WAR problems reconciling unrelated types such as omp & tbb - // reinterpret iterators as the policy we were passed - // this ensures that generic::copy's implementation, which eventually results in - // zip_iterator works correctly - thrust::detail::tagged_iterator retagged_result(result); - - return thrust::system::detail::generic::copy(exec, thrust::reinterpret_tag(first), thrust::reinterpret_tag(last), retagged_result).base(); -} // end copy() - - -template - OutputIterator copy_n(execution_policy &exec, - InputIterator first, - Size n, - OutputIterator result, - thrust::incrementable_traversal_tag) -{ - return thrust::system::cpp::detail::copy_n(exec, first, n, result); -} // end copy_n() - - -template - OutputIterator copy_n(execution_policy &exec, - InputIterator first, - Size n, - OutputIterator result, - thrust::random_access_traversal_tag) -{ - // XXX WAR problems reconciling unrelated types such as omp & tbb - // reinterpret iterators as the policy we were passed - // this ensures that generic::copy's implementation, which eventually results in - // zip_iterator works correctly - thrust::detail::tagged_iterator retagged_result(result); - - return thrust::system::detail::generic::copy_n(exec, thrust::reinterpret_tag(first), n, retagged_result).base(); -} // end copy_n() - -} // end dispatch - - -template -OutputIterator 
copy(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result) -{ - typedef typename thrust::iterator_traversal::type traversal1; - typedef typename thrust::iterator_traversal::type traversal2; - - typedef typename thrust::detail::minimum_type::type traversal; - - // dispatch on minimum traversal - return thrust::system::omp::detail::dispatch::copy(exec, first,last,result,traversal()); -} // end copy() - - - -template -OutputIterator copy_n(execution_policy &exec, - InputIterator first, - Size n, - OutputIterator result) -{ - typedef typename thrust::iterator_traversal::type traversal1; - typedef typename thrust::iterator_traversal::type traversal2; - - typedef typename thrust::detail::minimum_type::type traversal; - - // dispatch on minimum traversal - return thrust::system::omp::detail::dispatch::copy_n(exec,first,n,result,traversal()); -} // end copy_n() - - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/omp/detail/copy_if.h b/compat/thrust/system/omp/detail/copy_if.h deleted file mode 100644 index 46754a9f21..0000000000 --- a/compat/thrust/system/omp/detail/copy_if.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - - -template - OutputIterator copy_if(execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred); - - -} // end detail -} // end omp -} // end system -} // end thrust - -#include - diff --git a/compat/thrust/system/omp/detail/copy_if.inl b/compat/thrust/system/omp/detail/copy_if.inl deleted file mode 100644 index 1af6a215a6..0000000000 --- a/compat/thrust/system/omp/detail/copy_if.inl +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - - -template - OutputIterator copy_if(execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred) -{ - // omp prefers generic::copy_if to cpp::copy_if - return thrust::system::detail::generic::copy_if(exec, first, last, stencil, result, pred); -} // end copy_if() - - -} // end detail -} // end omp -} // end system -} // end thrust - diff --git a/compat/thrust/system/omp/detail/count.h b/compat/thrust/system/omp/detail/count.h deleted file mode 100644 index da31ee8700..0000000000 --- a/compat/thrust/system/omp/detail/count.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits count -#include - diff --git a/compat/thrust/system/omp/detail/default_decomposition.h b/compat/thrust/system/omp/detail/default_decomposition.h deleted file mode 100644 index f1904c29c6..0000000000 --- a/compat/thrust/system/omp/detail/default_decomposition.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file default_decomposition.h - * \brief Return a decomposition that is appropriate for the OpenMP backend. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - -template -thrust::system::detail::internal::uniform_decomposition default_decomposition(IndexType n); - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/omp/detail/default_decomposition.inl b/compat/thrust/system/omp/detail/default_decomposition.inl deleted file mode 100644 index 366b4f56c2..0000000000 --- a/compat/thrust/system/omp/detail/default_decomposition.inl +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -// don't attempt to #include this file without omp support -#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) -#include -#endif // omp support - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - -template -thrust::system::detail::internal::uniform_decomposition default_decomposition(IndexType n) -{ - // we're attempting to launch an omp kernel, assert we're compiling with omp support - // ======================================================================== - // X Note to the user: If you've found this line due to a compiler error, X - // X you need to OpenMP support in your compiler. X - // ======================================================================== - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - -#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) - return thrust::system::detail::internal::uniform_decomposition(n, 1, omp_get_num_procs()); -#else - return thrust::system::detail::internal::uniform_decomposition(n, 1, 1); -#endif -} - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/omp/detail/equal.h b/compat/thrust/system/omp/detail/equal.h deleted file mode 100644 index 74e55183d9..0000000000 --- a/compat/thrust/system/omp/detail/equal.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits equal -#include - diff --git a/compat/thrust/system/omp/detail/execution_policy.h b/compat/thrust/system/omp/detail/execution_policy.h deleted file mode 100644 index 1b06224217..0000000000 --- a/compat/thrust/system/omp/detail/execution_policy.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -// put the canonical tag in the same ns as the backend's entry points -namespace omp -{ -namespace detail -{ - -// this awkward sequence of definitions arise -// from the desire both for tag to derive -// from execution_policy and for execution_policy -// to convert to tag (when execution_policy is not -// an ancestor of tag) - -// forward declaration of tag -struct tag; - -// forward declaration of execution_policy -template struct execution_policy; - -// specialize execution_policy for tag -template<> - struct execution_policy - : thrust::system::cpp::detail::execution_policy -{}; - -// tag's definition comes before the -// generic definition of execution_policy -struct tag : execution_policy {}; - -// allow conversion to tag when it is not a successor -template - struct execution_policy - : thrust::system::cpp::detail::execution_policy -{ - // allow conversion to tag - inline operator tag () const - { - return tag(); - } -}; - - -// overloads of select_system - -// XXX select_system(tbb, omp) & select_system(omp, tbb) are ambiguous -// because both convert to cpp without these overloads, which we -// arbitrarily define in the omp backend - -template -inline __host__ __device__ - System1 select_system(execution_policy s, thrust::system::tbb::detail::execution_policy) -{ - return thrust::detail::derived_cast(s); -} // end select_system() - - -template -inline __host__ __device__ - System2 select_system(thrust::system::tbb::detail::execution_policy, execution_policy s) -{ - return thrust::detail::derived_cast(s); -} // end select_system() - - -} // end detail - -// alias execution_policy and tag here -using thrust::system::omp::detail::execution_policy; -using thrust::system::omp::detail::tag; - -} // end omp -} // end system - -// alias items at top-level -namespace omp -{ - -using thrust::system::omp::execution_policy; -using 
thrust::system::omp::tag; - -} // end omp -} // end thrust - diff --git a/compat/thrust/system/omp/detail/extrema.h b/compat/thrust/system/omp/detail/extrema.h deleted file mode 100644 index fb96770b97..0000000000 --- a/compat/thrust/system/omp/detail/extrema.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - -template -ForwardIterator max_element(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) -{ - // omp prefers generic::max_element to cpp::max_element - return thrust::system::detail::generic::max_element(exec, first, last, comp); -} // end max_element() - -template -ForwardIterator min_element(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) -{ - // omp prefers generic::min_element to cpp::min_element - return thrust::system::detail::generic::min_element(exec, first, last, comp); -} // end min_element() - -template -thrust::pair minmax_element(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) -{ - // omp prefers generic::minmax_element to cpp::minmax_element - return thrust::system::detail::generic::minmax_element(exec, first, last, comp); -} // end minmax_element() - -} // end detail -} // end omp -} 
// end system -} // end thrust - - diff --git a/compat/thrust/system/omp/detail/fill.h b/compat/thrust/system/omp/detail/fill.h deleted file mode 100644 index 5219e1c7c5..0000000000 --- a/compat/thrust/system/omp/detail/fill.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits fill -#include - diff --git a/compat/thrust/system/omp/detail/find.h b/compat/thrust/system/omp/detail/find.h deleted file mode 100644 index a8dca5ad4d..0000000000 --- a/compat/thrust/system/omp/detail/find.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file find.h - * \brief OpenMP implementation of find_if. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - -template -InputIterator find_if(execution_policy &exec, - InputIterator first, - InputIterator last, - Predicate pred) -{ - // omp prefers generic::find_if to cpp::find_if - return thrust::system::detail::generic::find_if(exec, first, last, pred); -} - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/omp/detail/for_each.h b/compat/thrust/system/omp/detail/for_each.h deleted file mode 100644 index 1030623ec0..0000000000 --- a/compat/thrust/system/omp/detail/for_each.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file for_each.h - * \brief Defines the interface for a function that executes a - * function or functional for each value in a given range. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - -template - RandomAccessIterator for_each(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last, - UnaryFunction f); - -template - RandomAccessIterator for_each_n(execution_policy &exec, - RandomAccessIterator first, - Size n, - UnaryFunction f); - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/omp/detail/for_each.inl b/compat/thrust/system/omp/detail/for_each.inl deleted file mode 100644 index c6ab8277ba..0000000000 --- a/compat/thrust/system/omp/detail/for_each.inl +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file for_each.inl - * \brief Inline file for for_each.h. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - -template -RandomAccessIterator for_each_n(execution_policy &, - RandomAccessIterator first, - Size n, - UnaryFunction f) -{ - // we're attempting to launch an omp kernel, assert we're compiling with omp support - // ======================================================================== - // X Note to the user: If you've found this line due to a compiler error, X - // X you need to enable OpenMP support in your compiler. X - // ======================================================================== - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - - if (n <= 0) return first; //empty range - - // create a wrapped function for f - typedef typename thrust::iterator_reference::type reference; - thrust::detail::host_function wrapped_f(f); - -// do not attempt to compile the body of this function, which depends on #pragma omp, -// without support from the compiler -// XXX implement the body of this function in another file to eliminate this ugliness -#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) - // use a signed type for the iteration variable or suffer the consequences of warnings - typedef typename thrust::iterator_difference::type DifferenceType; - DifferenceType signed_n = n; -#pragma omp parallel for - for(DifferenceType i = 0; - i < signed_n; - ++i) - { - RandomAccessIterator temp = first + i; - wrapped_f(*temp); - } -#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE - - return first + n; -} // end for_each_n() - -template - RandomAccessIterator for_each(execution_policy &s, - RandomAccessIterator first, - RandomAccessIterator last, - UnaryFunction f) -{ - return omp::detail::for_each_n(s, first, thrust::distance(first,last), f); -} // end for_each() - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - diff 
--git a/compat/thrust/system/omp/detail/gather.h b/compat/thrust/system/omp/detail/gather.h deleted file mode 100644 index dfb7d7fc2d..0000000000 --- a/compat/thrust/system/omp/detail/gather.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits gather -#include - diff --git a/compat/thrust/system/omp/detail/generate.h b/compat/thrust/system/omp/detail/generate.h deleted file mode 100644 index 0cb33b9336..0000000000 --- a/compat/thrust/system/omp/detail/generate.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system inherits generate -#include - diff --git a/compat/thrust/system/omp/detail/get_value.h b/compat/thrust/system/omp/detail/get_value.h deleted file mode 100644 index e376e65749..0000000000 --- a/compat/thrust/system/omp/detail/get_value.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits get_value -#include - diff --git a/compat/thrust/system/omp/detail/inner_product.h b/compat/thrust/system/omp/detail/inner_product.h deleted file mode 100644 index 351421a577..0000000000 --- a/compat/thrust/system/omp/detail/inner_product.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system inherits inner_product -#include - diff --git a/compat/thrust/system/omp/detail/iter_swap.h b/compat/thrust/system/omp/detail/iter_swap.h deleted file mode 100644 index 16176ec69b..0000000000 --- a/compat/thrust/system/omp/detail/iter_swap.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits iter_swap -#include - diff --git a/compat/thrust/system/omp/detail/logical.h b/compat/thrust/system/omp/detail/logical.h deleted file mode 100644 index b2a80de70f..0000000000 --- a/compat/thrust/system/omp/detail/logical.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system inherits logical -#include - diff --git a/compat/thrust/system/omp/detail/malloc_and_free.h b/compat/thrust/system/omp/detail/malloc_and_free.h deleted file mode 100644 index 811a552a4f..0000000000 --- a/compat/thrust/system/omp/detail/malloc_and_free.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits malloc and free -#include - diff --git a/compat/thrust/system/omp/detail/memory.inl b/compat/thrust/system/omp/detail/memory.inl deleted file mode 100644 index 7d53de60a1..0000000000 --- a/compat/thrust/system/omp/detail/memory.inl +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ - - -template - template - reference & - reference - ::operator=(const reference &other) -{ - return super_t::operator=(other); -} // end reference::operator=() - -template - reference & - reference - ::operator=(const value_type &x) -{ - return super_t::operator=(x); -} // end reference::operator=() - -template -__host__ __device__ -void swap(reference a, reference b) -{ - a.swap(b); -} // end swap() - -namespace detail -{ - -// XXX circular #inclusion problems cause the compiler to believe that cpp::malloc -// is not defined -// WAR the problem by using adl to call cpp::malloc, which requires it to depend -// on a template parameter -template - pointer malloc_workaround(Tag t, std::size_t n) -{ - return pointer(malloc(t, n)); -} // end malloc_workaround() - -// XXX circular #inclusion problems cause the compiler to believe that cpp::free -// is not defined -// WAR the problem by using adl to call cpp::free, which requires it to depend -// on a template parameter -template - void free_workaround(Tag t, pointer ptr) -{ - free(t, ptr.get()); -} // end free_workaround() - -} // end detail - -inline pointer malloc(std::size_t n) -{ - // XXX this is how we'd like to implement this function, - // if not for circular #inclusion problems: - // - // return pointer(thrust::system::cpp::malloc(n)) - // - return detail::malloc_workaround(cpp::tag(), n); -} // end malloc() - -template -pointer malloc(std::size_t n) -{ - pointer raw_ptr = thrust::system::omp::malloc(sizeof(T) * n); - return pointer(reinterpret_cast(raw_ptr.get())); -} // end malloc() - -inline void free(pointer ptr) -{ - // XXX this is how we'd like to implement this function, - // if not for circular #inclusion problems: - // - // thrust::system::cpp::free(ptr) - // - detail::free_workaround(cpp::tag(), ptr); -} // end free() - -} // end omp -} // end system -} // end thrust - diff --git 
a/compat/thrust/system/omp/detail/merge.h b/compat/thrust/system/omp/detail/merge.h deleted file mode 100644 index a7047aa028..0000000000 --- a/compat/thrust/system/omp/detail/merge.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits merge -#include - diff --git a/compat/thrust/system/omp/detail/mismatch.h b/compat/thrust/system/omp/detail/mismatch.h deleted file mode 100644 index 03980cfcd5..0000000000 --- a/compat/thrust/system/omp/detail/mismatch.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include - -// this system inherits mismatch -#include - diff --git a/compat/thrust/system/omp/detail/par.h b/compat/thrust/system/omp/detail/par.h deleted file mode 100644 index fa6d18e64c..0000000000 --- a/compat/thrust/system/omp/detail/par.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - - -struct par_t : thrust::system::omp::detail::execution_policy -{ - par_t() : thrust::system::omp::detail::execution_policy() {} - - template - thrust::detail::execute_with_allocator - operator()(Allocator &alloc) const - { - return thrust::detail::execute_with_allocator(alloc); - } -}; - - -} // end detail - - -static const detail::par_t par; - - -} // end omp -} // end system - - -// alias par here -namespace omp -{ - - -using thrust::system::omp::par; - - -} // end omp -} // end thrust - diff --git a/compat/thrust/system/omp/detail/partition.h b/compat/thrust/system/omp/detail/partition.h deleted file mode 100644 index edcbc30834..0000000000 --- a/compat/thrust/system/omp/detail/partition.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file reduce.h - * \brief OpenMP implementation of reduce algorithms. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - - -template - ForwardIterator stable_partition(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred); - -template - ForwardIterator stable_partition(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred); - -template - thrust::pair - stable_partition_copy(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred); - -template - thrust::pair - stable_partition_copy(execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred); - - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/omp/detail/partition.inl b/compat/thrust/system/omp/detail/partition.inl deleted file mode 100644 index da629e5c69..0000000000 --- a/compat/thrust/system/omp/detail/partition.inl +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file reduce.h - * \brief OpenMP implementation of reduce algorithms. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - - -template - ForwardIterator stable_partition(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - // omp prefers generic::stable_partition to cpp::stable_partition - return thrust::system::detail::generic::stable_partition(exec, first, last, pred); -} // end stable_partition() - - -template - ForwardIterator stable_partition(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred) -{ - // omp prefers generic::stable_partition to cpp::stable_partition - return thrust::system::detail::generic::stable_partition(exec, first, last, stencil, pred); -} // end stable_partition() - - -template - thrust::pair - stable_partition_copy(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - // omp prefers generic::stable_partition_copy to cpp::stable_partition_copy - return thrust::system::detail::generic::stable_partition_copy(exec, first, last, out_true, out_false, pred); -} // end stable_partition_copy() - - -template - thrust::pair - stable_partition_copy(execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - // omp prefers 
generic::stable_partition_copy to cpp::stable_partition_copy - return thrust::system::detail::generic::stable_partition_copy(exec, first, last, stencil, out_true, out_false, pred); -} // end stable_partition_copy() - - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/omp/detail/reduce.h b/compat/thrust/system/omp/detail/reduce.h deleted file mode 100644 index 0cc5cebc2f..0000000000 --- a/compat/thrust/system/omp/detail/reduce.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file reduce.h - * \brief OpenMP implementation of reduce algorithms. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - - -template - OutputType reduce(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputType init, - BinaryFunction binary_op); - - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/omp/detail/reduce.inl b/compat/thrust/system/omp/detail/reduce.inl deleted file mode 100644 index 1347bfd004..0000000000 --- a/compat/thrust/system/omp/detail/reduce.inl +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - - -template - OutputType reduce(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputType init, - BinaryFunction binary_op) -{ - typedef typename thrust::iterator_difference::type difference_type; - - const difference_type n = thrust::distance(first,last); - - // determine first and second level decomposition - thrust::system::detail::internal::uniform_decomposition decomp1 = thrust::system::omp::detail::default_decomposition(n); - thrust::system::detail::internal::uniform_decomposition decomp2(decomp1.size() + 1, 1, 1); - - // allocate storage for the initializer and partial sums - // XXX use select_system for Tag - thrust::detail::temporary_array partial_sums(exec, decomp1.size() + 1); - - // set first element of temp array to init - partial_sums[0] = init; - - // accumulate partial sums (first level reduction) - thrust::system::omp::detail::reduce_intervals(exec, first, partial_sums.begin() + 1, binary_op, decomp1); - - // reduce partial sums (second level reduction) - thrust::system::omp::detail::reduce_intervals(exec, partial_sums.begin(), partial_sums.begin(), binary_op, decomp2); - - return partial_sums[0]; -} // end reduce() - - -} // end detail -} // end omp -} // end system -} // end thrust - diff --git a/compat/thrust/system/omp/detail/reduce_by_key.h b/compat/thrust/system/omp/detail/reduce_by_key.h deleted file mode 100644 index d7243ee0ca..0000000000 --- a/compat/thrust/system/omp/detail/reduce_by_key.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file reduce.h - * \brief OpenMP implementation of reduce algorithms. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - - -template - thrust::pair - reduce_by_key(execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred, - BinaryFunction binary_op); - - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/omp/detail/reduce_by_key.inl b/compat/thrust/system/omp/detail/reduce_by_key.inl deleted file mode 100644 index 91402d8280..0000000000 --- a/compat/thrust/system/omp/detail/reduce_by_key.inl +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - -template - thrust::pair - reduce_by_key(execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred, - BinaryFunction binary_op) -{ - // omp prefers generic::reduce_by_key to cpp::reduce_by_key - return thrust::system::detail::generic::reduce_by_key(exec, keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op); -} // end reduce_by_key() - - -} // end detail -} // end omp -} // end system -} // end thrust - diff --git a/compat/thrust/system/omp/detail/reduce_intervals.h b/compat/thrust/system/omp/detail/reduce_intervals.h deleted file mode 100644 index 7bce2074a3..0000000000 --- a/compat/thrust/system/omp/detail/reduce_intervals.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file reduce_intervals.h - * \brief OpenMP implementations of reduce_intervals algorithms. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - -template -void reduce_intervals(execution_policy &exec, - InputIterator input, - OutputIterator output, - BinaryFunction binary_op, - Decomposition decomp); - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/omp/detail/reduce_intervals.inl b/compat/thrust/system/omp/detail/reduce_intervals.inl deleted file mode 100644 index 0752b8aab9..0000000000 --- a/compat/thrust/system/omp/detail/reduce_intervals.inl +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - -template -void reduce_intervals(execution_policy &, - InputIterator input, - OutputIterator output, - BinaryFunction binary_op, - Decomposition decomp) -{ - // we're attempting to launch an omp kernel, assert we're compiling with omp support - // ======================================================================== - // X Note to the user: If you've found this line due to a compiler error, X - // X you need to enable OpenMP support in your compiler. 
X - // ======================================================================== - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - -#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) - typedef typename thrust::iterator_value::type OutputType; - - // wrap binary_op - thrust::detail::host_function wrapped_binary_op(binary_op); - - typedef thrust::detail::intptr_t index_type; - - index_type n = static_cast(decomp.size()); - -#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) -# pragma omp parallel for -#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE - for(index_type i = 0; i < n; i++) - { - InputIterator begin = input + decomp[i].begin(); - InputIterator end = input + decomp[i].end(); - - if (begin != end) - { - OutputType sum = thrust::raw_reference_cast(*begin); - - ++begin; - - while (begin != end) - { - sum = wrapped_binary_op(sum, *begin); - ++begin; - } - - OutputIterator tmp = output + i; - *tmp = sum; - } - } -#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE -} - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/omp/detail/remove.h b/compat/thrust/system/omp/detail/remove.h deleted file mode 100644 index ebcb49613c..0000000000 --- a/compat/thrust/system/omp/detail/remove.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - -template - ForwardIterator remove_if(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred); - - -template - ForwardIterator remove_if(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred); - - -template - OutputIterator remove_copy_if(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred); - - -template - OutputIterator remove_copy_if(execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred); - - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/omp/detail/remove.inl b/compat/thrust/system/omp/detail/remove.inl deleted file mode 100644 index c056f967e4..0000000000 --- a/compat/thrust/system/omp/detail/remove.inl +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - -template - ForwardIterator remove_if(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - // omp prefers generic::remove_if to cpp::remove_if - return thrust::system::detail::generic::remove_if(exec, first, last, pred); -} - - -template - ForwardIterator remove_if(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred) -{ - // omp prefers generic::remove_if to cpp::remove_if - return thrust::system::detail::generic::remove_if(exec, first, last, stencil, pred); -} - - -template - OutputIterator remove_copy_if(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred) -{ - // omp prefers generic::remove_copy_if to cpp::remove_copy_if - return thrust::system::detail::generic::remove_copy_if(exec, first, last, result, pred); -} - -template - OutputIterator remove_copy_if(execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred) -{ - // omp prefers generic::remove_copy_if to cpp::remove_copy_if - return thrust::system::detail::generic::remove_copy_if(exec, first, last, stencil, result, pred); -} - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/omp/detail/replace.h b/compat/thrust/system/omp/detail/replace.h deleted file mode 100644 index c48555d0e1..0000000000 --- a/compat/thrust/system/omp/detail/replace.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits this algorithm -#include - diff --git a/compat/thrust/system/omp/detail/reverse.h b/compat/thrust/system/omp/detail/reverse.h deleted file mode 100644 index 04923d1f6a..0000000000 --- a/compat/thrust/system/omp/detail/reverse.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits reverse -#include - diff --git a/compat/thrust/system/omp/detail/scan.h b/compat/thrust/system/omp/detail/scan.h deleted file mode 100644 index c105951fff..0000000000 --- a/compat/thrust/system/omp/detail/scan.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits scan -#include - diff --git a/compat/thrust/system/omp/detail/scan_by_key.h b/compat/thrust/system/omp/detail/scan_by_key.h deleted file mode 100644 index bfbd5d69bd..0000000000 --- a/compat/thrust/system/omp/detail/scan_by_key.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits this algorithm -#include - diff --git a/compat/thrust/system/omp/detail/scatter.h b/compat/thrust/system/omp/detail/scatter.h deleted file mode 100644 index c48555d0e1..0000000000 --- a/compat/thrust/system/omp/detail/scatter.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits this algorithm -#include - diff --git a/compat/thrust/system/omp/detail/sequence.h b/compat/thrust/system/omp/detail/sequence.h deleted file mode 100644 index 811d8f5fbb..0000000000 --- a/compat/thrust/system/omp/detail/sequence.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits sequence -#include - diff --git a/compat/thrust/system/omp/detail/set_operations.h b/compat/thrust/system/omp/detail/set_operations.h deleted file mode 100644 index 687edb2e7d..0000000000 --- a/compat/thrust/system/omp/detail/set_operations.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits set_operations -#include - diff --git a/compat/thrust/system/omp/detail/sort.h b/compat/thrust/system/omp/detail/sort.h deleted file mode 100644 index 9a480f2799..0000000000 --- a/compat/thrust/system/omp/detail/sort.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - -template -void stable_sort(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp); - -template -void stable_sort_by_key(execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp); - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/omp/detail/sort.inl b/compat/thrust/system/omp/detail/sort.inl deleted file mode 100644 index ab4f4a1bbe..0000000000 --- a/compat/thrust/system/omp/detail/sort.inl +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -#include - -// don't attempt to #include this file without omp support -#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) -#include -#endif // omp support - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ -namespace sort_detail -{ - - -template -void inplace_merge(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator middle, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - typedef typename thrust::iterator_value::type value_type; - - thrust::detail::temporary_array a(exec, first, middle); - thrust::detail::temporary_array b(exec, middle, last); - - thrust::system::cpp::detail::merge(exec, a.begin(), a.end(), b.begin(), b.end(), first, comp); -} - - -template -void inplace_merge_by_key(execution_policy &exec, - RandomAccessIterator1 first1, - RandomAccessIterator1 middle1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - StrictWeakOrdering comp) -{ - typedef typename thrust::iterator_value::type value_type1; - typedef typename thrust::iterator_value::type value_type2; - - RandomAccessIterator2 middle2 = first2 + (middle1 - first1); - RandomAccessIterator2 last2 = first2 + (last1 - first1); - - thrust::detail::temporary_array lhs1(exec, first1, middle1); - thrust::detail::temporary_array rhs1(exec, middle1, last1); - thrust::detail::temporary_array lhs2(exec, first2, middle2); - thrust::detail::temporary_array rhs2(exec, middle2, last2); - - thrust::system::cpp::detail::merge_by_key - (exec, - lhs1.begin(), lhs1.end(), rhs1.begin(), rhs1.end(), - lhs2.begin(), rhs2.begin(), - first1, first2, comp); -} - - -} // end sort_detail - - -template -void stable_sort(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - // we're attempting to launch an omp kernel, assert we're compiling with omp support - // 
======================================================================== - // X Note to the user: If you've found this line due to a compiler error, X - // X you need to enable OpenMP support in your compiler. X - // ======================================================================== - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - -#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) - typedef typename thrust::iterator_difference::type IndexType; - - if (first == last) - return; - - #pragma omp parallel - { - thrust::system::detail::internal::uniform_decomposition decomp(last - first, 1, omp_get_num_threads()); - - // process id - IndexType p_i = omp_get_thread_num(); - - // every thread sorts its own tile - if (p_i < decomp.size()) - { - thrust::system::cpp::detail::stable_sort(exec, - first + decomp[p_i].begin(), - first + decomp[p_i].end(), - comp); - } - - #pragma omp barrier - - IndexType nseg = decomp.size(); - IndexType h = 2; - - // keep track of which sub-range we're processing - IndexType a=p_i, b=p_i, c=p_i+1; - - while( nseg>1 ) - { - if(c >= decomp.size()) - c = decomp.size() - 1; - - if((p_i % h) == 0 && c > b) - { - thrust::system::omp::detail::sort_detail::inplace_merge - (exec, - first + decomp[a].begin(), - first + decomp[b].end(), - first + decomp[c].end(), - comp); - b = c; - c += h; - } - - nseg = (nseg + 1) / 2; - h *= 2; - - #pragma omp barrier - } - } -#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE -} - - -template -void stable_sort_by_key(execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp) -{ - // we're attempting to launch an omp kernel, assert we're compiling with omp support - // ======================================================================== - // X Note to the user: If you've found this line due to a compiler error, X - // X you need to enable OpenMP support in your compiler. 
X - // ======================================================================== - THRUST_STATIC_ASSERT( (thrust::detail::depend_on_instantiation::value) ); - -#if (THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE == THRUST_TRUE) - typedef typename thrust::iterator_difference::type IndexType; - - if (keys_first == keys_last) - return; - - #pragma omp parallel - { - thrust::system::detail::internal::uniform_decomposition decomp(keys_last - keys_first, 1, omp_get_num_threads()); - - // process id - IndexType p_i = omp_get_thread_num(); - - // every thread sorts its own tile - if (p_i < decomp.size()) - { - thrust::system::cpp::detail::stable_sort_by_key(exec, - keys_first + decomp[p_i].begin(), - keys_first + decomp[p_i].end(), - values_first + decomp[p_i].begin(), - comp); - } - - #pragma omp barrier - - IndexType nseg = decomp.size(); - IndexType h = 2; - - // keep track of which sub-range we're processing - IndexType a=p_i, b=p_i, c=p_i+1; - - while( nseg>1 ) - { - if(c >= decomp.size()) - c = decomp.size() - 1; - - if((p_i % h) == 0 && c > b) - { - thrust::system::omp::detail::sort_detail::inplace_merge_by_key - (exec, - keys_first + decomp[a].begin(), - keys_first + decomp[b].end(), - keys_first + decomp[c].end(), - values_first + decomp[a].begin(), - comp); - b = c; - c += h; - } - - nseg = (nseg + 1) / 2; - h *= 2; - - #pragma omp barrier - } - } -#endif // THRUST_DEVICE_COMPILER_IS_OMP_CAPABLE -} - - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/omp/detail/swap_ranges.h b/compat/thrust/system/omp/detail/swap_ranges.h deleted file mode 100644 index e683aaaa6e..0000000000 --- a/compat/thrust/system/omp/detail/swap_ranges.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// omp inherits swap_ranges -#include - diff --git a/compat/thrust/system/omp/detail/tabulate.h b/compat/thrust/system/omp/detail/tabulate.h deleted file mode 100644 index da65d8e44d..0000000000 --- a/compat/thrust/system/omp/detail/tabulate.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits tabulate -#include - diff --git a/compat/thrust/system/omp/detail/temporary_buffer.h b/compat/thrust/system/omp/detail/temporary_buffer.h deleted file mode 100644 index 628bd75719..0000000000 --- a/compat/thrust/system/omp/detail/temporary_buffer.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special temporary buffer functions - diff --git a/compat/thrust/system/omp/detail/transform.h b/compat/thrust/system/omp/detail/transform.h deleted file mode 100644 index 70ce1f41b6..0000000000 --- a/compat/thrust/system/omp/detail/transform.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// omp inherits transform -#include - diff --git a/compat/thrust/system/omp/detail/transform_reduce.h b/compat/thrust/system/omp/detail/transform_reduce.h deleted file mode 100644 index 23ed07054a..0000000000 --- a/compat/thrust/system/omp/detail/transform_reduce.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits transform_reduce -#include - diff --git a/compat/thrust/system/omp/detail/transform_scan.h b/compat/thrust/system/omp/detail/transform_scan.h deleted file mode 100644 index fc2e55d0c0..0000000000 --- a/compat/thrust/system/omp/detail/transform_scan.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits transform_scan -#include - diff --git a/compat/thrust/system/omp/detail/uninitialized_copy.h b/compat/thrust/system/omp/detail/uninitialized_copy.h deleted file mode 100644 index 944f4baf0e..0000000000 --- a/compat/thrust/system/omp/detail/uninitialized_copy.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits uninitialized_copy -#include - diff --git a/compat/thrust/system/omp/detail/uninitialized_fill.h b/compat/thrust/system/omp/detail/uninitialized_fill.h deleted file mode 100644 index b9d6de20fa..0000000000 --- a/compat/thrust/system/omp/detail/uninitialized_fill.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits uninitialized_fill -#include - diff --git a/compat/thrust/system/omp/detail/unique.h b/compat/thrust/system/omp/detail/unique.h deleted file mode 100644 index 60c617bee9..0000000000 --- a/compat/thrust/system/omp/detail/unique.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - - -template - ForwardIterator unique(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate binary_pred); - - -template - OutputIterator unique_copy(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator output, - BinaryPredicate binary_pred); - - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/omp/detail/unique.inl b/compat/thrust/system/omp/detail/unique.inl deleted file mode 100644 index d66ac3bf4f..0000000000 --- a/compat/thrust/system/omp/detail/unique.inl +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - - -template - ForwardIterator unique(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate binary_pred) -{ - // omp prefers generic::unique to cpp::unique - return thrust::system::detail::generic::unique(exec,first,last,binary_pred); -} // end unique() - - -template - OutputIterator unique_copy(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator output, - BinaryPredicate binary_pred) -{ - // omp prefers generic::unique_copy to cpp::unique_copy - return thrust::system::detail::generic::unique_copy(exec,first,last,output,binary_pred); -} // end unique_copy() - - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/omp/detail/unique_by_key.h b/compat/thrust/system/omp/detail/unique_by_key.h deleted file mode 100644 index 8fdde66edf..0000000000 --- a/compat/thrust/system/omp/detail/unique_by_key.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - - -template - thrust::pair - unique_by_key(execution_policy &exec, - ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first, - BinaryPredicate binary_pred); - - -template - thrust::pair - unique_by_key_copy(execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred); - - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/omp/detail/unique_by_key.inl b/compat/thrust/system/omp/detail/unique_by_key.inl deleted file mode 100644 index 644b5ed6aa..0000000000 --- a/compat/thrust/system/omp/detail/unique_by_key.inl +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - - -template - thrust::pair - unique_by_key(execution_policy &exec, - ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first, - BinaryPredicate binary_pred) -{ - // omp prefers generic::unique_by_key to cpp::unique_by_key - return thrust::system::detail::generic::unique_by_key(exec,keys_first,keys_last,values_first,binary_pred); -} // end unique_by_key() - - -template - thrust::pair - unique_by_key_copy(execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred) -{ - // omp prefers generic::unique_by_key_copy to cpp::unique_by_key_copy - return thrust::system::detail::generic::unique_by_key_copy(exec,keys_first,keys_last,values_first,keys_output,values_output,binary_pred); -} // end unique_by_key_copy() - - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/omp/detail/vector.inl b/compat/thrust/system/omp/detail/vector.inl deleted file mode 100644 index 32c845c4ad..0000000000 --- a/compat/thrust/system/omp/detail/vector.inl +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ - -template - vector - ::vector() - : super_t() -{} - -template - vector - ::vector(size_type n) - : super_t(n) -{} - -template - vector - ::vector(size_type n, const value_type &value) - : super_t(n,value) -{} - -template - vector - ::vector(const vector &x) - : super_t(x) -{} - -template - template - vector - ::vector(const thrust::detail::vector_base &x) - : super_t(x) -{} - -template - template - vector - ::vector(const std::vector &x) - : super_t(x) -{} - -template - template - vector - ::vector(InputIterator first, InputIterator last) - : super_t(first,last) -{} - -template - template - vector & - vector - ::operator=(const std::vector &x) -{ - super_t::operator=(x); - return *this; -} - -template - template - vector & - vector - ::operator=(const thrust::detail::vector_base &x) -{ - super_t::operator=(x); - return *this; -} - -} // end omp -} // end system -} // end thrust - diff --git a/compat/thrust/system/omp/execution_policy.h b/compat/thrust/system/omp/execution_policy.h deleted file mode 100644 index 7d5d1d80d3..0000000000 --- a/compat/thrust/system/omp/execution_policy.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -/*! 
\file thrust/system/omp/execution_policy.h - * \brief Execution policies for Thrust's OpenMP system. - */ - -#include - -// get the execution policies definitions first -#include - -// get the definition of par -#include - -// now get all the algorithm definitions - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -// define these entities here for the purpose of Doxygenating them -// they are actually defined elsewhere -#if 0 -namespace thrust -{ -namespace system -{ -namespace omp -{ - - -/*! \addtogroup execution_policies - * \{ - */ - - -/*! \p thrust::omp::execution_policy is the base class for all Thrust parallel execution - * policies which are derived from Thrust's OpenMP backend system. - */ -template -struct execution_policy : thrust::execution_policy -{}; - - -/*! \p omp::tag is a type representing Thrust's standard C++ backend system in C++'s type system. - * Iterators "tagged" with a type which is convertible to \p omp::tag assert that they may be - * "dispatched" to algorithm implementations in the \p omp system. - */ -struct tag : thrust::system::omp::execution_policy { unspecified }; - - -/*! \p thrust::omp::par is the parallel execution policy associated with Thrust's OpenMP - * backend system. - * - * Instead of relying on implicit algorithm dispatch through iterator system tags, users may - * directly target Thrust's OpenMP backend system by providing \p thrust::omp::par as an algorithm - * parameter. - * - * Explicit dispatch can be useful in avoiding the introduction of data copies into containers such - * as \p thrust::omp::vector. 
- * - * The type of \p thrust::omp::par is implementation-defined. - * - * The following code snippet demonstrates how to use \p thrust::omp::par to explicitly dispatch an - * invocation of \p thrust::for_each to the OpenMP backend system: - * - * \code - * #include - * #include - * #include - * - * struct printf_functor - * { - * __host__ __device__ - * void operator()(int x) - * { - * printf("%d\n"); - * } - * }; - * ... - * int vec[3]; - * vec[0] = 0; vec[1] = 1; vec[2] = 2; - * - * thrust::for_each(thrust::omp::par, vec.begin(), vec.end(), printf_functor()); - * - * // 0 1 2 is printed to standard output in some unspecified order - * \endcode - */ -static const unspecified par; - - -/*! \} - */ - - -} // end cpp -} // end system -} // end thrust -#endif - - diff --git a/compat/thrust/system/omp/memory.h b/compat/thrust/system/omp/memory.h deleted file mode 100644 index 0a23434833..0000000000 --- a/compat/thrust/system/omp/memory.h +++ /dev/null @@ -1,414 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file thrust/system/omp/memory.h - * \brief Managing memory associated with Thrust's OpenMP system. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ - -template class pointer; - -} // end omp -} // end system -} // end thrust - - -/*! 
\cond - */ - -// specialize std::iterator_traits to avoid problems with the name of -// pointer's constructor shadowing its nested pointer type -// do this before pointer is defined so the specialization is correctly -// used inside the definition -namespace std -{ - -template - struct iterator_traits > -{ - private: - typedef thrust::system::omp::pointer ptr; - - public: - typedef typename ptr::iterator_category iterator_category; - typedef typename ptr::value_type value_type; - typedef typename ptr::difference_type difference_type; - typedef ptr pointer; - typedef typename ptr::reference reference; -}; // end iterator_traits - -} // end std - -/*! \endcond - */ - - -namespace thrust -{ -namespace system -{ - -/*! \addtogroup system_backends Systems - * \ingroup system - * \{ - */ - -/*! \namespace thrust::system::omp - * \brief \p thrust::system::omp is the namespace containing functionality for allocating, manipulating, - * and deallocating memory available to Thrust's OpenMP backend system. - * The identifiers are provided in a separate namespace underneath thrust::system - * for import convenience but are also aliased in the top-level thrust::omp - * namespace for easy access. - * - */ -namespace omp -{ - -// forward declaration of reference for pointer -template class reference; - -/*! \cond - */ - -// XXX nvcc + msvc have trouble instantiating reference below -// this is a workaround -namespace detail -{ - -template - struct reference_msvc_workaround -{ - typedef thrust::system::omp::reference type; -}; // end reference_msvc_workaround - -} // end detail - -/*! \endcond - */ - - -/*! \p pointer stores a pointer to an object allocated in memory available to the omp system. - * This type provides type safety when dispatching standard algorithms on ranges resident - * in omp memory. - * - * \p pointer has pointer semantics: it may be dereferenced and manipulated with pointer arithmetic. 
- * - * \p pointer can be created with the function \p omp::malloc, or by explicitly calling its constructor - * with a raw pointer. - * - * The raw pointer encapsulated by a \p pointer may be obtained by eiter its get member function - * or the \p raw_pointer_cast function. - * - * \note \p pointer is not a "smart" pointer; it is the programmer's responsibility to deallocate memory - * pointed to by \p pointer. - * - * \tparam T specifies the type of the pointee. - * - * \see omp::malloc - * \see omp::free - * \see raw_pointer_cast - */ -template - class pointer - : public thrust::pointer< - T, - thrust::system::omp::tag, - thrust::system::omp::reference, - thrust::system::omp::pointer - > -{ - /*! \cond - */ - - private: - typedef thrust::pointer< - T, - thrust::system::omp::tag, - //thrust::system::omp::reference, - typename detail::reference_msvc_workaround::type, - thrust::system::omp::pointer - > super_t; - - /*! \endcond - */ - - public: - // note that omp::pointer's member functions need __host__ __device__ - // to interoperate with nvcc + iterators' dereference member function - - /*! \p pointer's no-argument constructor initializes its encapsulated pointer to \c 0. - */ - __host__ __device__ - pointer() : super_t() {} - - /*! This constructor allows construction of a pointer from a T*. - * - * \param ptr A raw pointer to copy from, presumed to point to a location in memory - * accessible by the \p omp system. - * \tparam OtherT \p OtherT shall be convertible to \p T. - */ - template - __host__ __device__ - explicit pointer(OtherT *ptr) : super_t(ptr) {} - - /*! This constructor allows construction from another pointer-like object with related type. - * - * \param other The \p OtherPointer to copy. - * \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible - * to \p thrust::system::omp::tag and its element type shall be convertible to \p T. 
- */ - template - __host__ __device__ - pointer(const OtherPointer &other, - typename thrust::detail::enable_if_pointer_is_convertible< - OtherPointer, - pointer - >::type * = 0) : super_t(other) {} - - /*! Assignment operator allows assigning from another pointer-like object with related type. - * - * \param other The other pointer-like object to assign from. - * \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible - * to \p thrust::system::omp::tag and its element type shall be convertible to \p T. - */ - template - __host__ __device__ - typename thrust::detail::enable_if_pointer_is_convertible< - OtherPointer, - pointer, - pointer & - >::type - operator=(const OtherPointer &other) - { - return super_t::operator=(other); - } -}; // end pointer - - -/*! \p reference is a wrapped reference to an object stored in memory available to the \p omp system. - * \p reference is the type of the result of dereferencing a \p omp::pointer. - * - * \tparam T Specifies the type of the referenced object. - */ -template - class reference - : public thrust::reference< - T, - thrust::system::omp::pointer, - thrust::system::omp::reference - > -{ - /*! \cond - */ - - private: - typedef thrust::reference< - T, - thrust::system::omp::pointer, - thrust::system::omp::reference - > super_t; - - /*! \endcond - */ - - public: - /*! \cond - */ - - typedef typename super_t::value_type value_type; - typedef typename super_t::pointer pointer; - - /*! \endcond - */ - - /*! This constructor initializes this \p reference to refer to an object - * pointed to by the given \p pointer. After this \p reference is constructed, - * it shall refer to the object pointed to by \p ptr. - * - * \param ptr A \p pointer to copy from. - */ - __host__ __device__ - explicit reference(const pointer &ptr) - : super_t(ptr) - {} - - /*! This constructor accepts a const reference to another \p reference of related type. 
- * After this \p reference is constructed, it shall refer to the same object as \p other. - * - * \param other A \p reference to copy from. - * \tparam OtherT The element type of the other \p reference. - * - * \note This constructor is templated primarily to allow initialization of reference - * from reference. - */ - template - __host__ __device__ - reference(const reference &other, - typename thrust::detail::enable_if_convertible< - typename reference::pointer, - pointer - >::type * = 0) - : super_t(other) - {} - - /*! Copy assignment operator copy assigns from another \p reference of related type. - * - * \param other The other \p reference to assign from. - * \return *this - * \tparam OtherT The element type of the other \p reference. - */ - template - reference &operator=(const reference &other); - - /*! Assignment operator assigns from a \p value_type. - * - * \param x The \p value_type to assign from. - * \return *this - */ - reference &operator=(const value_type &x); -}; // end reference - -/*! Exchanges the values of two objects referred to by \p reference. - * \p x The first \p reference of interest. - * \p y The second \p reference ot interest. - */ -template -__host__ __device__ -void swap(reference x, reference y); - -/*! Allocates an area of memory available to Thrust's omp system. - * \param n Number of bytes to allocate. - * \return A omp::pointer pointing to the beginning of the newly - * allocated memory. A null omp::pointer is returned if - * an error occurs. - * \note The omp::pointer returned by this function must be - * deallocated with \p omp::free. - * \see omp::free - * \see std::malloc - */ -inline pointer malloc(std::size_t n); - -/*! Allocates a typed area of memory available to Thrust's omp system. - * \param n Number of elements to allocate. - * \return A omp::pointer pointing to the beginning of the newly - * allocated memory. A null omp::pointer is returned if - * an error occurs. 
- * \note The omp::pointer returned by this function must be - * deallocated with \p omp::free. - * \see omp::free - * \see std::malloc - */ -template -inline pointer malloc(std::size_t n); - -/*! Deallocates an area of memory previously allocated by omp::malloc. - * \param ptr A omp::pointer pointing to the beginning of an area - * of memory previously allocated with omp::malloc. - * \see omp::malloc - * \see std::free - */ -inline void free(pointer ptr); - -// XXX upon c++11 -// template using allocator = thrust::detail::malloc_allocator >; - -/*! \p omp::allocator is the default allocator used by the \p omp system's containers such as - * omp::vector if no user-specified allocator is provided. \p omp::allocator allocates - * (deallocates) storage with \p omp::malloc (\p omp::free). - */ -template - struct allocator - : thrust::detail::malloc_allocator< - T, - tag, - pointer - > -{ - /*! The \p rebind metafunction provides the type of an \p allocator - * instantiated with another type. - * - * \tparam U The other type to use for instantiation. - */ - template - struct rebind - { - /*! The typedef \p other gives the type of the rebound \p allocator. - */ - typedef allocator other; - }; - - /*! No-argument constructor has no effect. - */ - __host__ __device__ - inline allocator() {} - - /*! Copy constructor has no effect. - */ - __host__ __device__ - inline allocator(const allocator &) {} - - /*! Constructor from other \p allocator has no effect. - */ - template - __host__ __device__ - inline allocator(const allocator &) {} - - /*! Destructor has no effect. - */ - __host__ __device__ - inline ~allocator() {} -}; // end allocator - -} // end omp - -/*! \} - */ - -} // end system - -/*! \namespace thrust::omp - * \brief \p thrust::omp is a top-level alias for thrust::system::omp. 
- */ -namespace omp -{ - -using thrust::system::omp::pointer; -using thrust::system::omp::reference; -using thrust::system::omp::malloc; -using thrust::system::omp::free; -using thrust::system::omp::allocator; - -} // end omp - -} // end thrust - -#include - diff --git a/compat/thrust/system/omp/vector.h b/compat/thrust/system/omp/vector.h deleted file mode 100644 index 5f45a9169b..0000000000 --- a/compat/thrust/system/omp/vector.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file thrust/system/omp/vector.h - * \brief A dynamically-sizable array of elements which reside in memory available to - * Thrust's OpenMP system. - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ - -// forward declaration of host_vector -// XXX why is this here? it doesn't seem necessary for anything below -template class host_vector; - -namespace system -{ -namespace omp -{ - -// XXX upon c++11 -// template > using vector = thrust::detail::vector_base; - -/*! \p omp::vector is a container that supports random access to elements, - * constant time removal of elements at the end, and linear time insertion - * and removal of elements at the beginning or in the middle. The number of - * elements in a \p omp::vector may vary dynamically; memory management is - * automatic. 
The elements contained in an \p omp::vector reside in memory - * available to the \p omp system. - * - * \tparam T The element type of the \p omp::vector. - * \tparam Allocator The allocator type of the \p omp::vector. Defaults to \p omp::allocator. - * - * \see http://www.sgi.com/tech/stl/Vector.html - * \see host_vector For the documentation of the complete interface which is - * shared by \p omp::vector - * \see device_vector - */ -template > - class vector - : public thrust::detail::vector_base -{ - /*! \cond - */ - private: - typedef thrust::detail::vector_base super_t; - /*! \endcond - */ - - public: - - /*! \cond - */ - typedef typename super_t::size_type size_type; - typedef typename super_t::value_type value_type; - /*! \endcond - */ - - /*! This constructor creates an empty \p omp::vector. - */ - vector(); - - /*! This constructor creates a \p omp::vector with \p n default-constructed elements. - * \param n The size of the \p omp::vector to create. - */ - explicit vector(size_type n); - - /*! This constructor creates a \p omp::vector with \p n copies of \p value. - * \param n The size of the \p omp::vector to create. - * \param value An element to copy. - */ - explicit vector(size_type n, const value_type &value); - - /*! Copy constructor copies from another \p omp::vector. - * \param x The other \p omp::vector to copy. - */ - vector(const vector &x); - - /*! This constructor copies from another Thrust vector-like object. - * \param x The other object to copy from. - */ - template - vector(const thrust::detail::vector_base &x); - - /*! This constructor copies from a \c std::vector. - * \param x The \c std::vector to copy from. - */ - template - vector(const std::vector &x); - - /*! This constructor creates an \p omp::vector by copying from a range. - * \param first The beginning of the range. - * \param last The end of the range. 
- */ - template - vector(InputIterator first, InputIterator last); - - // XXX vector_base should take a Derived type so we don't have to define these superfluous assigns - - /*! Assignment operator assigns from a \c std::vector. - * \param x The \c std::vector to assign from. - * \return *this - */ - template - vector &operator=(const std::vector &x); - - /*! Assignment operator assigns from another Thrust vector-like object. - * \param x The other object to assign from. - * \return *this - */ - template - vector &operator=(const thrust::detail::vector_base &x); -}; // end vector - -} // end omp -} // end system - -// alias system::omp names at top-level -namespace omp -{ - -using thrust::system::omp::vector; - -} // end omp - -} // end thrust - -#include - diff --git a/compat/thrust/system/system_error.h b/compat/thrust/system/system_error.h deleted file mode 100644 index 6f94b61e0a..0000000000 --- a/compat/thrust/system/system_error.h +++ /dev/null @@ -1,179 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file system/system_error.h - * \brief An exception object used to report error conditions that have an - * associated error code - */ - -#pragma once - -#include -#include -#include - -#include - -namespace thrust -{ - -namespace system -{ - -// [19.5.5] Class system_error - -// [19.5.5.1] Class system_error overview - -/*! 
\addtogroup system_diagnostics System Diagnostics - * \ingroup system - * \{ - */ - -/*! \brief The class \p system_error describes an exception object used to report error - * conditions that have an associated \p error_code. Such error conditions typically - * originate from the operating system or other low-level application program interfaces. - * - * Thrust uses \p system_error to report the error codes returned from device backends - * such as the CUDA runtime. - * - * The following code listing demonstrates how to catch a \p system_error to recover - * from an error. - * - * \code - * - * #include - * #include - * #include - * - * void terminate_gracefully(void) - * { - * // application-specific termination code here - * ... - * } - * - * int main(void) - * { - * try - * { - * thrust::device_vector vec; - * thrust::sort(vec.begin(), vec.end()); - * } - * catch(thrust::system_error e) - * { - * std::cerr << "Error inside sort: " << e.what() << std::endl; - * terminate_gracefully(); - * } - * - * return 0; - * } - * - * \endcode - * - * \note If an error represents an out-of-memory condition, implementations are encouraged - * to throw an exception object of type \p std::bad_alloc rather than \p system_error. - */ -class system_error - : public std::runtime_error -{ - public: - // [19.5.5.2] Class system_error members - - /*! Constructs an object of class \p system_error. - * \param ec The value returned by \p code(). - * \param what_arg A string to include in the result returned by \p what(). - * \post code() == ec. - * \post std::string(what()).find(what_arg) != string::npos. - */ - inline system_error(error_code ec, const std::string &what_arg); - - /*! Constructs an object of class \p system_error. - * \param ec The value returned by \p code(). - * \param what_arg A string to include in the result returned by \p what(). - * \post code() == ec. - * \post std::string(what()).find(what_arg) != string::npos. 
- */ - inline system_error(error_code ec, const char *what_arg); - - /*! Constructs an object of class \p system_error. - * \param ec The value returned by \p code(). - * \post code() == ec. - */ - inline system_error(error_code ec); - - /*! Constructs an object of class \p system_error. - * \param ev The error value used to create an \p error_code. - * \param ecat The \p error_category used to create an \p error_code. - * \param what_arg A string to include in the result returned by \p what(). - * \post code() == error_code(ev, ecat). - * \post std::string(what()).find(what_arg) != string::npos. - */ - inline system_error(int ev, const error_category &ecat, const std::string &what_arg); - - /*! Constructs an object of class \p system_error. - * \param ev The error value used to create an \p error_code. - * \param ecat The \p error_category used to create an \p error_code. - * \param what_arg A string to include in the result returned by \p what(). - * \post code() == error_code(ev, ecat). - * \post std::string(what()).find(what_arg) != string::npos. - */ - inline system_error(int ev, const error_category &ecat, const char *what_arg); - - /*! Constructs an object of class \p system_error. - * \param ev The error value used to create an \p error_code. - * \param ecat The \p error_category used to create an \p error_code. - * \post code() == error_code(ev, ecat). - */ - inline system_error(int ev, const error_category &ecat); - - /*! Destructor does not throw. - */ - inline virtual ~system_error(void) throw () {}; - - /*! Returns an object encoding the error. - * \return ec or error_code(ev, ecat), from the - * constructor, as appropriate. - */ - inline const error_code &code(void) const throw(); - - /*! Returns a human-readable string indicating the nature of the error. - * \return a string incorporating code().message() and the - * arguments supplied in the constructor. - */ - inline const char *what(void) const throw(); - - /*! 
\cond - */ - private: - error_code m_error_code; - mutable std::string m_what; - - /*! \endcond - */ -}; // end system_error - -} // end system - -/*! \} // end system_diagnostics - */ - -// import names into thrust:: -using system::system_error; - -} // end thrust - -#include - diff --git a/compat/thrust/system/tbb/detail/adjacent_difference.h b/compat/thrust/system/tbb/detail/adjacent_difference.h deleted file mode 100644 index 37c9adc2c9..0000000000 --- a/compat/thrust/system/tbb/detail/adjacent_difference.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - -template - OutputIterator adjacent_difference(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - BinaryFunction binary_op) -{ - // tbb prefers generic::adjacent_difference to cpp::adjacent_difference - return thrust::system::detail::generic::adjacent_difference(exec, first, last, result, binary_op); -} // end adjacent_difference() - -} // end detail -} // end tbb -} // end system -} // end thrust - diff --git a/compat/thrust/system/tbb/detail/assign_value.h b/compat/thrust/system/tbb/detail/assign_value.h deleted file mode 100644 index eda3b977b8..0000000000 --- a/compat/thrust/system/tbb/detail/assign_value.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits assign_value -#include - diff --git a/compat/thrust/system/tbb/detail/binary_search.h b/compat/thrust/system/tbb/detail/binary_search.h deleted file mode 100644 index 8dec989771..0000000000 --- a/compat/thrust/system/tbb/detail/binary_search.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits binary_search -#include - diff --git a/compat/thrust/system/tbb/detail/copy.h b/compat/thrust/system/tbb/detail/copy.h deleted file mode 100644 index 7604e6f4ad..0000000000 --- a/compat/thrust/system/tbb/detail/copy.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in ctbbliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - - -template -OutputIterator copy(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result); - - -template -OutputIterator copy_n(execution_policy &exec, - InputIterator first, - Size n, - OutputIterator result); - - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/tbb/detail/copy.inl b/compat/thrust/system/tbb/detail/copy.inl deleted file mode 100644 index 6d354d0b18..0000000000 --- a/compat/thrust/system/tbb/detail/copy.inl +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in ctbbliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ -namespace dispatch -{ - -template - OutputIterator copy(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - thrust::incrementable_traversal_tag) -{ - return thrust::system::cpp::detail::copy(exec, first, last, result); -} // end copy() - - -template - OutputIterator copy(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - thrust::random_access_traversal_tag) -{ - return thrust::system::detail::generic::copy(exec, first, last, result); -} // end copy() - - -template - OutputIterator copy_n(execution_policy &exec, - InputIterator first, - Size n, - OutputIterator result, - thrust::incrementable_traversal_tag) -{ - return thrust::system::cpp::detail::copy_n(exec, first, n, result); -} // end copy_n() - - -template - OutputIterator copy_n(execution_policy &exec, - InputIterator first, - Size n, - OutputIterator result, - thrust::random_access_traversal_tag) -{ - return thrust::system::detail::generic::copy_n(exec, first, n, result); -} // end copy_n() - -} // end dispatch - - -template -OutputIterator copy(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result) -{ - typedef typename thrust::iterator_traversal::type traversal1; - typedef typename thrust::iterator_traversal::type traversal2; - - typedef typename thrust::detail::minimum_type::type traversal; - - // dispatch on minimum traversal - return thrust::system::tbb::detail::dispatch::copy(exec,first,last,result,traversal()); -} // end copy() - - - -template -OutputIterator copy_n(execution_policy &exec, - InputIterator first, - Size n, - OutputIterator result) -{ - typedef typename thrust::iterator_traversal::type traversal1; - typedef typename thrust::iterator_traversal::type traversal2; - - typedef typename 
thrust::detail::minimum_type::type traversal; - - // dispatch on minimum traversal - return thrust::system::tbb::detail::dispatch::copy_n(exec,first,n,result,traversal()); -} // end copy_n() - - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/tbb/detail/copy_if.h b/compat/thrust/system/tbb/detail/copy_if.h deleted file mode 100644 index ffbd4f8f46..0000000000 --- a/compat/thrust/system/tbb/detail/copy_if.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - - -template - OutputIterator copy_if(tag, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred); - - -} // end detail -} // end tbb -} // end system -} // end thrust - -#include - diff --git a/compat/thrust/system/tbb/detail/copy_if.inl b/compat/thrust/system/tbb/detail/copy_if.inl deleted file mode 100644 index 4353b3b2f7..0000000000 --- a/compat/thrust/system/tbb/detail/copy_if.inl +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ -namespace copy_if_detail -{ - -template -struct body -{ - - InputIterator1 first; - InputIterator2 stencil; - OutputIterator result; - thrust::detail::host_function pred; - Size sum; - - body(InputIterator1 first, InputIterator2 stencil, OutputIterator result, Predicate pred) - : first(first), stencil(stencil), result(result), pred(pred), sum(0) - {} - - body(body& b, ::tbb::split) - : first(b.first), stencil(b.stencil), result(b.result), pred(b.pred), sum(0) - {} - - void operator()(const ::tbb::blocked_range& r, ::tbb::pre_scan_tag) - { - InputIterator2 iter = stencil + r.begin(); - - for (Size i = r.begin(); i != r.end(); ++i, ++iter) - { - if (pred(*iter)) - ++sum; - } - } - - void operator()(const ::tbb::blocked_range& r, ::tbb::final_scan_tag) - { - InputIterator1 iter1 = first + r.begin(); - InputIterator2 iter2 = stencil + r.begin(); - OutputIterator iter3 = result + sum; - - for (Size i = r.begin(); i != r.end(); ++i, ++iter1, ++iter2) - { - if (pred(*iter2)) - { - *iter3 = *iter1; - ++sum; - ++iter3; - } - } - } - - void reverse_join(body& b) - { - sum = b.sum + sum; - } - - void assign(body& b) - { - sum = b.sum; - } -}; // end body - -} // end copy_if_detail - -template - OutputIterator copy_if(tag, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred) -{ - typedef typename 
thrust::iterator_difference::type Size; - typedef typename copy_if_detail::body Body; - - Size n = thrust::distance(first, last); - - if (n != 0) - { - Body body(first, stencil, result, pred); - ::tbb::parallel_scan(::tbb::blocked_range(0,n), body); - thrust::advance(result, body.sum); - } - - return result; -} // end copy_if() - -} // end detail -} // end tbb -} // end system -} // end thrust - diff --git a/compat/thrust/system/tbb/detail/count.h b/compat/thrust/system/tbb/detail/count.h deleted file mode 100644 index da31ee8700..0000000000 --- a/compat/thrust/system/tbb/detail/count.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits count -#include - diff --git a/compat/thrust/system/tbb/detail/equal.h b/compat/thrust/system/tbb/detail/equal.h deleted file mode 100644 index 74e55183d9..0000000000 --- a/compat/thrust/system/tbb/detail/equal.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits equal -#include - diff --git a/compat/thrust/system/tbb/detail/execution_policy.h b/compat/thrust/system/tbb/detail/execution_policy.h deleted file mode 100644 index 167d1dc4ce..0000000000 --- a/compat/thrust/system/tbb/detail/execution_policy.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in ctbbliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -// put the canonical tag in the same ns as the backend's entry points -namespace tbb -{ -namespace detail -{ - -// this awkward sequence of definitions arise -// from the desire both for tag to derive -// from execution_policy and for execution_policy -// to convert to tag (when execution_policy is not -// an ancestor of tag) - -// forward declaration of tag -struct tag; - -// forward declaration of execution_policy -template struct execution_policy; - -// specialize execution_policy for tag -template<> - struct execution_policy - : thrust::system::cpp::detail::execution_policy -{}; - -// tag's definition comes before the -// generic definition of execution_policy -struct tag : execution_policy {}; - -// allow conversion to tag when it is not a successor -template - struct execution_policy - : thrust::system::cpp::detail::execution_policy -{ - // allow conversion to tag - inline operator tag () const - { - return tag(); - } -}; - -} // end detail - -// alias execution_policy and tag here -using thrust::system::tbb::detail::execution_policy; -using thrust::system::tbb::detail::tag; - -} // end tbb -} // end system - -// alias items at top-level -namespace tbb -{ - -using thrust::system::tbb::execution_policy; -using thrust::system::tbb::tag; - -} // end tbb -} // end thrust - diff --git a/compat/thrust/system/tbb/detail/extrema.h b/compat/thrust/system/tbb/detail/extrema.h deleted file mode 100644 index 4715a8948b..0000000000 --- a/compat/thrust/system/tbb/detail/extrema.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in ctbbliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - -template -ForwardIterator max_element(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) -{ - // tbb prefers generic::max_element to cpp::max_element - return thrust::system::detail::generic::max_element(exec, first, last, comp); -} // end max_element() - -template -ForwardIterator min_element(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) -{ - // tbb prefers generic::min_element to cpp::min_element - return thrust::system::detail::generic::min_element(exec, first, last, comp); -} // end min_element() - -template -thrust::pair minmax_element(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate comp) -{ - // tbb prefers generic::minmax_element to cpp::minmax_element - return thrust::system::detail::generic::minmax_element(exec, first, last, comp); -} // end minmax_element() - -} // end detail -} // end tbb -} // end system -} // end thrust - - diff --git a/compat/thrust/system/tbb/detail/fill.h b/compat/thrust/system/tbb/detail/fill.h deleted file mode 100644 index 5219e1c7c5..0000000000 --- a/compat/thrust/system/tbb/detail/fill.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits fill -#include - diff --git a/compat/thrust/system/tbb/detail/find.h b/compat/thrust/system/tbb/detail/find.h deleted file mode 100644 index d351454c16..0000000000 --- a/compat/thrust/system/tbb/detail/find.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - -template -InputIterator find_if(execution_policy &exec, - InputIterator first, - InputIterator last, - Predicate pred) -{ - // tbb prefers generic::find_if to cpp::find_if - return thrust::system::detail::generic::find_if(exec, first, last, pred); -} - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/tbb/detail/for_each.h b/compat/thrust/system/tbb/detail/for_each.h deleted file mode 100644 index 573bb819a2..0000000000 --- a/compat/thrust/system/tbb/detail/for_each.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in ctbbliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - -template - RandomAccessIterator for_each(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last, - UnaryFunction f); - -template - RandomAccessIterator for_each_n(execution_policy &exec, - RandomAccessIterator first, - Size n, - UnaryFunction f); - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/tbb/detail/for_each.inl b/compat/thrust/system/tbb/detail/for_each.inl deleted file mode 100644 index b09c7be881..0000000000 --- a/compat/thrust/system/tbb/detail/for_each.inl +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in ctbbliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ -namespace for_each_detail -{ - -template - struct body -{ - RandomAccessIterator m_first; - UnaryFunction m_f; - - body(RandomAccessIterator first, UnaryFunction f) - : m_first(first), m_f(f) - {} - - void operator()(const ::tbb::blocked_range &r) const - { - // we assume that blocked_range specifies a contiguous range of integers - thrust::system::detail::internal::scalar::for_each_n(m_first + r.begin(), r.size(), m_f); - } // end operator()() -}; // end body - - -template - body - make_body(RandomAccessIterator first, UnaryFunction f) -{ - return body(first, f); -} // end make_body() - - -} // end for_each_detail - - -template -RandomAccessIterator for_each_n(execution_policy &, - RandomAccessIterator first, - Size n, - UnaryFunction f) -{ - ::tbb::parallel_for(::tbb::blocked_range(0,n), for_each_detail::make_body(first,f)); - - // return the end of the range - return first + n; -} // end for_each_n - - -template - RandomAccessIterator for_each(execution_policy &s, - RandomAccessIterator first, - RandomAccessIterator last, - UnaryFunction f) -{ - return tbb::detail::for_each_n(s, first, thrust::distance(first,last), f); -} // end for_each() - - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/tbb/detail/gather.h b/compat/thrust/system/tbb/detail/gather.h deleted file mode 100644 index dfb7d7fc2d..0000000000 --- a/compat/thrust/system/tbb/detail/gather.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits gather -#include - diff --git a/compat/thrust/system/tbb/detail/generate.h b/compat/thrust/system/tbb/detail/generate.h deleted file mode 100644 index 0cb33b9336..0000000000 --- a/compat/thrust/system/tbb/detail/generate.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits generate -#include - diff --git a/compat/thrust/system/tbb/detail/get_value.h b/compat/thrust/system/tbb/detail/get_value.h deleted file mode 100644 index e376e65749..0000000000 --- a/compat/thrust/system/tbb/detail/get_value.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits get_value -#include - diff --git a/compat/thrust/system/tbb/detail/inner_product.h b/compat/thrust/system/tbb/detail/inner_product.h deleted file mode 100644 index 351421a577..0000000000 --- a/compat/thrust/system/tbb/detail/inner_product.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits inner_product -#include - diff --git a/compat/thrust/system/tbb/detail/iter_swap.h b/compat/thrust/system/tbb/detail/iter_swap.h deleted file mode 100644 index 16176ec69b..0000000000 --- a/compat/thrust/system/tbb/detail/iter_swap.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits iter_swap -#include - diff --git a/compat/thrust/system/tbb/detail/logical.h b/compat/thrust/system/tbb/detail/logical.h deleted file mode 100644 index b2a80de70f..0000000000 --- a/compat/thrust/system/tbb/detail/logical.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits logical -#include - diff --git a/compat/thrust/system/tbb/detail/malloc_and_free.h b/compat/thrust/system/tbb/detail/malloc_and_free.h deleted file mode 100644 index 811a552a4f..0000000000 --- a/compat/thrust/system/tbb/detail/malloc_and_free.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits malloc and free -#include - diff --git a/compat/thrust/system/tbb/detail/memory.inl b/compat/thrust/system/tbb/detail/memory.inl deleted file mode 100644 index 420a8a14b4..0000000000 --- a/compat/thrust/system/tbb/detail/memory.inl +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in ctbbliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ - - -template - template - reference & - reference - ::operator=(const reference &other) -{ - return super_t::operator=(other); -} // end reference::operator=() - -template - reference & - reference - ::operator=(const value_type &x) -{ - return super_t::operator=(x); -} // end reference::operator=() - -template -__host__ __device__ -void swap(reference a, reference b) -{ - a.swap(b); -} // end swap() - -namespace detail -{ - -// XXX circular #inclusion problems cause the compiler to believe that cpp::malloc -// is not defined -// WAR the problem by using adl to call cpp::malloc, which requires it to depend -// on a template parameter -template - pointer malloc_workaround(Tag t, std::size_t n) -{ - return pointer(malloc(t, n)); -} // end malloc_workaround() - -// XXX circular #inclusion problems cause the compiler to believe that cpp::free -// is not defined -// WAR the problem by using adl to call cpp::free, which requires it to depend -// on a template parameter -template - void free_workaround(Tag t, pointer ptr) -{ - free(t, ptr.get()); -} // end free_workaround() - -} // end detail - -inline pointer malloc(std::size_t n) -{ - // XXX this is how we'd like to implement this function, - // if not for circular #inclusion problems: - // - // return pointer(thrust::system::cpp::malloc(n)) - // - return detail::malloc_workaround(cpp::tag(), n); -} // end malloc() - -template -pointer malloc(std::size_t n) -{ - pointer raw_ptr = thrust::system::tbb::malloc(sizeof(T) * n); - return pointer(reinterpret_cast(raw_ptr.get())); -} // end malloc() - -inline void free(pointer ptr) -{ - // XXX this is how we'd like to implement this function, - // if not for circular #inclusion problems: - // - // thrust::system::cpp::free(ptr) - // - detail::free_workaround(cpp::tag(), ptr); -} // end free() - -} // end tbb -} // end system -} // end thrust - diff --git 
a/compat/thrust/system/tbb/detail/merge.h b/compat/thrust/system/tbb/detail/merge.h deleted file mode 100644 index 7b203ec327..0000000000 --- a/compat/thrust/system/tbb/detail/merge.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - -template -OutputIterator merge(execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp); - -template -thrust::pair - merge_by_key(execution_policy &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first3, - InputIterator4 values_first4, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp); - -} // end detail -} // end tbb -} // end system -} // end thrust - -#include - diff --git a/compat/thrust/system/tbb/detail/merge.inl b/compat/thrust/system/tbb/detail/merge.inl deleted file mode 100644 index cc902af85b..0000000000 --- a/compat/thrust/system/tbb/detail/merge.inl +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the 
License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ -namespace merge_detail -{ - -template -struct range -{ - InputIterator1 first1, last1; - InputIterator2 first2, last2; - OutputIterator result; - StrictWeakOrdering comp; - size_t grain_size; - - range(InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp, - size_t grain_size = 1024) - : first1(first1), last1(last1), - first2(first2), last2(last2), - result(result), comp(comp), grain_size(grain_size) - {} - - range(range& r, ::tbb::split) - : first1(r.first1), last1(r.last1), - first2(r.first2), last2(r.last2), - result(r.result), comp(r.comp), grain_size(r.grain_size) - { - // we can assume n1 and n2 are not both 0 - size_t n1 = thrust::distance(first1, last1); - size_t n2 = thrust::distance(first2, last2); - - InputIterator1 mid1 = first1; - InputIterator2 mid2 = first2; - - if (n1 > n2) - { - mid1 += n1 / 2; - mid2 = thrust::system::detail::internal::scalar::lower_bound(first2, last2, raw_reference_cast(*mid1), comp); - } - else - { - mid2 += n2 / 2; - mid1 = thrust::system::detail::internal::scalar::upper_bound(first1, last1, raw_reference_cast(*mid2), comp); - } - - // set first range to [first1, mid1), [first2, mid2), result - r.last1 = mid1; - r.last2 = mid2; - - // set second range to [mid1, last1), [mid2, last2), result + (mid1 - first1) + (mid2 - first2) - first1 = mid1; - first2 = mid2; - 
result += thrust::distance(r.first1, mid1) + thrust::distance(r.first2, mid2); - } - - bool empty(void) const - { - return (first1 == last1) && (first2 == last2); - } - - bool is_divisible(void) const - { - return static_cast(thrust::distance(first1, last1) + thrust::distance(first2, last2)) > grain_size; - } -}; - -struct body -{ - template - void operator()(Range& r) const - { - thrust::system::detail::internal::scalar::merge - (r.first1, r.last1, - r.first2, r.last2, - r.result, - r.comp); - } -}; - -} // end namespace merge_detail - -namespace merge_by_key_detail -{ - -template -struct range -{ - InputIterator1 keys_first1, keys_last1; - InputIterator2 keys_first2, keys_last2; - InputIterator3 values_first1; - InputIterator4 values_first2; - OutputIterator1 keys_result; - OutputIterator2 values_result; - StrictWeakOrdering comp; - size_t grain_size; - - range(InputIterator1 keys_first1, InputIterator1 keys_last1, - InputIterator2 keys_first2, InputIterator2 keys_last2, - InputIterator3 values_first1, - InputIterator4 values_first2, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp, - size_t grain_size = 1024) - : keys_first1(keys_first1), keys_last1(keys_last1), - keys_first2(keys_first2), keys_last2(keys_last2), - values_first1(values_first1), - values_first2(values_first2), - keys_result(keys_result), values_result(values_result), - comp(comp), grain_size(grain_size) - {} - - range(range& r, ::tbb::split) - : keys_first1(r.keys_first1), keys_last1(r.keys_last1), - keys_first2(r.keys_first2), keys_last2(r.keys_last2), - values_first1(r.values_first1), - values_first2(r.values_first2), - keys_result(r.keys_result), values_result(r.values_result), - comp(r.comp), grain_size(r.grain_size) - { - // we can assume n1 and n2 are not both 0 - size_t n1 = thrust::distance(keys_first1, keys_last1); - size_t n2 = thrust::distance(keys_first2, keys_last2); - - InputIterator1 mid1 = keys_first1; - InputIterator2 mid2 = keys_first2; - 
- if (n1 > n2) - { - mid1 += n1 / 2; - mid2 = thrust::system::detail::internal::scalar::lower_bound(keys_first2, keys_last2, raw_reference_cast(*mid1), comp); - } - else - { - mid2 += n2 / 2; - mid1 = thrust::system::detail::internal::scalar::upper_bound(keys_first1, keys_last1, raw_reference_cast(*mid2), comp); - } - - // set first range to [keys_first1, mid1), [keys_first2, mid2), keys_result, values_result - r.keys_last1 = mid1; - r.keys_last2 = mid2; - - // set second range to [mid1, keys_last1), [mid2, keys_last2), keys_result + (mid1 - keys_first1) + (mid2 - keys_first2), values_result + (mid1 - keys_first1) + (mid2 - keys_first2) - keys_first1 = mid1; - keys_first2 = mid2; - values_first1 += thrust::distance(r.keys_first1, mid1); - values_first2 += thrust::distance(r.keys_first2, mid2); - keys_result += thrust::distance(r.keys_first1, mid1) + thrust::distance(r.keys_first2, mid2); - values_result += thrust::distance(r.keys_first1, mid1) + thrust::distance(r.keys_first2, mid2); - } - - bool empty(void) const - { - return (keys_first1 == keys_last1) && (keys_first2 == keys_last2); - } - - bool is_divisible(void) const - { - return static_cast(thrust::distance(keys_first1, keys_last1) + thrust::distance(keys_first2, keys_last2)) > grain_size; - } -}; - -struct body -{ - template - void operator()(Range& r) const - { - thrust::system::detail::internal::scalar::merge_by_key - (r.keys_first1, r.keys_last1, - r.keys_first2, r.keys_last2, - r.values_first1, - r.values_first2, - r.keys_result, - r.values_result, - r.comp); - } -}; - -} // end namespace merge_by_key_detail - - -template -OutputIterator merge(execution_policy &exec, - InputIterator1 first1, - InputIterator1 last1, - InputIterator2 first2, - InputIterator2 last2, - OutputIterator result, - StrictWeakOrdering comp) -{ - typedef typename merge_detail::range Range; - typedef merge_detail::body Body; - Range range(first1, last1, first2, last2, result, comp); - Body body; - - ::tbb::parallel_for(range, 
body); - - thrust::advance(result, thrust::distance(first1, last1) + thrust::distance(first2, last2)); - - return result; -} // end merge() - -template -thrust::pair - merge_by_key(execution_policy &exec, - InputIterator1 keys_first1, - InputIterator1 keys_last1, - InputIterator2 keys_first2, - InputIterator2 keys_last2, - InputIterator3 values_first3, - InputIterator4 values_first4, - OutputIterator1 keys_result, - OutputIterator2 values_result, - StrictWeakOrdering comp) -{ - typedef typename merge_by_key_detail::range Range; - typedef merge_by_key_detail::body Body; - - Range range(keys_first1, keys_last1, keys_first2, keys_last2, values_first3, values_first4, keys_result, values_result, comp); - Body body; - - ::tbb::parallel_for(range, body); - - thrust::advance(keys_result, thrust::distance(keys_first1, keys_last1) + thrust::distance(keys_first2, keys_last2)); - thrust::advance(values_result, thrust::distance(keys_first1, keys_last1) + thrust::distance(keys_first2, keys_last2)); - - return thrust::make_pair(keys_result,values_result); -} - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/tbb/detail/mismatch.h b/compat/thrust/system/tbb/detail/mismatch.h deleted file mode 100644 index 03980cfcd5..0000000000 --- a/compat/thrust/system/tbb/detail/mismatch.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits mismatch -#include - diff --git a/compat/thrust/system/tbb/detail/par.h b/compat/thrust/system/tbb/detail/par.h deleted file mode 100644 index 74801ab914..0000000000 --- a/compat/thrust/system/tbb/detail/par.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - - -struct par_t : thrust::system::tbb::detail::execution_policy -{ - par_t() : thrust::system::tbb::detail::execution_policy() {} - - template - thrust::detail::execute_with_allocator - operator()(Allocator &alloc) const - { - return thrust::detail::execute_with_allocator(alloc); - } -}; - - -} // end detail - - -static const detail::par_t par; - - -} // end tbb -} // end system - - -// alias par here -namespace tbb -{ - - -using thrust::system::tbb::par; - - -} // end tbb -} // end thrust - diff --git a/compat/thrust/system/tbb/detail/partition.h b/compat/thrust/system/tbb/detail/partition.h deleted file mode 100644 index af37121888..0000000000 --- a/compat/thrust/system/tbb/detail/partition.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - - -template - ForwardIterator stable_partition(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred); - -template - ForwardIterator stable_partition(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred); - -template - thrust::pair - stable_partition_copy(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred); - - -template - thrust::pair - stable_partition_copy(execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred); - - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/tbb/detail/partition.inl b/compat/thrust/system/tbb/detail/partition.inl deleted file mode 100644 index 1e421e10f3..0000000000 --- a/compat/thrust/system/tbb/detail/partition.inl +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - - -template - ForwardIterator stable_partition(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - // tbb prefers generic::stable_partition to cpp::stable_partition - return thrust::system::detail::generic::stable_partition(exec, first, last, pred); -} // end stable_partition() - - -template - ForwardIterator stable_partition(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred) -{ - // tbb prefers generic::stable_partition to cpp::stable_partition - return thrust::system::detail::generic::stable_partition(exec, first, last, stencil, pred); -} // end stable_partition() - -template - thrust::pair - stable_partition_copy(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - // tbb prefers generic::stable_partition_copy to cpp::stable_partition_copy - return thrust::system::detail::generic::stable_partition_copy(exec, first, last, out_true, out_false, pred); -} // end stable_partition_copy() - - -template - thrust::pair - stable_partition_copy(execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator1 out_true, - OutputIterator2 out_false, - Predicate pred) -{ - // tbb prefers generic::stable_partition_copy to cpp::stable_partition_copy - return thrust::system::detail::generic::stable_partition_copy(exec, first, last, stencil, out_true, out_false, pred); -} // end stable_partition_copy() - - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/tbb/detail/reduce.h b/compat/thrust/system/tbb/detail/reduce.h deleted file mode 100644 index 83a7cc3214..0000000000 --- 
a/compat/thrust/system/tbb/detail/reduce.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file reduce.h - * \brief TBB implementation of reduce. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - - -template - OutputType reduce(execution_policy &exec, - InputIterator begin, - InputIterator end, - OutputType init, - BinaryFunction binary_op); - - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/tbb/detail/reduce.inl b/compat/thrust/system/tbb/detail/reduce.inl deleted file mode 100644 index c249852769..0000000000 --- a/compat/thrust/system/tbb/detail/reduce.inl +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ -namespace reduce_detail -{ - -template -struct body -{ - RandomAccessIterator first; - OutputType sum; - bool first_call; // TBB can invoke operator() multiple times on the same body - thrust::detail::host_function binary_op; - - // note: we only initalize sum with init to avoid calling OutputType's default constructor - body(RandomAccessIterator first, OutputType init, BinaryFunction binary_op) - : first(first), sum(init), first_call(true), binary_op(binary_op) - {} - - // note: we only initalize sum with b.sum to avoid calling OutputType's default constructor - body(body& b, ::tbb::split) - : first(b.first), sum(b.sum), first_call(true), binary_op(b.binary_op) - {} - - template - void operator()(const ::tbb::blocked_range &r) - { - // we assume that blocked_range specifies a contiguous range of integers - - if (r.empty()) return; // nothing to do - - RandomAccessIterator iter = first + r.begin(); - - OutputType temp = thrust::raw_reference_cast(*iter); - - ++iter; - - for (Size i = r.begin() + 1; i != r.end(); ++i, ++iter) - temp = binary_op(temp, *iter); - - - if (first_call) - { - // first time body has been invoked - first_call = false; - sum = temp; - } - else - { - // body has been previously invoked, accumulate temp into sum - sum = binary_op(sum, temp); - } - } // end operator()() - - void join(body& b) - { - sum = binary_op(sum, b.sum); - } -}; // end body - -} // end reduce_detail - - -template - OutputType reduce(execution_policy &exec, - InputIterator begin, - InputIterator end, - OutputType init, - BinaryFunction binary_op) -{ - typedef typename thrust::iterator_difference::type Size; - - Size n = thrust::distance(begin, end); - - if (n == 0) - { - return init; 
- } - else - { - typedef typename reduce_detail::body Body; - Body reduce_body(begin, init, binary_op); - ::tbb::parallel_reduce(::tbb::blocked_range(0,n), reduce_body); - return binary_op(init, reduce_body.sum); - } -} - - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/tbb/detail/reduce_by_key.h b/compat/thrust/system/tbb/detail/reduce_by_key.h deleted file mode 100644 index 0149a763f7..0000000000 --- a/compat/thrust/system/tbb/detail/reduce_by_key.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - - -template - thrust::pair - reduce_by_key(execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred, - BinaryFunction binary_op); - - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/tbb/detail/reduce_by_key.inl b/compat/thrust/system/tbb/detail/reduce_by_key.inl deleted file mode 100644 index 10d2d8b4a8..0000000000 --- a/compat/thrust/system/tbb/detail/reduce_by_key.inl +++ /dev/null @@ -1,344 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ -namespace reduce_by_key_detail -{ - - -template - inline L divide_ri(const L x, const R y) -{ - return (x + (y - 1)) / y; -} - - -template - struct partial_sum_type - : thrust::detail::eval_if< - thrust::detail::has_result_type::value, - thrust::detail::result_type, - thrust::detail::eval_if< - thrust::detail::is_output_iterator::value, - thrust::iterator_value, - thrust::iterator_value - > - > -{}; - - -template - struct partial_sum_type - : thrust::detail::eval_if< - thrust::detail::has_result_type::value, - thrust::detail::result_type, - thrust::iterator_value - > -{}; - - -template - thrust::pair< - InputIterator1, - thrust::pair< - typename InputIterator1::value_type, - typename partial_sum_type::type - > - > - reduce_last_segment_backward(InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - BinaryPredicate binary_pred, - BinaryFunction binary_op) -{ - typename thrust::iterator_difference::type n = keys_last - keys_first; - - // reverse the ranges and consume from the end - thrust::reverse_iterator keys_first_r(keys_last); - thrust::reverse_iterator keys_last_r(keys_first); - thrust::reverse_iterator values_first_r(values_first + n); - - typename InputIterator1::value_type result_key = *keys_first_r; - typename partial_sum_type::type result_value = *values_first_r; - - // consume the entirety of the first key's sequence - for(++keys_first_r, ++values_first_r; - (keys_first_r != keys_last_r) && binary_pred(*keys_first_r, result_key); - ++keys_first_r, ++values_first_r) - { - result_value = binary_op(result_value, *values_first_r); - } - - return thrust::make_pair(keys_first_r.base(), thrust::make_pair(result_key, result_value)); -} - - -template - thrust::tuple< - OutputIterator1, - 
OutputIterator2, - typename InputIterator1::value_type, - typename partial_sum_type::type - > - reduce_by_key_with_carry(InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred, - BinaryFunction binary_op) -{ - // first, consume the last sequence to produce the carry - // XXX is there an elegant way to pose this such that we don't need to default construct carry? - thrust::pair< - typename InputIterator1::value_type, - typename partial_sum_type::type - > carry; - - thrust::tie(keys_last, carry) = reduce_last_segment_backward(keys_first, keys_last, values_first, binary_pred, binary_op); - - // finish with sequential reduce_by_key - thrust::cpp::tag seq; - thrust::tie(keys_output, values_output) = - thrust::reduce_by_key(seq, keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op); - - return thrust::make_tuple(keys_output, values_output, carry.first, carry.second); -} - - -template - bool interval_has_carry(size_t interval_idx, size_t interval_size, size_t num_intervals, Iterator tail_flags) -{ - // to discover whether the interval has a carry, look at the tail_flag corresponding to its last element - // the final interval never has a carry by definition - return (interval_idx + 1 < num_intervals) ? 
!tail_flags[(interval_idx + 1) * interval_size - 1] : false; -} - - -template - struct serial_reduce_by_key_body -{ - typedef typename thrust::iterator_difference::type size_type; - - Iterator1 keys_first; - Iterator2 values_first; - Iterator3 result_offset; - Iterator4 keys_result; - Iterator5 values_result; - Iterator6 carry_result; - - size_type n; - size_type interval_size; - size_type num_intervals; - - BinaryPredicate binary_pred; - BinaryFunction binary_op; - - serial_reduce_by_key_body(Iterator1 keys_first, Iterator2 values_first, Iterator3 result_offset, Iterator4 keys_result, Iterator5 values_result, Iterator6 carry_result, size_type n, size_type interval_size, size_type num_intervals, BinaryPredicate binary_pred, BinaryFunction binary_op) - : keys_first(keys_first), values_first(values_first), - result_offset(result_offset), - keys_result(keys_result), - values_result(values_result), - carry_result(carry_result), - n(n), - interval_size(interval_size), - num_intervals(num_intervals), - binary_pred(binary_pred), - binary_op(binary_op) - {} - - void operator()(const ::tbb::blocked_range &r) const - { - assert(r.size() == 1); - - const size_type interval_idx = r.begin(); - - const size_type offset_to_first = interval_size * interval_idx; - const size_type offset_to_last = thrust::min(n, offset_to_first + interval_size); - - Iterator1 my_keys_first = keys_first + offset_to_first; - Iterator1 my_keys_last = keys_first + offset_to_last; - Iterator2 my_values_first = values_first + offset_to_first; - Iterator3 my_result_offset = result_offset + interval_idx; - Iterator4 my_keys_result = keys_result + *my_result_offset; - Iterator5 my_values_result = values_result + *my_result_offset; - Iterator6 my_carry_result = carry_result + interval_idx; - - // consume the rest of the interval with reduce_by_key - typedef typename thrust::iterator_value::type key_type; - typedef typename partial_sum_type::type value_type; - - // XXX is there a way to pose this so that we 
don't require default construction of carry? - thrust::pair carry; - - thrust::tie(my_keys_result, my_values_result, carry.first, carry.second) = - reduce_by_key_with_carry(my_keys_first, - my_keys_last, - my_values_first, - my_keys_result, - my_values_result, - binary_pred, - binary_op); - - // store to carry only when we actually have a carry - // store to my_keys_result & my_values_result otherwise - - // create tail_flags so we can check for a carry - thrust::detail::tail_flags flags = thrust::detail::make_tail_flags(keys_first, keys_first + n, binary_pred); - - if(interval_has_carry(interval_idx, interval_size, num_intervals, flags.begin())) - { - // we can ignore the carry's key - // XXX because the carry result is uninitialized, we should copy construct - *my_carry_result = carry.second; - } - else - { - *my_keys_result = carry.first; - *my_values_result = carry.second; - } - } -}; - - -template - serial_reduce_by_key_body - make_serial_reduce_by_key_body(Iterator1 keys_first, Iterator2 values_first, Iterator3 result_offset, Iterator4 keys_result, Iterator5 values_result, Iterator6 carry_result, typename thrust::iterator_difference::type n, size_t interval_size, size_t num_intervals, BinaryPredicate binary_pred, BinaryFunction binary_op) -{ - return serial_reduce_by_key_body(keys_first, values_first, result_offset, keys_result, values_result, carry_result, n, interval_size, num_intervals, binary_pred, binary_op); -} - - -} // end reduce_by_key_detail - - -template - thrust::pair - reduce_by_key(thrust::tbb::execution_policy &exec, - Iterator1 keys_first, Iterator1 keys_last, - Iterator2 values_first, - Iterator3 keys_result, - Iterator4 values_result, - BinaryPredicate binary_pred, - BinaryFunction binary_op) -{ - - typedef typename thrust::iterator_difference::type difference_type; - difference_type n = keys_last - keys_first; - if(n == 0) return thrust::make_pair(keys_result, values_result); - - // XXX this value is a tuning opportunity - const 
difference_type parallelism_threshold = 10000; - - if(n < parallelism_threshold) - { - // don't bother parallelizing for small n - thrust::cpp::tag seq; - return thrust::reduce_by_key(seq, keys_first, keys_last, values_first, keys_result, values_result, binary_pred, binary_op); - } - - // count the number of processors - const unsigned int p = thrust::max(1u, ::tbb::tbb_thread::hardware_concurrency()); - - // generate O(P) intervals of sequential work - // XXX oversubscribing is a tuning opportunity - const unsigned int subscription_rate = 1; - difference_type interval_size = thrust::min(parallelism_threshold, thrust::max(n, n / (subscription_rate * p))); - difference_type num_intervals = reduce_by_key_detail::divide_ri(n, interval_size); - - // decompose the input into intervals of size N / num_intervals - // add one extra element to this vector to store the size of the entire result - thrust::detail::temporary_array interval_output_offsets(0, exec, num_intervals + 1); - - // first count the number of tail flags in each interval - thrust::detail::tail_flags tail_flags = thrust::detail::make_tail_flags(keys_first, keys_last, binary_pred); - thrust::system::tbb::detail::reduce_intervals(exec, tail_flags.begin(), tail_flags.end(), interval_size, interval_output_offsets.begin() + 1, thrust::plus()); - interval_output_offsets[0] = 0; - - // scan the counts to get each body's output offset - thrust::cpp::tag seq; - thrust::inclusive_scan(seq, - interval_output_offsets.begin() + 1, interval_output_offsets.end(), - interval_output_offsets.begin() + 1); - - // do a reduce_by_key serially in each thread - // the final interval never has a carry by definition, so don't reserve space for it - typedef typename reduce_by_key_detail::partial_sum_type::type carry_type; - thrust::detail::temporary_array carries(0, exec, num_intervals - 1); - - // force grainsize == 1 with simple_partioner() - ::tbb::parallel_for(::tbb::blocked_range(0, num_intervals, 1), - 
reduce_by_key_detail::make_serial_reduce_by_key_body(keys_first, values_first, interval_output_offsets.begin(), keys_result, values_result, carries.begin(), n, interval_size, num_intervals, binary_pred, binary_op), - ::tbb::simple_partitioner()); - - difference_type size_of_result = interval_output_offsets[num_intervals]; - - // sequentially accumulate the carries - // note that the last interval does not have a carry - // XXX find a way to express this loop via a sequential algorithm, perhaps reduce_by_key - for(typename thrust::detail::temporary_array::size_type i = 0; i < carries.size(); ++i) - { - // if our interval has a carry, then we need to sum the carry to the next interval's output offset - // if it does not have a carry, then we need to ignore carry_value[i] - if(reduce_by_key_detail::interval_has_carry(i, interval_size, num_intervals, tail_flags.begin())) - { - difference_type output_idx = interval_output_offsets[i+1]; - - values_result[output_idx] = binary_op(values_result[output_idx], carries[i]); - } - } - - return thrust::make_pair(keys_result + size_of_result, values_result + size_of_result); -} - - -} // end detail -} // end tbb -} // end system -} // end thrust - diff --git a/compat/thrust/system/tbb/detail/reduce_intervals.h b/compat/thrust/system/tbb/detail/reduce_intervals.h deleted file mode 100644 index 0647ffd464..0000000000 --- a/compat/thrust/system/tbb/detail/reduce_intervals.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ -namespace reduce_intervals_detail -{ - - -template - inline L divide_ri(const L x, const R y) -{ - return (x + (y - 1)) / y; -} - - -template - struct body -{ - RandomAccessIterator1 first; - RandomAccessIterator2 result; - Size n, interval_size; - BinaryFunction binary_op; - - body(RandomAccessIterator1 first, RandomAccessIterator2 result, Size n, Size interval_size, BinaryFunction binary_op) - : first(first), result(result), n(n), interval_size(interval_size), binary_op(binary_op) - {} - - void operator()(const ::tbb::blocked_range &r) const - { - assert(r.size() == 1); - - Size interval_idx = r.begin(); - - Size offset_to_first = interval_size * interval_idx; - Size offset_to_last = thrust::min(n, offset_to_first + interval_size); - - RandomAccessIterator1 my_first = first + offset_to_first; - RandomAccessIterator1 my_last = first + offset_to_last; - - thrust::cpp::tag seq; - - // carefully pass the init value for the interval with raw_reference_cast - typedef typename BinaryFunction::result_type sum_type; - result[interval_idx] = - thrust::reduce(seq, my_first + 1, my_last, sum_type(thrust::raw_reference_cast(*my_first)), binary_op); - } -}; - - -template - body - make_body(RandomAccessIterator1 first, RandomAccessIterator2 result, Size n, Size interval_size, BinaryFunction binary_op) -{ - return body(first, result, n, interval_size, binary_op); -} - - -} // end reduce_intervals_detail - - -template - void reduce_intervals(thrust::tbb::execution_policy &, - RandomAccessIterator1 first, - RandomAccessIterator1 last, - Size interval_size, - RandomAccessIterator2 result, - BinaryFunction binary_op) -{ - typename thrust::iterator_difference::type n = last - first; - - Size 
num_intervals = reduce_intervals_detail::divide_ri(n, interval_size); - - ::tbb::parallel_for(::tbb::blocked_range(0, num_intervals, 1), reduce_intervals_detail::make_body(first, result, Size(n), interval_size, binary_op), ::tbb::simple_partitioner()); -} - - -template - void reduce_intervals(thrust::tbb::execution_policy &exec, - RandomAccessIterator1 first, - RandomAccessIterator1 last, - Size interval_size, - RandomAccessIterator2 result) -{ - typedef typename thrust::iterator_value::type value_type; - - return thrust::system::tbb::detail::reduce_intervals(exec, first, last, interval_size, result, thrust::plus()); -} - - -} // end detail -} // end tbb -} // end system -} // end thrust - diff --git a/compat/thrust/system/tbb/detail/remove.h b/compat/thrust/system/tbb/detail/remove.h deleted file mode 100644 index 48cbb5c322..0000000000 --- a/compat/thrust/system/tbb/detail/remove.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace omp -{ -namespace detail -{ - -template - ForwardIterator remove_if(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred); - - -template - ForwardIterator remove_if(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred); - - -template - OutputIterator remove_copy_if(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred); - - -template - OutputIterator remove_copy_if(execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred); - - -} // end namespace detail -} // end namespace omp -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/tbb/detail/remove.inl b/compat/thrust/system/tbb/detail/remove.inl deleted file mode 100644 index 01916c52c8..0000000000 --- a/compat/thrust/system/tbb/detail/remove.inl +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - -template - ForwardIterator remove_if(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - Predicate pred) -{ - // tbb prefers generic::remove_if to cpp::remove_if - return thrust::system::detail::generic::remove_if(exec, first, last, pred); -} - - -template - ForwardIterator remove_if(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - InputIterator stencil, - Predicate pred) -{ - // tbb prefers generic::remove_if to cpp::remove_if - return thrust::system::detail::generic::remove_if(exec, first, last, stencil, pred); -} - - -template - OutputIterator remove_copy_if(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - Predicate pred) -{ - // tbb prefers generic::remove_copy_if to cpp::remove_copy_if - return thrust::system::detail::generic::remove_copy_if(exec, first, last, result, pred); -} - -template - OutputIterator remove_copy_if(execution_policy &exec, - InputIterator1 first, - InputIterator1 last, - InputIterator2 stencil, - OutputIterator result, - Predicate pred) -{ - // tbb prefers generic::remove_copy_if to cpp::remove_copy_if - return thrust::system::detail::generic::remove_copy_if(exec, first, last, stencil, result, pred); -} - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/tbb/detail/replace.h b/compat/thrust/system/tbb/detail/replace.h deleted file mode 100644 index c48555d0e1..0000000000 --- a/compat/thrust/system/tbb/detail/replace.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits this algorithm -#include - diff --git a/compat/thrust/system/tbb/detail/reverse.h b/compat/thrust/system/tbb/detail/reverse.h deleted file mode 100644 index 04923d1f6a..0000000000 --- a/compat/thrust/system/tbb/detail/reverse.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits reverse -#include - diff --git a/compat/thrust/system/tbb/detail/scan.h b/compat/thrust/system/tbb/detail/scan.h deleted file mode 100644 index ed5cacd7e9..0000000000 --- a/compat/thrust/system/tbb/detail/scan.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file scan.h - * \brief TBB implementations of scan functions. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - -template - OutputIterator inclusive_scan(tag, - InputIterator first, - InputIterator last, - OutputIterator result, - BinaryFunction binary_op); - - -template - OutputIterator exclusive_scan(tag, - InputIterator first, - InputIterator last, - OutputIterator result, - T init, - BinaryFunction binary_op); - - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/tbb/detail/scan.inl b/compat/thrust/system/tbb/detail/scan.inl deleted file mode 100644 index 48878241c9..0000000000 --- a/compat/thrust/system/tbb/detail/scan.inl +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ -namespace scan_detail -{ - -template -struct inclusive_body -{ - InputIterator input; - OutputIterator output; - thrust::detail::host_function binary_op; - ValueType sum; - bool first_call; - - inclusive_body(InputIterator input, OutputIterator output, BinaryFunction binary_op, ValueType dummy) - : input(input), output(output), binary_op(binary_op), sum(dummy), first_call(true) - {} - - inclusive_body(inclusive_body& b, ::tbb::split) - : input(b.input), output(b.output), binary_op(b.binary_op), sum(b.sum), first_call(true) - {} - - template - void operator()(const ::tbb::blocked_range& r, ::tbb::pre_scan_tag) - { - InputIterator iter = input + r.begin(); - - ValueType temp = *iter; - - ++iter; - - for (Size i = r.begin() + 1; i != r.end(); ++i, ++iter) - temp = binary_op(temp, *iter); - - if (first_call) - sum = temp; - else - sum = binary_op(sum, temp); - - first_call = false; - } - - template - void operator()(const ::tbb::blocked_range& r, ::tbb::final_scan_tag) - { - InputIterator iter1 = input + r.begin(); - OutputIterator iter2 = output + r.begin(); - - if (first_call) - { - *iter2 = sum = *iter1; - ++iter1; - ++iter2; - for (Size i = r.begin() + 1; i != r.end(); ++i, ++iter1, ++iter2) - *iter2 = sum = binary_op(sum, *iter1); - } - else - { - for (Size i = r.begin(); i != r.end(); ++i, ++iter1, ++iter2) - *iter2 = sum = binary_op(sum, *iter1); - } - - first_call = false; - } - - void reverse_join(inclusive_body& b) - { - sum = binary_op(b.sum, sum); - } - - void assign(inclusive_body& b) - { - sum = b.sum; - } -}; - - -template -struct exclusive_body -{ - InputIterator input; - OutputIterator output; - thrust::detail::host_function binary_op; - ValueType sum; - bool first_call; - - exclusive_body(InputIterator input, OutputIterator output, 
BinaryFunction binary_op, ValueType init) - : input(input), output(output), binary_op(binary_op), sum(init), first_call(true) - {} - - exclusive_body(exclusive_body& b, ::tbb::split) - : input(b.input), output(b.output), binary_op(b.binary_op), sum(b.sum), first_call(true) - {} - - template - void operator()(const ::tbb::blocked_range& r, ::tbb::pre_scan_tag) - { - InputIterator iter = input + r.begin(); - - ValueType temp = *iter; - - ++iter; - - for (Size i = r.begin() + 1; i != r.end(); ++i, ++iter) - temp = binary_op(temp, *iter); - - if (first_call && r.begin() > 0) - sum = temp; - else - sum = binary_op(sum, temp); - - first_call = false; - } - - template - void operator()(const ::tbb::blocked_range& r, ::tbb::final_scan_tag) - { - InputIterator iter1 = input + r.begin(); - OutputIterator iter2 = output + r.begin(); - - for (Size i = r.begin(); i != r.end(); ++i, ++iter1, ++iter2) - { - ValueType temp = binary_op(sum, *iter1); - *iter2 = sum; - sum = temp; - } - - first_call = false; - } - - void reverse_join(exclusive_body& b) - { - sum = binary_op(b.sum, sum); - } - - void assign(exclusive_body& b) - { - sum = b.sum; - } -}; - -} // end scan_detail - - - -template - OutputIterator inclusive_scan(tag, - InputIterator first, - InputIterator last, - OutputIterator result, - BinaryFunction binary_op) -{ - // the pseudocode for deducing the type of the temporary used below: - // - // if BinaryFunction is AdaptableBinaryFunction - // TemporaryType = AdaptableBinaryFunction::result_type - // else if OutputIterator is a "pure" output iterator - // TemporaryType = InputIterator::value_type - // else - // TemporaryType = OutputIterator::value_type - // - // XXX upon c++0x, TemporaryType needs to be: - // result_of::type - - using namespace thrust::detail; - - typedef typename eval_if< - has_result_type::value, - result_type, - eval_if< - is_output_iterator::value, - thrust::iterator_value, - thrust::iterator_value - > - >::type ValueType; - - typedef typename 
thrust::iterator_difference::type Size; - - Size n = thrust::distance(first, last); - - if (n != 0) - { - typedef typename scan_detail::inclusive_body Body; - Body scan_body(first, result, binary_op, *first); - ::tbb::parallel_scan(::tbb::blocked_range(0,n), scan_body); - } - - thrust::advance(result, n); - - return result; -} - - -template - OutputIterator exclusive_scan(tag, - InputIterator first, - InputIterator last, - OutputIterator result, - T init, - BinaryFunction binary_op) -{ - // the pseudocode for deducing the type of the temporary used below: - // - // if BinaryFunction is AdaptableBinaryFunction - // TemporaryType = AdaptableBinaryFunction::result_type - // else if OutputIterator is a "pure" output iterator - // TemporaryType = InputIterator::value_type - // else - // TemporaryType = OutputIterator::value_type - // - // XXX upon c++0x, TemporaryType needs to be: - // result_of::type - - using namespace thrust::detail; - - typedef typename eval_if< - has_result_type::value, - result_type, - eval_if< - is_output_iterator::value, - thrust::iterator_value, - thrust::iterator_value - > - >::type ValueType; - - typedef typename thrust::iterator_difference::type Size; - - Size n = thrust::distance(first, last); - - if (n != 0) - { - typedef typename scan_detail::exclusive_body Body; - Body scan_body(first, result, binary_op, init); - ::tbb::parallel_scan(::tbb::blocked_range(0,n), scan_body); - } - - thrust::advance(result, n); - - return result; -} - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/tbb/detail/scan_by_key.h b/compat/thrust/system/tbb/detail/scan_by_key.h deleted file mode 100644 index cad4fc1454..0000000000 --- a/compat/thrust/system/tbb/detail/scan_by_key.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in 
compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits scan_by_key -#include - diff --git a/compat/thrust/system/tbb/detail/scatter.h b/compat/thrust/system/tbb/detail/scatter.h deleted file mode 100644 index c48555d0e1..0000000000 --- a/compat/thrust/system/tbb/detail/scatter.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits this algorithm -#include - diff --git a/compat/thrust/system/tbb/detail/sequence.h b/compat/thrust/system/tbb/detail/sequence.h deleted file mode 100644 index 811d8f5fbb..0000000000 --- a/compat/thrust/system/tbb/detail/sequence.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits sequence -#include - diff --git a/compat/thrust/system/tbb/detail/set_operations.h b/compat/thrust/system/tbb/detail/set_operations.h deleted file mode 100644 index 687edb2e7d..0000000000 --- a/compat/thrust/system/tbb/detail/set_operations.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits set_operations -#include - diff --git a/compat/thrust/system/tbb/detail/sort.h b/compat/thrust/system/tbb/detail/sort.h deleted file mode 100644 index 3b6f63075e..0000000000 --- a/compat/thrust/system/tbb/detail/sort.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - -template - void stable_sort(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp); - -template - void stable_sort_by_key(execution_policy &exec, - RandomAccessIterator1 keys_first, - RandomAccessIterator1 keys_last, - RandomAccessIterator2 values_first, - StrictWeakOrdering comp); - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/tbb/detail/sort.inl b/compat/thrust/system/tbb/detail/sort.inl deleted file mode 100644 index f292789067..0000000000 --- a/compat/thrust/system/tbb/detail/sort.inl +++ /dev/null @@ -1,251 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ -namespace sort_detail -{ - -// TODO tune this based on data type and comp -const static int threshold = 128 * 1024; - -template -void merge_sort(execution_policy &exec, Iterator1 first1, Iterator1 last1, Iterator2 first2, StrictWeakOrdering comp, bool inplace); - -template -struct merge_sort_closure -{ - execution_policy &exec; - Iterator1 first1, last1; - Iterator2 first2; - StrictWeakOrdering comp; - bool inplace; - - merge_sort_closure(execution_policy &exec, Iterator1 first1, Iterator1 last1, Iterator2 first2, StrictWeakOrdering comp, bool inplace) - : exec(exec), first1(first1), last1(last1), first2(first2), comp(comp), inplace(inplace) - {} - - void operator()(void) const - { - merge_sort(exec, first1, last1, first2, comp, inplace); - } -}; - - -template -void merge_sort(execution_policy &exec, Iterator1 first1, Iterator1 last1, Iterator2 first2, StrictWeakOrdering comp, bool inplace) -{ - typedef typename thrust::iterator_difference::type difference_type; - - difference_type n = thrust::distance(first1, last1); - - if (n < threshold) - { - thrust::system::detail::internal::scalar::stable_sort(first1, last1, comp); - - if (!inplace) - thrust::system::detail::internal::scalar::copy(first1, last1, first2); - - return; - } - - Iterator1 mid1 = first1 + (n / 2); - Iterator2 mid2 = first2 + (n / 2); - Iterator2 last2 = first2 + n; - - typedef merge_sort_closure Closure; - - Closure left (exec, first1, mid1, first2, comp, !inplace); - Closure right(exec, mid1, last1, mid2, comp, !inplace); - - ::tbb::parallel_invoke(left, right); - - if (inplace) thrust::merge(exec, first2, mid2, mid2, last2, first1, comp); - else thrust::merge(exec, first1, mid1, mid1, last1, first2, comp); -} - -} // end namespace sort_detail - - -namespace sort_by_key_detail -{ - -// TODO tune this based on data type and comp 
-const static int threshold = 128 * 1024; - -template -void merge_sort_by_key(execution_policy &exec, - Iterator1 first1, - Iterator1 last1, - Iterator2 first2, - Iterator3 first3, - Iterator4 first4, - StrictWeakOrdering comp, - bool inplace); - -template -struct merge_sort_by_key_closure -{ - execution_policy &exec; - Iterator1 first1, last1; - Iterator2 first2; - Iterator3 first3; - Iterator4 first4; - StrictWeakOrdering comp; - bool inplace; - - merge_sort_by_key_closure(execution_policy &exec, - Iterator1 first1, - Iterator1 last1, - Iterator2 first2, - Iterator3 first3, - Iterator4 first4, - StrictWeakOrdering comp, - bool inplace) - : exec(exec), first1(first1), last1(last1), first2(first2), first3(first3), first4(first4), comp(comp), inplace(inplace) - {} - - void operator()(void) const - { - merge_sort_by_key(exec, first1, last1, first2, first3, first4, comp, inplace); - } -}; - - -template -void merge_sort_by_key(execution_policy &exec, - Iterator1 first1, - Iterator1 last1, - Iterator2 first2, - Iterator3 first3, - Iterator4 first4, - StrictWeakOrdering comp, - bool inplace) -{ - typedef typename thrust::iterator_difference::type difference_type; - - difference_type n = thrust::distance(first1, last1); - - Iterator1 mid1 = first1 + (n / 2); - Iterator2 mid2 = first2 + (n / 2); - Iterator3 mid3 = first3 + (n / 2); - Iterator4 mid4 = first4 + (n / 2); - Iterator2 last2 = first2 + n; - Iterator3 last3 = first3 + n; - - if (n < threshold) - { - thrust::system::detail::internal::scalar::stable_sort_by_key(first1, last1, first2, comp); - - if (!inplace) - { - thrust::system::detail::internal::scalar::copy(first1, last1, first3); - thrust::system::detail::internal::scalar::copy(first2, last2, first4); - } - - return; - } - - typedef merge_sort_by_key_closure Closure; - - Closure left (exec, first1, mid1, first2, first3, first4, comp, !inplace); - Closure right(exec, mid1, last1, mid2, mid3, mid4, comp, !inplace); - - ::tbb::parallel_invoke(left, right); - - 
if(inplace) - { - thrust::merge_by_key(exec, first3, mid3, mid3, last3, first4, mid4, first1, first2, comp); - } - else - { - thrust::merge_by_key(exec, first1, mid1, mid1, last1, first2, mid2, first3, first4, comp); - } -} - -} // end namespace sort_detail - -template -void stable_sort(execution_policy &exec, - RandomAccessIterator first, - RandomAccessIterator last, - StrictWeakOrdering comp) -{ - typedef typename thrust::iterator_value::type key_type; - - thrust::detail::temporary_array temp(exec, first, last); - - sort_detail::merge_sort(exec, first, last, temp.begin(), comp, true); -} - -template - void stable_sort_by_key(execution_policy &exec, - RandomAccessIterator1 first1, - RandomAccessIterator1 last1, - RandomAccessIterator2 first2, - StrictWeakOrdering comp) -{ - typedef typename thrust::iterator_value::type key_type; - typedef typename thrust::iterator_value::type val_type; - - RandomAccessIterator2 last2 = first2 + thrust::distance(first1, last1); - - thrust::detail::temporary_array temp1(exec, first1, last1); - thrust::detail::temporary_array temp2(exec, first2, last2); - - sort_by_key_detail::merge_sort_by_key(exec, first1, last1, first2, temp1.begin(), temp2.begin(), comp, true); -} - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/tbb/detail/swap_ranges.h b/compat/thrust/system/tbb/detail/swap_ranges.h deleted file mode 100644 index 15f8f55310..0000000000 --- a/compat/thrust/system/tbb/detail/swap_ranges.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// tbb inherits swap_ranges -#include - diff --git a/compat/thrust/system/tbb/detail/tabulate.h b/compat/thrust/system/tbb/detail/tabulate.h deleted file mode 100644 index da65d8e44d..0000000000 --- a/compat/thrust/system/tbb/detail/tabulate.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits tabulate -#include - diff --git a/compat/thrust/system/tbb/detail/temporary_buffer.h b/compat/thrust/system/tbb/detail/temporary_buffer.h deleted file mode 100644 index 628bd75719..0000000000 --- a/compat/thrust/system/tbb/detail/temporary_buffer.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system has no special temporary buffer functions - diff --git a/compat/thrust/system/tbb/detail/transform.h b/compat/thrust/system/tbb/detail/transform.h deleted file mode 100644 index 70ce1f41b6..0000000000 --- a/compat/thrust/system/tbb/detail/transform.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// omp inherits transform -#include - diff --git a/compat/thrust/system/tbb/detail/transform_reduce.h b/compat/thrust/system/tbb/detail/transform_reduce.h deleted file mode 100644 index 23ed07054a..0000000000 --- a/compat/thrust/system/tbb/detail/transform_reduce.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits transform_reduce -#include - diff --git a/compat/thrust/system/tbb/detail/transform_scan.h b/compat/thrust/system/tbb/detail/transform_scan.h deleted file mode 100644 index fc2e55d0c0..0000000000 --- a/compat/thrust/system/tbb/detail/transform_scan.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits transform_scan -#include - diff --git a/compat/thrust/system/tbb/detail/uninitialized_copy.h b/compat/thrust/system/tbb/detail/uninitialized_copy.h deleted file mode 100644 index 944f4baf0e..0000000000 --- a/compat/thrust/system/tbb/detail/uninitialized_copy.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits uninitialized_copy -#include - diff --git a/compat/thrust/system/tbb/detail/uninitialized_fill.h b/compat/thrust/system/tbb/detail/uninitialized_fill.h deleted file mode 100644 index b9d6de20fa..0000000000 --- a/compat/thrust/system/tbb/detail/uninitialized_fill.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -// this system inherits uninitialized_fill -#include - diff --git a/compat/thrust/system/tbb/detail/unique.h b/compat/thrust/system/tbb/detail/unique.h deleted file mode 100644 index 34538cac7b..0000000000 --- a/compat/thrust/system/tbb/detail/unique.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in ctbbliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - - -template - ForwardIterator unique(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate binary_pred); - - -template - OutputIterator unique_copy(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator output, - BinaryPredicate binary_pred); - - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/tbb/detail/unique.inl b/compat/thrust/system/tbb/detail/unique.inl deleted file mode 100644 index 06e6a30bb1..0000000000 --- a/compat/thrust/system/tbb/detail/unique.inl +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in ctbbliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - - -template - ForwardIterator unique(execution_policy &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate binary_pred) -{ - // tbb prefers generic::unique to cpp::unique - return thrust::system::detail::generic::unique(exec,first,last,binary_pred); -} // end unique() - - -template - OutputIterator unique_copy(execution_policy &exec, - InputIterator first, - InputIterator last, - OutputIterator output, - BinaryPredicate binary_pred) -{ - // tbb prefers generic::unique_copy to cpp::unique_copy - return thrust::system::detail::generic::unique_copy(exec,first,last,output,binary_pred); -} // end unique_copy() - - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/tbb/detail/unique_by_key.h b/compat/thrust/system/tbb/detail/unique_by_key.h deleted file mode 100644 index c6d053243f..0000000000 --- a/compat/thrust/system/tbb/detail/unique_by_key.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in ctbbliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - - -template - thrust::pair - unique_by_key(execution_policy &exec, - ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first, - BinaryPredicate binary_pred); - - -template - thrust::pair - unique_by_key_copy(execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred); - - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - -#include - diff --git a/compat/thrust/system/tbb/detail/unique_by_key.inl b/compat/thrust/system/tbb/detail/unique_by_key.inl deleted file mode 100644 index 7747ca4c2e..0000000000 --- a/compat/thrust/system/tbb/detail/unique_by_key.inl +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in ctbbliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ -namespace detail -{ - - -template - thrust::pair - unique_by_key(execution_policy &exec, - ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first, - BinaryPredicate binary_pred) -{ - // tbb prefers generic::unique_by_key to cpp::unique_by_key - return thrust::system::detail::generic::unique_by_key(exec,keys_first,keys_last,values_first,binary_pred); -} // end unique_by_key() - - -template - thrust::pair - unique_by_key_copy(execution_policy &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_output, - OutputIterator2 values_output, - BinaryPredicate binary_pred) -{ - // tbb prefers generic::unique_by_key_copy to cpp::unique_by_key_copy - return thrust::system::detail::generic::unique_by_key_copy(exec,keys_first,keys_last,values_first,keys_output,values_output,binary_pred); -} // end unique_by_key_copy() - - -} // end namespace detail -} // end namespace tbb -} // end namespace system -} // end namespace thrust - diff --git a/compat/thrust/system/tbb/detail/vector.inl b/compat/thrust/system/tbb/detail/vector.inl deleted file mode 100644 index d87e670fd6..0000000000 --- a/compat/thrust/system/tbb/detail/vector.inl +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in ctbbliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ - -template - vector - ::vector() - : super_t() -{} - -template - vector - ::vector(size_type n) - : super_t(n) -{} - -template - vector - ::vector(size_type n, const value_type &value) - : super_t(n,value) -{} - -template - vector - ::vector(const vector &x) - : super_t(x) -{} - -template - template - vector - ::vector(const thrust::detail::vector_base &x) - : super_t(x) -{} - -template - template - vector - ::vector(const std::vector &x) - : super_t(x) -{} - -template - template - vector - ::vector(InputIterator first, InputIterator last) - : super_t(first,last) -{} - -template - template - vector & - vector - ::operator=(const std::vector &x) -{ - super_t::operator=(x); - return *this; -} - -template - template - vector & - vector - ::operator=(const thrust::detail::vector_base &x) -{ - super_t::operator=(x); - return *this; -} - -} // end tbb -} // end system -} // end thrust - diff --git a/compat/thrust/system/tbb/execution_policy.h b/compat/thrust/system/tbb/execution_policy.h deleted file mode 100644 index c462586f82..0000000000 --- a/compat/thrust/system/tbb/execution_policy.h +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -/*! 
\file thrust/system/tbb/execution_policy.h - * \brief Execution policies for Thrust's TBB system. - */ - -#include - -// get the execution policies definitions first -#include - -// get the definition of par -#include - -// now get all the algorithm definitions - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -// define these entities here for the purpose of Doxygenating them -// they are actually defined elsewhere -#if 0 -namespace thrust -{ -namespace system -{ -namespace tbb -{ - - -/*! \addtogroup execution_policies - * \{ - */ - - -/*! \p thrust::tbb::execution_policy is the base class for all Thrust parallel execution - * policies which are derived from Thrust's TBB backend system. - */ -template -struct execution_policy : thrust::execution_policy -{}; - - -/*! \p tbb::tag is a type representing Thrust's TBB backend system in C++'s type system. - * Iterators "tagged" with a type which is convertible to \p tbb::tag assert that they may be - * "dispatched" to algorithm implementations in the \p tbb system. - */ -struct tag : thrust::system::tbb::execution_policy { unspecified }; - - -/*! \p thrust::tbb::par is the parallel execution policy associated with Thrust's TBB - * backend system. - * - * Instead of relying on implicit algorithm dispatch through iterator system tags, users may - * directly target Thrust's TBB backend system by providing \p thrust::tbb::par as an algorithm - * parameter. - * - * Explicit dispatch can be useful in avoiding the introduction of data copies into containers such - * as \p thrust::tbb::vector. - * - * The type of \p thrust::tbb::par is implementation-defined. 
- * - * The following code snippet demonstrates how to use \p thrust::tbb::par to explicitly dispatch an - * invocation of \p thrust::for_each to the TBB backend system: - * - * \code - * #include - * #include - * #include - * - * struct printf_functor - * { - * __host__ __device__ - * void operator()(int x) - * { - * printf("%d\n"); - * } - * }; - * ... - * int vec[3]; - * vec[0] = 0; vec[1] = 1; vec[2] = 2; - * - * thrust::for_each(thrust::tbb::par, vec.begin(), vec.end(), printf_functor()); - * - * // 0 1 2 is printed to standard output in some unspecified order - * \endcode - */ -static const unspecified par; - - -/*! \} - */ - - -} // end tbb -} // end system -} // end thrust -#endif - - diff --git a/compat/thrust/system/tbb/memory.h b/compat/thrust/system/tbb/memory.h deleted file mode 100644 index deea7eed7d..0000000000 --- a/compat/thrust/system/tbb/memory.h +++ /dev/null @@ -1,414 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in ctbbliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file thrust/system/tbb/memory.h - * \brief Managing memory associated with Thrust's TBB system. - */ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ - -template class pointer; - -} // end tbb -} // end system -} // end thrust - - -/*! 
\cond - */ - -// specialize std::iterator_traits to avoid problems with the name of -// pointer's constructor shadowing its nested pointer type -// do this before pointer is defined so the specialization is correctly -// used inside the definition -namespace std -{ - -template - struct iterator_traits > -{ - private: - typedef thrust::system::tbb::pointer ptr; - - public: - typedef typename ptr::iterator_category iterator_category; - typedef typename ptr::value_type value_type; - typedef typename ptr::difference_type difference_type; - typedef ptr pointer; - typedef typename ptr::reference reference; -}; // end iterator_traits - -} // end std - -/*! \endcond - */ - - -namespace thrust -{ -namespace system -{ - -/*! \addtogroup system_backends Systems - * \ingroup system - * \{ - */ - -/*! \namespace thrust::system::tbb - * \brief \p thrust::system::tbb is the namespace containing functionality for allocating, manipulating, - * and deallocating memory available to Thrust's TBB backend system. - * The identifiers are provided in a separate namespace underneath thrust::system - * for import convenience but are also aliased in the top-level thrust::tbb - * namespace for easy access. - * - */ -namespace tbb -{ - -// forward declaration of reference for pointer -template class reference; - -/*! \cond - */ - -// XXX nvcc + msvc have trouble instantiating reference below -// this is a workaround -namespace detail -{ - -template - struct reference_msvc_workaround -{ - typedef thrust::system::tbb::reference type; -}; // end reference_msvc_workaround - -} // end detail - -/*! \endcond - */ - - -/*! \p pointer stores a pointer to an object allocated in memory available to the tbb system. - * This type provides type safety when dispatching standard algorithms on ranges resident - * in tbb memory. - * - * \p pointer has pointer semantics: it may be dereferenced and manipulated with pointer arithmetic. 
- * - * \p pointer can be created with the function \p tbb::malloc, or by explicitly calling its constructor - * with a raw pointer. - * - * The raw pointer encapsulated by a \p pointer may be obtained by eiter its get member function - * or the \p raw_pointer_cast function. - * - * \note \p pointer is not a "smart" pointer; it is the programmer's responsibility to deallocate memory - * pointed to by \p pointer. - * - * \tparam T specifies the type of the pointee. - * - * \see tbb::malloc - * \see tbb::free - * \see raw_pointer_cast - */ -template - class pointer - : public thrust::pointer< - T, - thrust::system::tbb::tag, - thrust::system::tbb::reference, - thrust::system::tbb::pointer - > -{ - /*! \cond - */ - - private: - typedef thrust::pointer< - T, - thrust::system::tbb::tag, - //thrust::system::tbb::reference, - typename detail::reference_msvc_workaround::type, - thrust::system::tbb::pointer - > super_t; - - /*! \endcond - */ - - public: - // note that tbb::pointer's member functions need __host__ __device__ - // to interoperate with nvcc + iterators' dereference member function - - /*! \p pointer's no-argument constructor initializes its encapsulated pointer to \c 0. - */ - __host__ __device__ - pointer() : super_t() {} - - /*! This constructor allows construction of a pointer from a T*. - * - * \param ptr A raw pointer to copy from, presumed to point to a location in memory - * accessible by the \p tbb system. - * \tparam OtherT \p OtherT shall be convertible to \p T. - */ - template - __host__ __device__ - explicit pointer(OtherT *ptr) : super_t(ptr) {} - - /*! This constructor allows construction from another pointer-like object with related type. - * - * \param other The \p OtherPointer to copy. - * \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible - * to \p thrust::system::tbb::tag and its element type shall be convertible to \p T. 
- */ - template - __host__ __device__ - pointer(const OtherPointer &other, - typename thrust::detail::enable_if_pointer_is_convertible< - OtherPointer, - pointer - >::type * = 0) : super_t(other) {} - - /*! Assignment operator allows assigning from another pointer-like object with related type. - * - * \param other The other pointer-like object to assign from. - * \tparam OtherPointer The system tag associated with \p OtherPointer shall be convertible - * to \p thrust::system::tbb::tag and its element type shall be convertible to \p T. - */ - template - __host__ __device__ - typename thrust::detail::enable_if_pointer_is_convertible< - OtherPointer, - pointer, - pointer & - >::type - operator=(const OtherPointer &other) - { - return super_t::operator=(other); - } -}; // end pointer - - -/*! \p reference is a wrapped reference to an object stored in memory available to the \p tbb system. - * \p reference is the type of the result of dereferencing a \p tbb::pointer. - * - * \tparam T Specifies the type of the referenced object. - */ -template - class reference - : public thrust::reference< - T, - thrust::system::tbb::pointer, - thrust::system::tbb::reference - > -{ - /*! \cond - */ - - private: - typedef thrust::reference< - T, - thrust::system::tbb::pointer, - thrust::system::tbb::reference - > super_t; - - /*! \endcond - */ - - public: - /*! \cond - */ - - typedef typename super_t::value_type value_type; - typedef typename super_t::pointer pointer; - - /*! \endcond - */ - - /*! This constructor initializes this \p reference to refer to an object - * pointed to by the given \p pointer. After this \p reference is constructed, - * it shall refer to the object pointed to by \p ptr. - * - * \param ptr A \p pointer to copy from. - */ - __host__ __device__ - explicit reference(const pointer &ptr) - : super_t(ptr) - {} - - /*! This constructor accepts a const reference to another \p reference of related type. 
- * After this \p reference is constructed, it shall refer to the same object as \p other. - * - * \param other A \p reference to copy from. - * \tparam OtherT The element type of the other \p reference. - * - * \note This constructor is templated primarily to allow initialization of reference - * from reference. - */ - template - __host__ __device__ - reference(const reference &other, - typename thrust::detail::enable_if_convertible< - typename reference::pointer, - pointer - >::type * = 0) - : super_t(other) - {} - - /*! Copy assignment operator copy assigns from another \p reference of related type. - * - * \param other The other \p reference to assign from. - * \return *this - * \tparam OtherT The element type of the other \p reference. - */ - template - reference &operator=(const reference &other); - - /*! Assignment operator assigns from a \p value_type. - * - * \param x The \p value_type to assign from. - * \return *this - */ - reference &operator=(const value_type &x); -}; // end reference - -/*! Exchanges the values of two objects referred to by \p reference. - * \p x The first \p reference of interest. - * \p y The second \p reference ot interest. - */ -template -__host__ __device__ -void swap(reference x, reference y); - -/*! Allocates an area of memory available to Thrust's tbb system. - * \param n Number of bytes to allocate. - * \return A tbb::pointer pointing to the beginning of the newly - * allocated memory. A null tbb::pointer is returned if - * an error occurs. - * \note The tbb::pointer returned by this function must be - * deallocated with \p tbb::free. - * \see tbb::free - * \see std::malloc - */ -inline pointer malloc(std::size_t n); - -/*! Allocates a typed area of memory available to Thrust's tbb system. - * \param n Number of elements to allocate. - * \return A tbb::pointer pointing to the beginning of the newly - * allocated memory. A null tbb::pointer is returned if - * an error occurs. 
- * \note The tbb::pointer returned by this function must be - * deallocated with \p tbb::free. - * \see tbb::free - * \see std::malloc - */ -template -inline pointer malloc(std::size_t n); - -/*! Deallocates an area of memory previously allocated by tbb::malloc. - * \param ptr A tbb::pointer pointing to the beginning of an area - * of memory previously allocated with tbb::malloc. - * \see tbb::malloc - * \see std::free - */ -inline void free(pointer ptr); - -// XXX upon c++11 -// template using allocator = thrust::detail::malloc_allocator >; - -/*! \p tbb::allocator is the default allocator used by the \p tbb system's containers such as - * tbb::vector if no user-specified allocator is provided. \p tbb::allocator allocates - * (deallocates) storage with \p tbb::malloc (\p tbb::free). - */ -template - struct allocator - : thrust::detail::malloc_allocator< - T, - tag, - pointer - > -{ - /*! The \p rebind metafunction provides the type of an \p allocator - * instantiated with another type. - * - * \tparam U The other type to use for instantiation. - */ - template - struct rebind - { - /*! The typedef \p other gives the type of the rebound \p allocator. - */ - typedef allocator other; - }; - - /*! No-argument constructor has no effect. - */ - __host__ __device__ - inline allocator() {} - - /*! Copy constructor has no effect. - */ - __host__ __device__ - inline allocator(const allocator &) {} - - /*! Constructor from other \p allocator has no effect. - */ - template - __host__ __device__ - inline allocator(const allocator &) {} - - /*! Destructor has no effect. - */ - __host__ __device__ - inline ~allocator() {} -}; // end allocator - -} // end tbb - -/*! \} - */ - -} // end system - -/*! \namespace thrust::tbb - * \brief \p thrust::tbb is a top-level alias for thrust::system::tbb. 
- */ -namespace tbb -{ - -using thrust::system::tbb::pointer; -using thrust::system::tbb::reference; -using thrust::system::tbb::malloc; -using thrust::system::tbb::free; -using thrust::system::tbb::allocator; - -} // end tbb - -} // end thrust - -#include - diff --git a/compat/thrust/system/tbb/vector.h b/compat/thrust/system/tbb/vector.h deleted file mode 100644 index 1c49c3f9e0..0000000000 --- a/compat/thrust/system/tbb/vector.h +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in ctbbliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file thrust/system/tbb/vector.h - * \brief A dynamically-sizable array of elements which reside in memory available to - * Thrust's TBB system. - */ - -#pragma once - -#include -#include -#include -#include - -namespace thrust -{ -namespace system -{ -namespace tbb -{ - -// XXX upon c++11 -// template > using vector = thrust::detail::vector_base; - -/*! \p tbb::vector is a container that supports random access to elements, - * constant time removal of elements at the end, and linear time insertion - * and removal of elements at the beginning or in the middle. The number of - * elements in a \p tbb::vector may vary dynamically; memory management is - * automatic. The elements contained in a \p tbb::vector reside in memory - * available to the \p tbb system. - * - * \tparam T The element type of the \p tbb::vector. - * \tparam Allocator The allocator type of the \p tbb::vector. 
Defaults to \p tbb::allocator. - * - * \see http://www.sgi.com/tech/stl/Vector.html - * \see host_vector For the documentation of the complete interface which is - * shared by \p tbb::vector - * \see device_vector - */ -template > - class vector - : public thrust::detail::vector_base -{ - /*! \cond - */ - private: - typedef thrust::detail::vector_base super_t; - /*! \endcond - */ - - public: - - /*! \cond - */ - typedef typename super_t::size_type size_type; - typedef typename super_t::value_type value_type; - /*! \endcond - */ - - /*! This constructor creates an empty \p tbb::vector. - */ - vector(); - - /*! This constructor creates a \p tbb::vector with \p n default-constructed elements. - * \param n The size of the \p tbb::vector to create. - */ - explicit vector(size_type n); - - /*! This constructor creates a \p tbb::vector with \p n copies of \p value. - * \param n The size of the \p tbb::vector to create. - * \param value An element to copy. - */ - explicit vector(size_type n, const value_type &value); - - /*! Copy constructor copies from another \p tbb::vector. - * \param x The other \p tbb::vector to copy. - */ - vector(const vector &x); - - /*! This constructor copies from another Thrust vector-like object. - * \param x The other object to copy from. - */ - template - vector(const thrust::detail::vector_base &x); - - /*! This constructor copies from a \c std::vector. - * \param x The \c std::vector to copy from. - */ - template - vector(const std::vector &x); - - /*! This constructor creates a \p tbb::vector by copying from a range. - * \param first The beginning of the range. - * \param last The end of the range. - */ - template - vector(InputIterator first, InputIterator last); - - // XXX vector_base should take a Derived type so we don't have to define these superfluous assigns - - /*! Assignment operator assigns from a \c std::vector. - * \param x The \c std::vector to assign from. 
- * \return *this - */ - template - vector &operator=(const std::vector &x); - - /*! Assignment operator assigns from another Thrust vector-like object. - * \param x The other object to assign from. - * \return *this - */ - template - vector &operator=(const thrust::detail::vector_base &x); -}; // end vector - -} // end tbb -} // end system - -// alias system::tbb names at top-level -namespace tbb -{ - -using thrust::system::tbb::vector; - -} // end tbb - -} // end thrust - -#include - diff --git a/compat/thrust/system_error.h b/compat/thrust/system_error.h deleted file mode 100644 index ce88fe6bd8..0000000000 --- a/compat/thrust/system_error.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file thrust/system_error.h - * \brief System diagnostics - */ - -#pragma once - -#include - -namespace thrust -{ - -/*! \addtogroup system System Access - * \{ - */ - -/*! \namespace thrust::system - * \brief \p thrust::system is the namespace which contains functionality for manipulating - * memory specific to one of Thrust's backend systems. It also contains functionality - * for reporting error conditions originating from the operating system or other - * low-level application program interfaces such as the CUDA runtime. - * They are provided in a separate namespace for import convenience but are - * also aliased in the top-level \p thrust namespace for easy access. 
- */ -namespace system -{ -} // end system - -/*! \} // end system - */ - -} // end thrust - -#include -#include - diff --git a/compat/thrust/tabulate.h b/compat/thrust/tabulate.h deleted file mode 100644 index c87edf01ab..0000000000 --- a/compat/thrust/tabulate.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file tabulate.h - * \brief Fills a range with the tabulation of a function - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup transformations - * \{ - */ - - -/*! \p tabulate fills the range [first, last) with the value of a function applied to each - * element's index. - * - * For each iterator \c i in the range [first, last), \p tabulate performs the assignment - * *i = unary_op(i - first). - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the range. - * \param last The end of the range. - * \param unary_op The unary operation to apply. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable, - * and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then x + y is defined, - * and if \c T is \p ForwardIterator's \c value_type, then T(0) is defined. 
- * \tparam UnaryOperation is a model of Unary Function - * and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. - * - * The following code snippet demonstrates how to use \p tabulate to generate the first \c n non-positive integers - * using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * const int N = 10; - * int A[N]; - * thrust::tabulate(thrust::host, A, A + 10, thrust::negate()); - * // A is now {0, -1, -2, -3, -4, -5, -6, -7, -8, -9} - * \endcode - * - * \see thrust::fill - * \see thrust::generate - * \see thrust::sequence - */ -template - void tabulate(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - UnaryOperation unary_op); - - -/*! \p tabulate fills the range [first, last) with the value of a function applied to each - * element's index. - * - * For each iterator \c i in the range [first, last), \p tabulate performs the assignment - * *i = unary_op(i - first). - * - * \param first The beginning of the range. - * \param last The end of the range. - * \param unary_op The unary operation to apply. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable, - * and if \c x and \c y are objects of \c ForwardIterator's \c value_type, then x + y is defined, - * and if \c T is \p ForwardIterator's \c value_type, then T(0) is defined. - * \tparam UnaryOperation is a model of Unary Function - * and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. - * - * The following code snippet demonstrates how to use \p tabulate to generate the first \c n non-positive integers: - * - * \code - * #include - * #include - * ... 
- * const int N = 10; - * int A[N]; - * thrust::tabulate(A, A + 10, thrust::negate()); - * // A is now {0, -1, -2, -3, -4, -5, -6, -7, -8, -9} - * \endcode - * - * \see thrust::fill - * \see thrust::generate - * \see thrust::sequence - */ -template - void tabulate(ForwardIterator first, - ForwardIterator last, - UnaryOperation unary_op); - - -/*! \} // end transformations - */ - - -} // end namespace thrust - -#include - diff --git a/compat/thrust/transform.h b/compat/thrust/transform.h deleted file mode 100644 index 1ada105828..0000000000 --- a/compat/thrust/transform.h +++ /dev/null @@ -1,720 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file transform.h - * \brief Transforms input ranges using a function object - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup algorithms - */ - -/*! \addtogroup transformations - * \ingroup algorithms - * \{ - */ - - -/*! This version of \p transform applies a unary function to each element - * of an input sequence and stores the result in the corresponding - * position in an output sequence. Specifically, for each iterator - * i in the range [\p first, \p last) the operation - * op(*i) is performed and the result is assigned to *o, - * where o is the corresponding output iterator in the range - * [\p result, \p result + (\p last - \p first) ). 
The input and - * output sequences may coincide, resulting in an in-place transformation. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param result The beginning of the output sequence. - * \param op The tranformation operation. - * \return The end of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to \c UnaryFunction's \c argument_type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam UnaryFunction is a model of Unary Function - * and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. - * - * \pre \p first may equal \p result, but the range [first, last) shall not overlap the range [result, result + (last - first)) otherwise. - * - * The following code snippet demonstrates how to use \p transform to negate a range in-place - * using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * - * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; - * - * thrust::negate op; - * - * thrust::transform(thrust::host, data, data + 10, data, op); // in-place transformation - * - * // data is now {5, 0, -2, 3, -2, -4, 0, 1, -2, -8}; - * \endcode - * - * \see http://www.sgi.com/tech/stl/transform.html - */ -template - OutputIterator transform(const thrust::detail::execution_policy_base &exec, - InputIterator first, InputIterator last, - OutputIterator result, - UnaryFunction op); - - -/*! This version of \p transform applies a unary function to each element - * of an input sequence and stores the result in the corresponding - * position in an output sequence. 
Specifically, for each iterator - * i in the range [\p first, \p last) the operation - * op(*i) is performed and the result is assigned to *o, - * where o is the corresponding output iterator in the range - * [\p result, \p result + (\p last - \p first) ). The input and - * output sequences may coincide, resulting in an in-place transformation. - * - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param result The beginning of the output sequence. - * \param op The tranformation operation. - * \return The end of the output sequence. - * - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to \c UnaryFunction's \c argument_type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam UnaryFunction is a model of Unary Function - * and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. - * - * \pre \p first may equal \p result, but the range [first, last) shall not overlap the range [result, result + (last - first)) otherwise. - * - * The following code snippet demonstrates how to use \p transform - * - * \code - * #include - * #include - * - * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; - * - * thrust::negate op; - * - * thrust::transform(data, data + 10, data, op); // in-place transformation - * - * // data is now {5, 0, -2, 3, -2, -4, 0, 1, -2, -8}; - * \endcode - * - * \see http://www.sgi.com/tech/stl/transform.html - */ -template - OutputIterator transform(InputIterator first, InputIterator last, - OutputIterator result, - UnaryFunction op); - - -/*! This version of \p transform applies a binary function to each pair - * of elements from two input sequences and stores the result in the - * corresponding position in an output sequence. 
Specifically, for - * each iterator i in the range [\p first1, \p last1) and - * j = first + (i - first1) in the range [\p first2, \p last2) - * the operation op(*i,*j) is performed and the result is - * assigned to *o, where o is the corresponding - * output iterator in the range [\p result, \p result + (\p last - \p first) ). - * The input and output sequences may coincide, resulting in an - * in-place transformation. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the first input sequence. - * \param last1 The end of the first input sequence. - * \param first2 The beginning of the second input sequence. - * \param result The beginning of the output sequence. - * \param op The tranformation operation. - * \return The end of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator - * and \c InputIterator1's \c value_type is convertible to \c BinaryFunction's \c first_argument_type. - * \tparam InputIterator2 is a model of Input Iterator - * and \c InputIterator2's \c value_type is convertible to \c BinaryFunction's \c second_argument_type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam BinaryFunction is a model of Binary Function - * and \c BinaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. - * - * \pre \p first1 may equal \p result, but the range [first1, last1) shall not overlap the range [result, result + (last1 - first1)) otherwise. - * \pre \p first2 may equal \p result, but the range [first2, first2 + (last1 - first1)) shall not overlap the range [result, result + (last1 - first1)) otherwise. 
- * - * The following code snippet demonstrates how to use \p transform to compute the sum of two - * ranges using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * - * int input1[6] = {-5, 0, 2, 3, 2, 4}; - * int input2[6] = { 3, 6, -2, 1, 2, 3}; - * int output[6]; - * - * thrust::plus op; - * - * thrust::transform(thrust::host, input1, input1 + 6, input2, output, op); - * - * // output is now {-2, 6, 0, 4, 4, 7}; - * \endcode - * - * \see http://www.sgi.com/tech/stl/transform.html - */ -template - OutputIterator transform(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryFunction op); - - -/*! This version of \p transform applies a binary function to each pair - * of elements from two input sequences and stores the result in the - * corresponding position in an output sequence. Specifically, for - * each iterator i in the range [\p first1, \p last1) and - * j = first + (i - first1) in the range [\p first2, \p last2) - * the operation op(*i,*j) is performed and the result is - * assigned to *o, where o is the corresponding - * output iterator in the range [\p result, \p result + (\p last - \p first) ). - * The input and output sequences may coincide, resulting in an - * in-place transformation. - * - * \param first1 The beginning of the first input sequence. - * \param last1 The end of the first input sequence. - * \param first2 The beginning of the second input sequence. - * \param result The beginning of the output sequence. - * \param op The tranformation operation. - * \return The end of the output sequence. - * - * \tparam InputIterator1 is a model of Input Iterator - * and \c InputIterator1's \c value_type is convertible to \c BinaryFunction's \c first_argument_type. 
- * \tparam InputIterator2 is a model of Input Iterator - * and \c InputIterator2's \c value_type is convertible to \c BinaryFunction's \c second_argument_type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam BinaryFunction is a model of Binary Function - * and \c BinaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. - * - * \pre \p first1 may equal \p result, but the range [first1, last1) shall not overlap the range [result, result + (last1 - first1)) otherwise. - * \pre \p first2 may equal \p result, but the range [first2, first2 + (last1 - first1)) shall not overlap the range [result, result + (last1 - first1)) otherwise. - * - * The following code snippet demonstrates how to use \p transform - * - * \code - * #include - * #include - * - * int input1[6] = {-5, 0, 2, 3, 2, 4}; - * int input2[6] = { 3, 6, -2, 1, 2, 3}; - * int output[6]; - * - * thrust::plus op; - * - * thrust::transform(input1, input1 + 6, input2, output, op); - * - * // output is now {-2, 6, 0, 4, 4, 7}; - * \endcode - * - * \see http://www.sgi.com/tech/stl/transform.html - */ -template - OutputIterator transform(InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, - OutputIterator result, - BinaryFunction op); - - -/*! This version of \p transform_if conditionally applies a unary function - * to each element of an input sequence and stores the result in the corresponding - * position in an output sequence if the corresponding position in the input sequence - * satifies a predicate. Otherwise, the corresponding position in the - * output sequence is not modified. - * - * Specifically, for each iterator i in the range [first, last) the - * predicate pred(*i) is evaluated. If this predicate - * evaluates to \c true, the result of op(*i) is assigned to *o, - * where o is the corresponding output iterator in the range - * [result, result + (last - first) ). Otherwise, op(*i) is - * not evaluated and no assignment occurs. 
The input and output sequences may coincide, - * resulting in an in-place transformation. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param result The beginning of the output sequence. - * \param op The tranformation operation. - * \param pred The predicate operation. - * \return The end of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator, - * and \c InputIterator's \c value_type is convertible to \c Predicate's \c argument_type, - * and \c InputIterator's \c value_type is convertible to \c UnaryFunction's \c argument_type. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam UnaryFunction is a model of Unary Function - * and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. - * \tparam Predicate is a model of Predicate. - * - * \pre \p first may equal \p result, but the range [first, last) shall not overlap the range [result, result + (last - first)) otherwise. - * - * The following code snippet demonstrates how to use \p transform_if to negate the odd-valued - * elements of a range using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... 
- * - * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; - * - * struct is_odd - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x % 2; - * } - * }; - * - * thrust::negate op; - * thrust::identity identity; - * - * // negate odd elements - * thrust::transform_if(thrust::host, data, data + 10, data, op, is_odd()); // in-place transformation - * - * // data is now {5, 0, 2, 3, 2, 4, 0, 1, 2, 8}; - * \endcode - * - * \see thrust::transform - */ -template - ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, - InputIterator first, InputIterator last, - ForwardIterator result, - UnaryFunction op, - Predicate pred); - - -/*! This version of \p transform_if conditionally applies a unary function - * to each element of an input sequence and stores the result in the corresponding - * position in an output sequence if the corresponding position in the input sequence - * satifies a predicate. Otherwise, the corresponding position in the - * output sequence is not modified. - * - * Specifically, for each iterator i in the range [first, last) the - * predicate pred(*i) is evaluated. If this predicate - * evaluates to \c true, the result of op(*i) is assigned to *o, - * where o is the corresponding output iterator in the range - * [result, result + (last - first) ). Otherwise, op(*i) is - * not evaluated and no assignment occurs. The input and output sequences may coincide, - * resulting in an in-place transformation. - * - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param result The beginning of the output sequence. - * \param op The tranformation operation. - * \param pred The predicate operation. - * \return The end of the output sequence. 
- * - * \tparam InputIterator is a model of Input Iterator, - * and \c InputIterator's \c value_type is convertible to \c Predicate's \c argument_type, - * and \c InputIterator's \c value_type is convertible to \c UnaryFunction's \c argument_type. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam UnaryFunction is a model of Unary Function - * and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. - * \tparam Predicate is a model of Predicate. - * - * \pre \p first may equal \p result, but the range [first, last) shall not overlap the range [result, result + (last - first)) otherwise. - * - * The following code snippet demonstrates how to use \p transform_if: - * - * \code - * #include - * #include - * - * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; - * - * struct is_odd - * { - * __host__ __device__ - * bool operator()(int x) - * { - * return x % 2; - * } - * }; - * - * thrust::negate op; - * thrust::identity identity; - * - * // negate odd elements - * thrust::transform_if(data, data + 10, data, op, is_odd()); // in-place transformation - * - * // data is now {5, 0, 2, 3, 2, 4, 0, 1, 2, 8}; - * \endcode - * - * \see thrust::transform - */ -template - ForwardIterator transform_if(InputIterator first, InputIterator last, - ForwardIterator result, - UnaryFunction op, - Predicate pred); - - -/*! This version of \p transform_if conditionally applies a unary function - * to each element of an input sequence and stores the result in the corresponding - * position in an output sequence if the corresponding position in a stencil sequence - * satisfies a predicate. Otherwise, the corresponding position in the - * output sequence is not modified. - * - * Specifically, for each iterator i in the range [first, last) the - * predicate pred(*s) is evaluated, where s is the corresponding input - * iterator in the range [stencil, stencil + (last - first) ). 
If this predicate - * evaluates to \c true, the result of op(*i) is assigned to *o, - * where o is the corresponding output iterator in the range - * [result, result + (last - first) ). Otherwise, op(*i) is - * not evaluated and no assignment occurs. The input and output sequences may coincide, - * resulting in an in-place transformation. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param stencil The beginning of the stencil sequence. - * \param result The beginning of the output sequence. - * \param op The tranformation operation. - * \param pred The predicate operation. - * \return The end of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator - * and \c InputIterator1's \c value_type is convertible to \c UnaryFunction's \c argument_type. - * \tparam InputIterator2 is a model of Input Iterator - * and \c InputIterator2's \c value_type is convertible to \c Predicate's \c argument_type. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam UnaryFunction is a model of Unary Function - * and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. - * \tparam Predicate is a model of Predicate. - * - * \pre \p first may equal \p result, but the range [first, last) shall not overlap the range [result, result + (last - first)) otherwise. - * \pre \p stencil may equal \p result, but the range [stencil, stencil + (last - first)) shall not overlap the range [result, result + (last - first)) otherwise. - * - * The following code snippet demonstrates how to use \p transform_if using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... 
- * - * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; - * int stencil[10] = { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; - * - * thrust::negate op; - * thrust::identity identity; - * - * thrust::transform_if(thrust::host, data, data + 10, stencil, data, op, identity); // in-place transformation - * - * // data is now {5, 0, -2, -3, -2, 4, 0, -1, -2, 8}; - * \endcode - * - * \see thrust::transform - */ -template - ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 first, InputIterator1 last, - InputIterator2 stencil, - ForwardIterator result, - UnaryFunction op, - Predicate pred); - - -/*! This version of \p transform_if conditionally applies a unary function - * to each element of an input sequence and stores the result in the corresponding - * position in an output sequence if the corresponding position in a stencil sequence - * satisfies a predicate. Otherwise, the corresponding position in the - * output sequence is not modified. - * - * Specifically, for each iterator i in the range [first, last) the - * predicate pred(*s) is evaluated, where s is the corresponding input - * iterator in the range [stencil, stencil + (last - first) ). If this predicate - * evaluates to \c true, the result of op(*i) is assigned to *o, - * where o is the corresponding output iterator in the range - * [result, result + (last - first) ). Otherwise, op(*i) is - * not evaluated and no assignment occurs. The input and output sequences may coincide, - * resulting in an in-place transformation. - * - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param stencil The beginning of the stencil sequence. - * \param result The beginning of the output sequence. - * \param op The tranformation operation. - * \param pred The predicate operation. - * \return The end of the output sequence. 
- * - * \tparam InputIterator1 is a model of Input Iterator - * and \c InputIterator1's \c value_type is convertible to \c UnaryFunction's \c argument_type. - * \tparam InputIterator2 is a model of Input Iterator - * and \c InputIterator2's \c value_type is convertible to \c Predicate's \c argument_type. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam UnaryFunction is a model of Unary Function - * and \c UnaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. - * \tparam Predicate is a model of Predicate. - * - * \pre \p first may equal \p result, but the range [first, last) shall not overlap the range [result, result + (last - first)) otherwise. - * \pre \p stencil may equal \p result, but the range [stencil, stencil + (last - first)) shall not overlap the range [result, result + (last - first)) otherwise. - * - * The following code snippet demonstrates how to use \p transform_if: - * - * \code - * #include - * #include - * - * int data[10] = {-5, 0, 2, -3, 2, 4, 0, -1, 2, 8}; - * int stencil[10] = { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0}; - * - * thrust::negate op; - * thrust::identity identity; - * - * thrust::transform_if(data, data + 10, stencil, data, op, identity); // in-place transformation - * - * // data is now {5, 0, -2, -3, -2, 4, 0, -1, -2, 8}; - * \endcode - * - * \see thrust::transform - */ -template - ForwardIterator transform_if(InputIterator1 first, InputIterator1 last, - InputIterator2 stencil, - ForwardIterator result, - UnaryFunction op, - Predicate pred); - - -/*! This version of \p transform_if conditionally applies a binary function - * to each pair of elements from two input sequences and stores the result in the corresponding - * position in an output sequence if the corresponding position in a stencil sequence - * satifies a predicate. Otherwise, the corresponding position in the - * output sequence is not modified. 
- * - * Specifically, for each iterator i in the range [first1, last1) and - * j = first2 + (i - first1) in the range [first2, first2 + (last1 - first1) ), - * the predicate pred(*s) is evaluated, where s is the corresponding input - * iterator in the range [stencil, stencil + (last1 - first1) ). If this predicate - * evaluates to \c true, the result of binary_op(*i,*j) is assigned to *o, - * where o is the corresponding output iterator in the range - * [result, result + (last1 - first1) ). Otherwise, binary_op(*i,*j) is - * not evaluated and no assignment occurs. The input and output sequences may coincide, - * resulting in an in-place transformation. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first1 The beginning of the first input sequence. - * \param last1 The end of the first input sequence. - * \param first2 The beginning of the second input sequence. - * \param stencil The beginning of the stencil sequence. - * \param result The beginning of the output sequence. - * \param binary_op The transformation operation. - * \param pred The predicate operation. - * \return The end of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator - * and \c InputIterator1's \c value_type is convertible to \c BinaryFunction's \c first_argument_type. - * \tparam InputIterator2 is a model of Input Iterator - * and \c InputIterator2's \c value_type is convertible to \c BinaryFunction's \c second_argument_type. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam BinaryFunction is a model of Binary Function - * and \c BinaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. - * \tparam Predicate is a model of Predicate. 
- * - * \pre \p first1 may equal \p result, but the range [first1, last1) shall not overlap the range [result, result + (last1 - first1)) otherwise. - * \pre \p first2 may equal \p result, but the range [first2, first2 + (last1 - first1)) shall not overlap the range [result, result + (last1 - first1)) otherwise. - * \pre \p stencil may equal \p result, but the range [stencil, stencil + (last1 - first1)) shall not overlap the range [result, result + (last1 - first1)) otherwise. - * - * The following code snippet demonstrates how to use \p transform_if using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * ... - * - * int input1[6] = {-5, 0, 2, 3, 2, 4}; - * int input2[6] = { 3, 6, -2, 1, 2, 3}; - * int stencil[8] = { 1, 0, 1, 0, 1, 0}; - * int output[6]; - * - * thrust::plus op; - * thrust::identity identity; - * - * thrust::transform_if(thrust::host, input1, input1 + 6, input2, stencil, output, op, identity); - * - * // output is now {-2, 0, 0, 3, 4, 4}; - * \endcode - * - * \see thrust::transform - */ -template - ForwardIterator transform_if(const thrust::detail::execution_policy_base &exec, - InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, - InputIterator3 stencil, - ForwardIterator result, - BinaryFunction binary_op, - Predicate pred); - - -/*! This version of \p transform_if conditionally applies a binary function - * to each pair of elements from two input sequences and stores the result in the corresponding - * position in an output sequence if the corresponding position in a stencil sequence - * satifies a predicate. Otherwise, the corresponding position in the - * output sequence is not modified. 
- * - * Specifically, for each iterator i in the range [first1, last1) and - * j = first2 + (i - first1) in the range [first2, first2 + (last1 - first1) ), - * the predicate pred(*s) is evaluated, where s is the corresponding input - * iterator in the range [stencil, stencil + (last1 - first1) ). If this predicate - * evaluates to \c true, the result of binary_op(*i,*j) is assigned to *o, - * where o is the corresponding output iterator in the range - * [result, result + (last1 - first1) ). Otherwise, binary_op(*i,*j) is - * not evaluated and no assignment occurs. The input and output sequences may coincide, - * resulting in an in-place transformation. - * - * \param first1 The beginning of the first input sequence. - * \param last1 The end of the first input sequence. - * \param first2 The beginning of the second input sequence. - * \param stencil The beginning of the stencil sequence. - * \param result The beginning of the output sequence. - * \param binary_op The transformation operation. - * \param pred The predicate operation. - * \return The end of the output sequence. - * - * \tparam InputIterator1 is a model of Input Iterator - * and \c InputIterator1's \c value_type is convertible to \c BinaryFunction's \c first_argument_type. - * \tparam InputIterator2 is a model of Input Iterator - * and \c InputIterator2's \c value_type is convertible to \c BinaryFunction's \c second_argument_type. - * \tparam ForwardIterator is a model of Forward Iterator. - * \tparam BinaryFunction is a model of Binary Function - * and \c BinaryFunction's \c result_type is convertible to \c OutputIterator's \c value_type. - * \tparam Predicate is a model of Predicate. - * - * \pre \p first1 may equal \p result, but the range [first1, last1) shall not overlap the range [result, result + (last1 - first1)) otherwise. - * \pre \p first2 may equal \p result, but the range [first2, first2 + (last1 - first1)) shall not overlap the range [result, result + (last1 - first1)) otherwise. 
- * \pre \p stencil may equal \p result, but the range [stencil, stencil + (last1 - first1)) shall not overlap the range [result, result + (last1 - first1)) otherwise. - * - * The following code snippet demonstrates how to use \p transform_if: - * - * \code - * #include - * #include - * - * int input1[6] = {-5, 0, 2, 3, 2, 4}; - * int input2[6] = { 3, 6, -2, 1, 2, 3}; - * int stencil[8] = { 1, 0, 1, 0, 1, 0}; - * int output[6]; - * - * thrust::plus op; - * thrust::identity identity; - * - * thrust::transform_if(input1, input1 + 6, input2, stencil, output, op, identity); - * - * // output is now {-2, 0, 0, 3, 4, 4}; - * \endcode - * - * \see thrust::transform - */ -template - ForwardIterator transform_if(InputIterator1 first1, InputIterator1 last1, - InputIterator2 first2, - InputIterator3 stencil, - ForwardIterator result, - BinaryFunction binary_op, - Predicate pred); - - -/*! \} // end transformations - */ - - -} // end namespace thrust - -#include - diff --git a/compat/thrust/transform_reduce.h b/compat/thrust/transform_reduce.h deleted file mode 100644 index 3ef5efd6aa..0000000000 --- a/compat/thrust/transform_reduce.h +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file transform_reduce.h - * \brief Fused transform / reduction - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! 
\addtogroup reductions - * \{ - * \addtogroup transformed_reductions Transformed Reductions - * \ingroup reductions - * \{ - */ - - -/*! \p transform_reduce fuses the \p transform and \p reduce operations. - * \p transform_reduce is equivalent to performing a transformation defined by - * \p unary_op into a temporary sequence and then performing \p reduce on the - * transformed sequence. In most cases, fusing these two operations together is - * more efficient, since fewer memory reads and writes are required. - * - * \p transform_reduce performs a reduction on the transformation of the - * sequence [first, last) according to \p unary_op. Specifically, - * \p unary_op is applied to each element of the sequence and then the result - * is reduced to a single value with \p binary_op using the initial value - * \p init. Note that the transformation \p unary_op is not applied to - * the initial value \p init. The order of reduction is not specified, - * so \p binary_op must be both commutative and associative. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param unary_op The function to apply to each element of the input sequence. - * \param init The result is initialized to this value. - * \param binary_op The reduction operation. - * \return The result of the transformed reduction. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type. - * \tparam UnaryFunction is a model of Unary Function, - * and \p UnaryFunction's \c result_type is convertible to \c OutputType. - * \tparam OutputType is a model of Assignable, - * and is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type. 
- * \tparam BinaryFunction is a model of Binary Function, - * and \p BinaryFunction's \c result_type is convertible to \p OutputType. - * - * The following code snippet demonstrates how to use \p transform_reduce - * to compute the maximum value of the absolute value of the elements - * of a range using the \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * - * template - * struct absolute_value : public unary_function - * { - * __host__ __device__ T operator()(const T &x) const - * { - * return x < T(0) ? -x : x; - * } - * }; - * - * ... - * - * int data[6] = {-1, 0, -2, -2, 1, -3}; - * int result = thrust::transform_reduce(thrust::host, - * data, data + 6, - * absolute_value(), - * 0, - * thrust::maximum()); - * // result == 3 - * \endcode - * - * \see \c transform - * \see \c reduce - */ -template - OutputType transform_reduce(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - UnaryFunction unary_op, - OutputType init, - BinaryFunction binary_op); - - -/*! \p transform_reduce fuses the \p transform and \p reduce operations. - * \p transform_reduce is equivalent to performing a transformation defined by - * \p unary_op into a temporary sequence and then performing \p reduce on the - * transformed sequence. In most cases, fusing these two operations together is - * more efficient, since fewer memory reads and writes are required. - * - * \p transform_reduce performs a reduction on the transformation of the - * sequence [first, last) according to \p unary_op. Specifically, - * \p unary_op is applied to each element of the sequence and then the result - * is reduced to a single value with \p binary_op using the initial value - * \p init. Note that the transformation \p unary_op is not applied to - * the initial value \p init. The order of reduction is not specified, - * so \p binary_op must be both commutative and associative. 
- * - * \param first The beginning of the sequence. - * \param last The end of the sequence. - * \param unary_op The function to apply to each element of the input sequence. - * \param init The result is initialized to this value. - * \param binary_op The reduction operation. - * \return The result of the transformed reduction. - * - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is convertible to \p UnaryFunction's \c argument_type. - * \tparam UnaryFunction is a model of Unary Function, - * and \p UnaryFunction's \c result_type is convertible to \c OutputType. - * \tparam OutputType is a model of Assignable, - * and is convertible to \p BinaryFunction's \c first_argument_type and \c second_argument_type. - * \tparam BinaryFunction is a model of Binary Function, - * and \p BinaryFunction's \c result_type is convertible to \p OutputType. - * - * The following code snippet demonstrates how to use \p transform_reduce - * to compute the maximum value of the absolute value of the elements - * of a range. - * - * \code - * #include - * #include - * - * template - * struct absolute_value : public unary_function - * { - * __host__ __device__ T operator()(const T &x) const - * { - * return x < T(0) ? -x : x; - * } - * }; - * - * ... - * - * int data[6] = {-1, 0, -2, -2, 1, -3}; - * int result = thrust::transform_reduce(data, data + 6, - * absolute_value(), - * 0, - * thrust::maximum()); - * // result == 3 - * \endcode - * - * \see \c transform - * \see \c reduce - */ -template - OutputType transform_reduce(InputIterator first, - InputIterator last, - UnaryFunction unary_op, - OutputType init, - BinaryFunction binary_op); - - -/*! 
\} // end transformed_reductions - * \} // end reductions - */ - - -} // end namespace thrust - -#include - diff --git a/compat/thrust/transform_scan.h b/compat/thrust/transform_scan.h deleted file mode 100644 index e9943e401f..0000000000 --- a/compat/thrust/transform_scan.h +++ /dev/null @@ -1,322 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file transform_scan.h - * \brief Fused transform / prefix-sum - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup algorithms - */ - -/*! \addtogroup prefixsums Prefix Sums - * \ingroup algorithms - * \{ - */ - -/*! \addtogroup transformed_prefixsums Transformed Prefix Sums - * \ingroup prefixsums - * \{ - */ - - -/*! \p transform_inclusive_scan fuses the \p transform and \p inclusive_scan - * operations. \p transform_inclusive_scan is equivalent to performing a - * tranformation defined by \p unary_op into a temporary sequence and then - * performing an \p inclusive_scan on the tranformed sequence. In most - * cases, fusing these two operations together is more efficient, since - * fewer memory reads and writes are required. In \p transform_inclusive_scan, - * unary_op(\*first) is assigned to \*result and the result - * of binary_op(unary_op(\*first), unary_op(\*(first + 1))) is - * assigned to \*(result + 1), and so on. The transform scan - * operation is permitted to be in-place. 
- * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param result The beginning of the output sequence. - * \param unary_op The function used to tranform the input sequence. - * \param binary_op The associatve operator used to 'sum' transformed values. - * \return The end of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to \c unary_op's input type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam UnaryFunction is a model of Unary Function - * and accepts inputs of \c InputIterator's \c value_type. \c UnaryFunction's result_type - * is convertable to \c OutputIterator's \c value_type. - * \tparam AssociativeOperator is a model of Binary Function - * and \c AssociativeOperator's \c result_type is - * convertible to \c OutputIterator's \c value_type. - * - * \pre \p first may equal \p result, but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p transform_inclusive_scan using the - * \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... 
- * - * int data[6] = {1, 0, 2, 2, 1, 3}; - * - * thrust::negate unary_op; - * thrust::plus binary_op; - * - * thrust::transform_inclusive_scan(thrust::host, data, data + 6, data, unary_op, binary_op); // in-place scan - * - * // data is now {-1, -1, -3, -5, -6, -9} - * \endcode - * - * \see \p transform - * \see \p inclusive_scan - * - */ -template - OutputIterator transform_inclusive_scan(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - UnaryFunction unary_op, - AssociativeOperator binary_op); - - -/*! \p transform_inclusive_scan fuses the \p transform and \p inclusive_scan - * operations. \p transform_inclusive_scan is equivalent to performing a - * tranformation defined by \p unary_op into a temporary sequence and then - * performing an \p inclusive_scan on the tranformed sequence. In most - * cases, fusing these two operations together is more efficient, since - * fewer memory reads and writes are required. In \p transform_inclusive_scan, - * unary_op(\*first) is assigned to \*result and the result - * of binary_op(unary_op(\*first), unary_op(\*(first + 1))) is - * assigned to \*(result + 1), and so on. The transform scan - * operation is permitted to be in-place. - * - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param result The beginning of the output sequence. - * \param unary_op The function used to tranform the input sequence. - * \param binary_op The associatve operator used to 'sum' transformed values. - * \return The end of the output sequence. - * - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to \c unary_op's input type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam UnaryFunction is a model of Unary Function - * and accepts inputs of \c InputIterator's \c value_type. 
\c UnaryFunction's result_type - * is convertable to \c OutputIterator's \c value_type. - * \tparam AssociativeOperator is a model of Binary Function - * and \c AssociativeOperator's \c result_type is - * convertible to \c OutputIterator's \c value_type. - * - * \pre \p first may equal \p result, but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p transform_inclusive_scan - * - * \code - * #include - * - * int data[6] = {1, 0, 2, 2, 1, 3}; - * - * thrust::negate unary_op; - * thrust::plus binary_op; - * - * thrust::transform_inclusive_scan(data, data + 6, data, unary_op, binary_op); // in-place scan - * - * // data is now {-1, -1, -3, -5, -6, -9} - * \endcode - * - * \see \p transform - * \see \p inclusive_scan - * - */ -template - OutputIterator transform_inclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result, - UnaryFunction unary_op, - AssociativeOperator binary_op); - - -/*! \p transform_exclusive_scan fuses the \p transform and \p exclusive_scan - * operations. \p transform_exclusive_scan is equivalent to performing a - * tranformation defined by \p unary_op into a temporary sequence and then - * performing an \p exclusive_scan on the tranformed sequence. In most - * cases, fusing these two operations together is more efficient, since - * fewer memory reads and writes are required. In - * \p transform_exclusive_scan, \p init is assigned to \*result - * and the result of binary_op(init, unary_op(\*first)) is assigned - * to \*(result + 1), and so on. The transform scan operation is - * permitted to be in-place. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param result The beginning of the output sequence. 
- * \param unary_op The function used to tranform the input sequence. - * \param init The initial value of the \p exclusive_scan - * \param binary_op The associatve operator used to 'sum' transformed values. - * \return The end of the output sequence. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to \c unary_op's input type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam UnaryFunction is a model of Unary Function - * and accepts inputs of \c InputIterator's \c value_type. \c UnaryFunction's result_type - * is convertable to \c OutputIterator's \c value_type. - * \tparam T is convertible to \c OutputIterator's \c value_type. - * \tparam AssociativeOperator is a model of Binary Function - * and \c AssociativeOperator's \c result_type is - * convertible to \c OutputIterator's \c value_type. - * - * \pre \p first may equal \p result, but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p transform_exclusive_scan using the - * \p thrust::host execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * - * int data[6] = {1, 0, 2, 2, 1, 3}; - * - * thrust::negate unary_op; - * thrust::plus binary_op; - * - * thrust::transform_exclusive_scan(thrust::host, data, data + 6, data, unary_op, 4, binary_op); // in-place scan - * - * // data is now {4, 3, 3, 1, -1, -2} - * \endcode - * - * \see \p transform - * \see \p exclusive_scan - * - */ -template - OutputIterator transform_exclusive_scan(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - UnaryFunction unary_op, - T init, - AssociativeOperator binary_op); - - -/*! \p transform_exclusive_scan fuses the \p transform and \p exclusive_scan - * operations. 
\p transform_exclusive_scan is equivalent to performing a - * tranformation defined by \p unary_op into a temporary sequence and then - * performing an \p exclusive_scan on the tranformed sequence. In most - * cases, fusing these two operations together is more efficient, since - * fewer memory reads and writes are required. In - * \p transform_exclusive_scan, \p init is assigned to \*result - * and the result of binary_op(init, unary_op(\*first)) is assigned - * to \*(result + 1), and so on. The transform scan operation is - * permitted to be in-place. - * - * \param first The beginning of the input sequence. - * \param last The end of the input sequence. - * \param result The beginning of the output sequence. - * \param unary_op The function used to tranform the input sequence. - * \param init The initial value of the \p exclusive_scan - * \param binary_op The associatve operator used to 'sum' transformed values. - * \return The end of the output sequence. - * - * \tparam InputIterator is a model of Input Iterator - * and \c InputIterator's \c value_type is convertible to \c unary_op's input type. - * \tparam OutputIterator is a model of Output Iterator. - * \tparam UnaryFunction is a model of Unary Function - * and accepts inputs of \c InputIterator's \c value_type. \c UnaryFunction's result_type - * is convertable to \c OutputIterator's \c value_type. - * \tparam T is convertible to \c OutputIterator's \c value_type. - * \tparam AssociativeOperator is a model of Binary Function - * and \c AssociativeOperator's \c result_type is - * convertible to \c OutputIterator's \c value_type. - * - * \pre \p first may equal \p result, but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. 
- * - * The following code snippet demonstrates how to use \p transform_exclusive_scan - * - * \code - * #include - * - * int data[6] = {1, 0, 2, 2, 1, 3}; - * - * thrust::negate unary_op; - * thrust::plus binary_op; - * - * thrust::transform_exclusive_scan(data, data + 6, data, unary_op, 4, binary_op); // in-place scan - * - * // data is now {4, 3, 3, 1, -1, -2} - * \endcode - * - * \see \p transform - * \see \p exclusive_scan - * - */ -template - OutputIterator transform_exclusive_scan(InputIterator first, - InputIterator last, - OutputIterator result, - UnaryFunction unary_op, - T init, - AssociativeOperator binary_op); - - -/*! \} // end transformed_prefixsums - */ - - -/*! \} // end prefixsums - */ - - -} // end namespace thrust - -#include - diff --git a/compat/thrust/tuple.h b/compat/thrust/tuple.h deleted file mode 100644 index 3961d982fa..0000000000 --- a/compat/thrust/tuple.h +++ /dev/null @@ -1,583 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file tuple.h - * \brief A type encapsulating a heterogeneous collection of elements - */ - -/* - * Copyright (C) 1999, 2000 Jaakko Järvi (jaakko.jarvi@cs.utu.fi) - * - * Distributed under the Boost Software License, Version 1.0. - * (See accompanying NOTICE file for the complete license) - * - * For more information, see http://www.boost.org - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ - -/*! 
\addtogroup utility - * \{ - */ - -/*! \addtogroup tuple - * \{ - */ - -/*! \cond - */ - -struct null_type; - -/*! \endcond - */ - -/*! This metafunction returns the type of a - * \p tuple's Nth element. - * - * \tparam N This parameter selects the element of interest. - * \tparam T A \c tuple type of interest. - * - * \see pair - * \see tuple - */ -template - struct tuple_element -{ - private: - typedef typename T::tail_type Next; - - public: - /*! The result of this metafunction is returned in \c type. - */ - typedef typename tuple_element::type type; -}; // end tuple_element - -/*! This metafunction returns the number of elements - * of a \p tuple type of interest. - * - * \tparam T A \c tuple type of interest. - * - * \see pair - * \see tuple - */ -template - struct tuple_size -{ - /*! The result of this metafunction is returned in \c value. - */ - static const int value = 1 + tuple_size::value; -}; // end tuple_size - -// get function for non-const cons-lists, returns a reference to the element - -/*! The \p get function returns a reference to a \p tuple element of - * interest. - * - * \param t A reference to a \p tuple of interest. - * \return A reference to \p t's Nth element. - * - * \tparam N The index of the element of interest. - * - * The following code snippet demonstrates how to use \p get to print - * the value of a \p tuple element. - * - * \code - * #include - * #include - * ... - * thrust::tuple t(13, "thrust"); - * - * std::cout << "The 1st value of t is " << thrust::get<1>(t) << std::endl; - * \endcode - * - * \see pair - * \see tuple - */ -template -__host__ __device__ -inline typename access_traits< - typename tuple_element >::type - >::non_const_type -get(detail::cons& t); - - -/*! The \p get function returns a \c const reference to a \p tuple element of - * interest. - * - * \param t A reference to a \p tuple of interest. - * \return A \c const reference to \p t's Nth element. - * - * \tparam N The index of the element of interest. 
- * - * The following code snippet demonstrates how to use \p get to print - * the value of a \p tuple element. - * - * \code - * #include - * #include - * ... - * thrust::tuple t(13, "thrust"); - * - * std::cout << "The 1st value of t is " << thrust::get<1>(t) << std::endl; - * \endcode - * - * \see pair - * \see tuple - */ -template -__host__ __device__ -inline typename access_traits< - typename tuple_element >::type - >::const_type -get(const detail::cons& t); - - - -/*! \p tuple is a class template that can be instantiated with up to ten arguments. - * Each template argument specifies the type of element in the \p tuple. - * Consequently, tuples are heterogeneous, fixed-size collections of values. An - * instantiation of \p tuple with two arguments is similar to an instantiation - * of \p pair with the same two arguments. Individual elements of a \p tuple may - * be accessed with the \p get function. - * - * \tparam TN The type of the N \c tuple element. Thrust's \p tuple - * type currently supports up to ten elements. - * - * The following code snippet demonstrates how to create a new \p tuple object - * and inspect and modify the value of its elements. - * - * \code - * #include - * #include - * ... - * // create a tuple containing an int, a float, and a string - * thrust::tuple t(13, 0.1f, "thrust"); - * - * // individual members are accessed with the free function get - * std::cout << "The first element's value is " << thrust::get<0>(t) << std::endl; - * - * // or the member function get - * std::cout << "The second element's value is " << t.get<1>() << std::endl; - * - * // we can also modify elements with the same function - * thrust::get<0>(t) += 10; - * \endcode - * - * \see pair - * \see get - * \see make_tuple - * \see tuple_element - * \see tuple_size - * \see tie - */ -template - class tuple : - public detail::map_tuple_to_cons::type -{ - /*! \cond - */ - - private: - typedef typename detail::map_tuple_to_cons::type inherited; - - /*! 
\endcond - */ - - public: - /*! \p tuple's no-argument constructor initializes each element. - */ - inline __host__ __device__ - tuple(void) {} - - /*! \p tuple's one-argument constructor copy constructs the first element from the given parameter - * and intializes all other elements. - * \param t0 The value to assign to this \p tuple's first element. - */ - inline __host__ __device__ - tuple(typename access_traits::parameter_type t0) - : inherited(t0, - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type())) {} - - /*! \p tuple's one-argument constructor copy constructs the first two elements from the given parameters - * and intializes all other elements. - * \param t0 The value to assign to this \p tuple's first element. - * \param t1 The value to assign to this \p tuple's second element. - * \note \p tuple's constructor has ten variants of this form, the rest of which are ommitted here for brevity. - */ - inline __host__ __device__ - tuple(typename access_traits::parameter_type t0, - typename access_traits::parameter_type t1) - : inherited(t0, t1, - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type())) {} - - /*! 
\cond - */ - - inline __host__ __device__ - tuple(typename access_traits::parameter_type t0, - typename access_traits::parameter_type t1, - typename access_traits::parameter_type t2) - : inherited(t0, t1, t2, - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type())) {} - - inline __host__ __device__ - tuple(typename access_traits::parameter_type t0, - typename access_traits::parameter_type t1, - typename access_traits::parameter_type t2, - typename access_traits::parameter_type t3) - : inherited(t0, t1, t2, t3, - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type())) {} - - inline __host__ __device__ - tuple(typename access_traits::parameter_type t0, - typename access_traits::parameter_type t1, - typename access_traits::parameter_type t2, - typename access_traits::parameter_type t3, - typename access_traits::parameter_type t4) - : inherited(t0, t1, t2, t3, t4, - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type())) {} - - inline __host__ __device__ - tuple(typename access_traits::parameter_type t0, - typename access_traits::parameter_type t1, - typename access_traits::parameter_type t2, - typename access_traits::parameter_type t3, - typename access_traits::parameter_type t4, - typename access_traits::parameter_type t5) - : inherited(t0, t1, t2, t3, t4, t5, - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type())) {} - - inline __host__ __device__ - tuple(typename access_traits::parameter_type t0, - typename access_traits::parameter_type t1, - typename access_traits::parameter_type t2, - typename access_traits::parameter_type t3, - typename access_traits::parameter_type t4, - typename 
access_traits::parameter_type t5, - typename access_traits::parameter_type t6) - : inherited(t0, t1, t2, t3, t4, t5, t6, - static_cast(null_type()), - static_cast(null_type()), - static_cast(null_type())) {} - - inline __host__ __device__ - tuple(typename access_traits::parameter_type t0, - typename access_traits::parameter_type t1, - typename access_traits::parameter_type t2, - typename access_traits::parameter_type t3, - typename access_traits::parameter_type t4, - typename access_traits::parameter_type t5, - typename access_traits::parameter_type t6, - typename access_traits::parameter_type t7) - : inherited(t0, t1, t2, t3, t4, t5, t6, t7, - static_cast(null_type()), - static_cast(null_type())) {} - - inline __host__ __device__ - tuple(typename access_traits::parameter_type t0, - typename access_traits::parameter_type t1, - typename access_traits::parameter_type t2, - typename access_traits::parameter_type t3, - typename access_traits::parameter_type t4, - typename access_traits::parameter_type t5, - typename access_traits::parameter_type t6, - typename access_traits::parameter_type t7, - typename access_traits::parameter_type t8) - : inherited(t0, t1, t2, t3, t4, t5, t6, t7, t8, - static_cast(null_type())) {} - - inline __host__ __device__ - tuple(typename access_traits::parameter_type t0, - typename access_traits::parameter_type t1, - typename access_traits::parameter_type t2, - typename access_traits::parameter_type t3, - typename access_traits::parameter_type t4, - typename access_traits::parameter_type t5, - typename access_traits::parameter_type t6, - typename access_traits::parameter_type t7, - typename access_traits::parameter_type t8, - typename access_traits::parameter_type t9) - : inherited(t0, t1, t2, t3, t4, t5, t6, t7, t8, t9) {} - - - template - inline __host__ __device__ - tuple(const detail::cons& p) : inherited(p) {} - - template - inline __host__ __device__ - tuple& operator=(const detail::cons& k) - { - inherited::operator=(k); - return 
*this; - } - - /*! \endcond - */ - - /*! This assignment operator allows assigning the first two elements of this \p tuple from a \p pair. - * \param k A \p pair to assign from. - */ - template - __host__ __device__ inline - tuple& operator=(const thrust::pair& k) { - //BOOST_STATIC_ASSERT(length::value == 2);// check_length = 2 - this->head = k.first; - this->tail.head = k.second; - return *this; - } - - /*! \p swap swaps the elements of two tuples. - * - * \param t The other tuple with which to swap. - */ - inline __host__ __device__ - void swap(tuple &t) - { - inherited::swap(t); - } -}; - -/*! \cond - */ - -template <> -class tuple : - public null_type -{ -public: - typedef null_type inherited; -}; - -/*! \endcond - */ - - -/*! This version of \p make_tuple creates a new \c tuple object from a - * single object. - * - * \param t0 The object to copy from. - * \return A \p tuple object with a single member which is a copy of \p t0. - */ -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0); - -/*! This version of \p make_tuple creates a new \c tuple object from two - * objects. - * - * \param t0 The first object to copy from. - * \param t1 The second object to copy from. - * \return A \p tuple object with two members which are copies of \p t0 - * and \p t1. - * - * \note \p make_tuple has ten variants, the rest of which are omitted here - * for brevity. - */ -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0, const T1& t1); - -/*! This version of \p tie creates a new \c tuple whose single element is - * a reference which refers to this function's argument. - * - * \param t0 The object to reference. - * \return A \p tuple object with one member which is a reference to \p t0. - */ -template -__host__ __device__ inline -tuple tie(T0& t0); - -/*! This version of \p tie creates a new \c tuple of references object which - * refers to this function's arguments. 
- * - * \param t0 The first object to reference. - * \param t1 The second object to reference. - * \return A \p tuple object with two members which are references to \p t0 - * and \p t1. - * - * \note \p tie has ten variants, the rest of which are omitted here for - * brevity. - */ -template -__host__ __device__ inline -tuple tie(T0& t0, T1& t1); - -/*! \p swap swaps the contents of two tuples. - * - * \param x The first \p tuple to swap. - * \param y The second \p tuple to swap. - */ -template< - typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9, - typename U0, typename U1, typename U2, typename U3, typename U4, typename U5, typename U6, typename U7, typename U8, typename U9 -> -inline __host__ __device__ -void swap(tuple &x, - tuple &y); - - - -/*! \cond - */ - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0, const T1& t1, const T2& t2); - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3); - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4); - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5); - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6); - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7); - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const 
T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7, const T8& t8); - -template -__host__ __device__ inline - typename detail::make_tuple_mapper::type - make_tuple(const T0& t0, const T1& t1, const T2& t2, const T3& t3, const T4& t4, const T5& t5, const T6& t6, const T7& t7, const T8& t8, const T9& t9); - -template -__host__ __device__ inline -tuple tie(T0 &t0, T1 &t1, T2 &t2); - -template -__host__ __device__ inline -tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3); - -template -__host__ __device__ inline -tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4); - -template -__host__ __device__ inline -tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5); - -template -__host__ __device__ inline -tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6); - -template -__host__ __device__ inline -tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7); - -template -__host__ __device__ inline -tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7, T8 &t8); - -template -__host__ __device__ inline -tuple tie(T0 &t0, T1 &t1, T2 &t2, T3 &t3, T4 &t4, T5 &t5, T6 &t6, T7 &t7, T8 &t8, T9 &t9); - - -__host__ __device__ inline -bool operator==(const null_type&, const null_type&); - -__host__ __device__ inline -bool operator>=(const null_type&, const null_type&); - -__host__ __device__ inline -bool operator<=(const null_type&, const null_type&); - -__host__ __device__ inline -bool operator!=(const null_type&, const null_type&); - -__host__ __device__ inline -bool operator<(const null_type&, const null_type&); - -__host__ __device__ inline -bool operator>(const null_type&, const null_type&); - -/*! \endcond - */ - -/*! \} // tuple - */ - -/*! 
\} // utility - */ - -} // end thrust - diff --git a/compat/thrust/uninitialized_copy.h b/compat/thrust/uninitialized_copy.h deleted file mode 100644 index 77b673c425..0000000000 --- a/compat/thrust/uninitialized_copy.h +++ /dev/null @@ -1,301 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file uninitialized_copy.h - * \brief Copy construction into a range of uninitialized elements from a source range - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup copying - * \{ - */ - - -/*! In \c thrust, the function \c thrust::device_new allocates memory for - * an object and then creates an object at that location by calling a constructor. - * Occasionally, however, it is useful to separate those two operations. - * If each iterator in the range [result, result + (last - first)) points - * to uninitialized memory, then \p uninitialized_copy creates a copy of - * [first, last) in that range. That is, for each iterator \c i in - * the input, \p uninitialized_copy creates a copy of \c *i in the location pointed - * to by the corresponding iterator in the output range by \p ForwardIterator's - * \c value_type's copy constructor with *i as its argument. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The first element of the input range to copy from. 
- * \param last The last element of the input range to copy from. - * \param result The first element of the output range to copy to. - * \return An iterator pointing to the last element of the output range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator. - * \tparam ForwardIterator is a model of Forward Iterator, - * \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that takes - * a single argument whose type is \p InputIterator's \c value_type. - * - * \pre \p first may equal \p result, but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p uninitialized_copy to initialize - * a range of uninitialized memory using the \p thrust::device execution policy for - * parallelization: - * - * \code - * #include - * #include - * #include - * #include - * - * struct Int - * { - * __host__ __device__ - * Int(int x) : val(x) {} - * int val; - * }; - * ... - * const int N = 137; - * - * Int val(46); - * thrust::device_vector input(N, val); - * thrust::device_ptr array = thrust::device_malloc(N); - * thrust::uninitialized_copy(thrust::device, input.begin(), input.end(), array); - * - * // Int x = array[i]; - * // x.val == 46 for all 0 <= i < N - * \endcode - * - * \see http://www.sgi.com/tech/stl/uninitialized_copy.html - * \see \c copy - * \see \c uninitialized_fill - * \see \c device_new - * \see \c device_malloc - */ -template - ForwardIterator uninitialized_copy(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - ForwardIterator result); - - -/*! In \c thrust, the function \c thrust::device_new allocates memory for - * an object and then creates an object at that location by calling a constructor. - * Occasionally, however, it is useful to separate those two operations. 
- * If each iterator in the range [result, result + (last - first)) points - * to uninitialized memory, then \p uninitialized_copy creates a copy of - * [first, last) in that range. That is, for each iterator \c i in - * the input, \p uninitialized_copy creates a copy of \c *i in the location pointed - * to by the corresponding iterator in the output range by \p ForwardIterator's - * \c value_type's copy constructor with *i as its argument. - * - * \param first The first element of the input range to copy from. - * \param last The last element of the input range to copy from. - * \param result The first element of the output range to copy to. - * \return An iterator pointing to the last element of the output range. - * - * \tparam InputIterator is a model of Input Iterator. - * \tparam ForwardIterator is a model of Forward Iterator, - * \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that takes - * a single argument whose type is \p InputIterator's \c value_type. - * - * \pre \p first may equal \p result, but the range [first, last) and the range [result, result + (last - first)) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p uninitialized_copy to initialize - * a range of uninitialized memory. - * - * \code - * #include - * #include - * #include - * - * struct Int - * { - * __host__ __device__ - * Int(int x) : val(x) {} - * int val; - * }; - * ... 
- * const int N = 137; - * - * Int val(46); - * thrust::device_vector input(N, val); - * thrust::device_ptr array = thrust::device_malloc(N); - * thrust::uninitialized_copy(input.begin(), input.end(), array); - * - * // Int x = array[i]; - * // x.val == 46 for all 0 <= i < N - * \endcode - * - * \see http://www.sgi.com/tech/stl/uninitialized_copy.html - * \see \c copy - * \see \c uninitialized_fill - * \see \c device_new - * \see \c device_malloc - */ -template - ForwardIterator uninitialized_copy(InputIterator first, - InputIterator last, - ForwardIterator result); - - -/*! In \c thrust, the function \c thrust::device_new allocates memory for - * an object and then creates an object at that location by calling a constructor. - * Occasionally, however, it is useful to separate those two operations. - * If each iterator in the range [result, result + n) points - * to uninitialized memory, then \p uninitialized_copy_n creates a copy of - * [first, first + n) in that range. That is, for each iterator \c i in - * the input, \p uninitialized_copy_n creates a copy of \c *i in the location pointed - * to by the corresponding iterator in the output range by \p InputIterator's - * \c value_type's copy constructor with *i as its argument. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The first element of the input range to copy from. - * \param n The number of elements to copy. - * \param result The first element of the output range to copy to. - * \return An iterator pointing to the last element of the output range. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator. - * \tparam Size is an integral type. 
- * \tparam ForwardIterator is a model of Forward Iterator, - * \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that takes - * a single argument whose type is \p InputIterator's \c value_type. - * - * \pre \p first may equal \p result, but the range [first, first + n) and the range [result, result + n) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p uninitialized_copy to initialize - * a range of uninitialized memory using the \p thrust::device execution policy for - * parallelization: - * - * \code - * #include - * #include - * #include - * #include - * - * struct Int - * { - * __host__ __device__ - * Int(int x) : val(x) {} - * int val; - * }; - * ... - * const int N = 137; - * - * Int val(46); - * thrust::device_vector input(N, val); - * thrust::device_ptr array = thrust::device_malloc(N); - * thrust::uninitialized_copy_n(thrust::device, input.begin(), N, array); - * - * // Int x = array[i]; - * // x.val == 46 for all 0 <= i < N - * \endcode - * - * \see http://www.sgi.com/tech/stl/uninitialized_copy.html - * \see \c uninitialized_copy - * \see \c copy - * \see \c uninitialized_fill - * \see \c device_new - * \see \c device_malloc - */ -template - ForwardIterator uninitialized_copy_n(const thrust::detail::execution_policy_base &exec, - InputIterator first, - Size n, - ForwardIterator result); - - -/*! In \c thrust, the function \c thrust::device_new allocates memory for - * an object and then creates an object at that location by calling a constructor. - * Occasionally, however, it is useful to separate those two operations. - * If each iterator in the range [result, result + n) points - * to uninitialized memory, then \p uninitialized_copy_n creates a copy of - * [first, first + n) in that range. 
That is, for each iterator \c i in - * the input, \p uninitialized_copy_n creates a copy of \c *i in the location pointed - * to by the corresponding iterator in the output range by \p InputIterator's - * \c value_type's copy constructor with *i as its argument. - * - * \param first The first element of the input range to copy from. - * \param n The number of elements to copy. - * \param result The first element of the output range to copy to. - * \return An iterator pointing to the last element of the output range. - * - * \tparam InputIterator is a model of Input Iterator. - * \tparam Size is an integral type. - * \tparam ForwardIterator is a model of Forward Iterator, - * \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that takes - * a single argument whose type is \p InputIterator's \c value_type. - * - * \pre \p first may equal \p result, but the range [first, first + n) and the range [result, result + n) shall not overlap otherwise. - * - * The following code snippet demonstrates how to use \p uninitialized_copy to initialize - * a range of uninitialized memory. - * - * \code - * #include - * #include - * #include - * - * struct Int - * { - * __host__ __device__ - * Int(int x) : val(x) {} - * int val; - * }; - * ... - * const int N = 137; - * - * Int val(46); - * thrust::device_vector input(N, val); - * thrust::device_ptr array = thrust::device_malloc(N); - * thrust::uninitialized_copy_n(input.begin(), N, array); - * - * // Int x = array[i]; - * // x.val == 46 for all 0 <= i < N - * \endcode - * - * \see http://www.sgi.com/tech/stl/uninitialized_copy.html - * \see \c uninitialized_copy - * \see \c copy - * \see \c uninitialized_fill - * \see \c device_new - * \see \c device_malloc - */ -template - ForwardIterator uninitialized_copy_n(InputIterator first, - Size n, - ForwardIterator result); - - -/*! 
\} // copying - */ - - -} // end thrust - -#include - diff --git a/compat/thrust/uninitialized_fill.h b/compat/thrust/uninitialized_fill.h deleted file mode 100644 index c726241ddd..0000000000 --- a/compat/thrust/uninitialized_fill.h +++ /dev/null @@ -1,273 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file uninitialized_fill.h - * \brief Copy construction into a range of uninitialized elements from a source value - */ - -#pragma once - -#include -#include - -namespace thrust -{ - - -/*! \addtogroup filling - * \ingroup transformations - * \{ - */ - - -/*! In \c thrust, the function \c thrust::device_new allocates memory for - * an object and then creates an object at that location by calling a - * constructor. Occasionally, however, it is useful to separate those two - * operations. If each iterator in the range [first, last) points - * to unitialized memory, then \p unitialized_fill creates copies of \c x - * in that range. That is, for each iterator \c i in the range [first, last), - * \p uninitialized_fill creates a copy of \c x in the location pointed to \c i by - * calling \p ForwardIterator's \c value_type's copy constructor. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The first element of the range of interest. 
- * \param last The last element of the range of interest. - * \param x The value to use as the exemplar of the copy constructor. - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that - * takes a single argument of type \p T. - * - * The following code snippet demonstrates how to use \p uninitialized_fill to initialize a range of - * uninitialized memory using the \p thrust::device execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * - * struct Int - * { - * __host__ __device__ - * Int(int x) : val(x) {} - * int val; - * }; - * ... - * const int N = 137; - * - * Int val(46); - * thrust::device_ptr array = thrust::device_malloc(N); - * thrust::uninitialized_fill(thrust::device, array, array + N, val); - * - * // Int x = array[i]; - * // x.val == 46 for all 0 <= i < N - * \endcode - * - * \see http://www.sgi.com/tech/stl/uninitialized_fill.html - * \see \c uninitialized_fill_n - * \see \c fill - * \see \c uninitialized_copy - * \see \c device_new - * \see \c device_malloc - */ -template - void uninitialized_fill(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - const T &x); - - -/*! In \c thrust, the function \c thrust::device_new allocates memory for - * an object and then creates an object at that location by calling a - * constructor. Occasionally, however, it is useful to separate those two - * operations. If each iterator in the range [first, last) points - * to unitialized memory, then \p unitialized_fill creates copies of \c x - * in that range. That is, for each iterator \c i in the range [first, last), - * \p uninitialized_fill creates a copy of \c x in the location pointed to \c i by - * calling \p ForwardIterator's \c value_type's copy constructor. 
- * - * \param first The first element of the range of interest. - * \param last The last element of the range of interest. - * \param x The value to use as the exemplar of the copy constructor. - * - * \tparam ForwardIterator is a model of Forward Iterator, - * \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that - * takes a single argument of type \p T. - * - * The following code snippet demonstrates how to use \p uninitialized_fill to initialize a range of - * uninitialized memory. - * - * \code - * #include - * #include - * - * struct Int - * { - * __host__ __device__ - * Int(int x) : val(x) {} - * int val; - * }; - * ... - * const int N = 137; - * - * Int val(46); - * thrust::device_ptr array = thrust::device_malloc(N); - * thrust::uninitialized_fill(array, array + N, val); - * - * // Int x = array[i]; - * // x.val == 46 for all 0 <= i < N - * \endcode - * - * \see http://www.sgi.com/tech/stl/uninitialized_fill.html - * \see \c uninitialized_fill_n - * \see \c fill - * \see \c uninitialized_copy - * \see \c device_new - * \see \c device_malloc - */ -template - void uninitialized_fill(ForwardIterator first, - ForwardIterator last, - const T &x); - - -/*! In \c thrust, the function \c thrust::device_new allocates memory for - * an object and then creates an object at that location by calling a - * constructor. Occasionally, however, it is useful to separate those two - * operations. If each iterator in the range [first, first+n) points - * to unitialized memory, then \p unitialized_fill creates copies of \c x - * in that range. That is, for each iterator \c i in the range [first, first+n), - * \p uninitialized_fill creates a copy of \c x in the location pointed to \c i by - * calling \p ForwardIterator's \c value_type's copy constructor. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. 
- * \param first The first element of the range of interest. - * \param n The size of the range of interest. - * \param x The value to use as the exemplar of the copy constructor. - * \return first+n - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that - * takes a single argument of type \p T. - * - * The following code snippet demonstrates how to use \p uninitialized_fill to initialize a range of - * uninitialized memory using the \p thrust::device execution policy for parallelization: - * - * \code - * #include - * #include - * #include - * - * struct Int - * { - * __host__ __device__ - * Int(int x) : val(x) {} - * int val; - * }; - * ... - * const int N = 137; - * - * Int val(46); - * thrust::device_ptr array = thrust::device_malloc(N); - * thrust::uninitialized_fill_n(thrust::device, array, N, val); - * - * // Int x = array[i]; - * // x.val == 46 for all 0 <= i < N - * \endcode - * - * \see http://www.sgi.com/tech/stl/uninitialized_fill.html - * \see \c uninitialized_fill - * \see \c fill - * \see \c uninitialized_copy_n - * \see \c device_new - * \see \c device_malloc - */ -template - ForwardIterator uninitialized_fill_n(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - Size n, - const T &x); - - -/*! In \c thrust, the function \c thrust::device_new allocates memory for - * an object and then creates an object at that location by calling a - * constructor. Occasionally, however, it is useful to separate those two - * operations. If each iterator in the range [first, first+n) points - * to unitialized memory, then \p unitialized_fill creates copies of \c x - * in that range. 
That is, for each iterator \c i in the range [first, first+n), - * \p uninitialized_fill creates a copy of \c x in the location pointed to \c i by - * calling \p ForwardIterator's \c value_type's copy constructor. - * - * \param first The first element of the range of interest. - * \param n The size of the range of interest. - * \param x The value to use as the exemplar of the copy constructor. - * \return first+n - * - * \tparam ForwardIterator is a model of Forward Iterator, - * \p ForwardIterator is mutable, and \p ForwardIterator's \c value_type has a constructor that - * takes a single argument of type \p T. - * - * The following code snippet demonstrates how to use \p uninitialized_fill to initialize a range of - * uninitialized memory. - * - * \code - * #include - * #include - * - * struct Int - * { - * __host__ __device__ - * Int(int x) : val(x) {} - * int val; - * }; - * ... - * const int N = 137; - * - * Int val(46); - * thrust::device_ptr array = thrust::device_malloc(N); - * thrust::uninitialized_fill_n(array, N, val); - * - * // Int x = array[i]; - * // x.val == 46 for all 0 <= i < N - * \endcode - * - * \see http://www.sgi.com/tech/stl/uninitialized_fill.html - * \see \c uninitialized_fill - * \see \c fill - * \see \c uninitialized_copy_n - * \see \c device_new - * \see \c device_malloc - */ -template - ForwardIterator uninitialized_fill_n(ForwardIterator first, - Size n, - const T &x); - -/*! \} // end filling - * \} // transformations - */ - -} // end thrust - -#include - diff --git a/compat/thrust/unique.h b/compat/thrust/unique.h deleted file mode 100644 index 98150f36c9..0000000000 --- a/compat/thrust/unique.h +++ /dev/null @@ -1,960 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/*! \file unique.h - * \brief Move unique elements to the front of a range - */ - -#pragma once - -#include -#include -#include - -namespace thrust -{ - - -/*! \addtogroup stream_compaction - * \{ - */ - - -/*! For each group of consecutive elements in the range [first, last) - * with the same value, \p unique removes all but the first element of - * the group. The return value is an iterator \c new_last such that - * no two consecutive elements in the range [first, new_last) are - * equal. The iterators in the range [new_last, last) are all still - * dereferenceable, but the elements that they point to are unspecified. - * \p unique is stable, meaning that the relative order of elements that are - * not removed is unchanged. - * - * This version of \p unique uses \c operator== to test for equality. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input range. - * \param last The end of the input range. - * \return The end of the unique range [first, new_last). - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable, - * and \p ForwardIterator's \c value_type is a model of Equality Comparable. 
- * - * The following code snippet demonstrates how to use \p unique to - * compact a sequence of numbers to remove consecutive duplicates using the \p thrust::host execution policy - * for parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; - * int *new_end = thrust::unique(thrust::host, A, A + N); - * // The first four values of A are now {1, 3, 2, 1} - * // Values beyond new_end are unspecified. - * \endcode - * - * \see http://www.sgi.com/tech/stl/unique.html - * \see unique_copy - */ -template -ForwardIterator unique(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last); - - -/*! For each group of consecutive elements in the range [first, last) - * with the same value, \p unique removes all but the first element of - * the group. The return value is an iterator \c new_last such that - * no two consecutive elements in the range [first, new_last) are - * equal. The iterators in the range [new_last, last) are all still - * dereferenceable, but the elements that they point to are unspecified. - * \p unique is stable, meaning that the relative order of elements that are - * not removed is unchanged. - * - * This version of \p unique uses \c operator== to test for equality. - * - * \param first The beginning of the input range. - * \param last The end of the input range. - * \return The end of the unique range [first, new_last). - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable, - * and \p ForwardIterator's \c value_type is a model of Equality Comparable. - * - * The following code snippet demonstrates how to use \p unique to - * compact a sequence of numbers to remove consecutive duplicates. - * - * \code - * #include - * ... 
- * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; - * int *new_end = thrust::unique(A, A + N); - * // The first four values of A are now {1, 3, 2, 1} - * // Values beyond new_end are unspecified. - * \endcode - * - * \see http://www.sgi.com/tech/stl/unique.html - * \see unique_copy - */ -template -ForwardIterator unique(ForwardIterator first, - ForwardIterator last); - - -/*! For each group of consecutive elements in the range [first, last) - * with the same value, \p unique removes all but the first element of - * the group. The return value is an iterator \c new_last such that - * no two consecutive elements in the range [first, new_last) are - * equal. The iterators in the range [new_last, last) are all still - * dereferenceable, but the elements that they point to are unspecified. - * \p unique is stable, meaning that the relative order of elements that are - * not removed is unchanged. - * - * This version of \p unique uses the function object \p binary_pred to test - * for equality. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input range. - * \param last The end of the input range. - * \param binary_pred The binary predicate used to determine equality. - * \return The end of the unique range [first, new_last) - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable, - * and \p ForwardIterator's \c value_type is convertible to \p BinaryPredicate's \c first_argument_type and to \p BinaryPredicate's \c second_argument_type. - * \tparam BinaryPredicate is a model of Binary Predicate. 
- * - * The following code snippet demonstrates how to use \p unique to - * compact a sequence of numbers to remove consecutive duplicates using the \p thrust::host execution policy - * for parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; - * int *new_end = thrust::unique(thrust::host, A, A + N, thrust::equal_to()); - * // The first four values of A are now {1, 3, 2, 1} - * // Values beyond new_end are unspecified. - * \endcode - * - * \see http://www.sgi.com/tech/stl/unique.html - * \see unique_copy - */ -template -ForwardIterator unique(const thrust::detail::execution_policy_base &exec, - ForwardIterator first, - ForwardIterator last, - BinaryPredicate binary_pred); - - -/*! For each group of consecutive elements in the range [first, last) - * with the same value, \p unique removes all but the first element of - * the group. The return value is an iterator \c new_last such that - * no two consecutive elements in the range [first, new_last) are - * equal. The iterators in the range [new_last, last) are all still - * dereferenceable, but the elements that they point to are unspecified. - * \p unique is stable, meaning that the relative order of elements that are - * not removed is unchanged. - * - * This version of \p unique uses the function object \p binary_pred to test - * for equality. - * - * \param first The beginning of the input range. - * \param last The end of the input range. - * \param binary_pred The binary predicate used to determine equality. - * \return The end of the unique range [first, new_last) - * - * \tparam ForwardIterator is a model of Forward Iterator, - * and \p ForwardIterator is mutable, - * and \p ForwardIterator's \c value_type is convertible to \p BinaryPredicate's \c first_argument_type and to \p BinaryPredicate's \c second_argument_type. - * \tparam BinaryPredicate is a model of Binary Predicate. 
- * - * The following code snippet demonstrates how to use \p unique to - * compact a sequence of numbers to remove consecutive duplicates. - * - * \code - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; - * int *new_end = thrust::unique(A, A + N, thrust::equal_to()); - * // The first four values of A are now {1, 3, 2, 1} - * // Values beyond new_end are unspecified. - * \endcode - * - * \see http://www.sgi.com/tech/stl/unique.html - * \see unique_copy - */ -template -ForwardIterator unique(ForwardIterator first, - ForwardIterator last, - BinaryPredicate binary_pred); - - -/*! \p unique_copy copies elements from the range [first, last) - * to a range beginning with \p result, except that in a consecutive group - * of duplicate elements only the first one is copied. The return value - * is the end of the range to which the elements are copied. - * - * The reason there are two different versions of unique_copy is that there - * are two different definitions of what it means for a consecutive group of - * elements to be duplicates. In the first version, the test is simple - * equality: the elements in a range [f, l) are duplicates if, - * for every iterator \p i in the range, either i == f or else - * *i == *(i-1). In the second, the test is an arbitrary - * \p BinaryPredicate \p binary_pred: the elements in [f, l) are - * duplicates if, for every iterator \p i in the range, either i == f - * or else binary_pred(*i, *(i-1)) is \p true. - * - * This version of \p unique_copy uses \c operator== to test for equality. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input range. - * \param last The end of the input range. - * \param result The beginning of the output range. - * \return The end of the unique range [result, result_end). - * - * \tparam DerivedPolicy The name of the derived execution policy. 
- * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is a model of Equality Comparable. - * \tparam OutputIterator is a model of Output Iterator and - * and \p InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. - * - * \pre The range [first,last) and the range [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p unique_copy to - * compact a sequence of numbers to remove consecutive duplicates using the \p thrust::host execution - * policy for parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; - * int B[N]; - * int *result_end = thrust::unique_copy(thrust::host, A, A + N, B); - * // The first four values of B are now {1, 3, 2, 1} and (result_end - B) is 4 - * // Values beyond result_end are unspecified - * \endcode - * - * \see unique - * \see http://www.sgi.com/tech/stl/unique_copy.html - */ -template -OutputIterator unique_copy(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result); - - -/*! \p unique_copy copies elements from the range [first, last) - * to a range beginning with \p result, except that in a consecutive group - * of duplicate elements only the first one is copied. The return value - * is the end of the range to which the elements are copied. - * - * The reason there are two different versions of unique_copy is that there - * are two different definitions of what it means for a consecutive group of - * elements to be duplicates. In the first version, the test is simple - * equality: the elements in a range [f, l) are duplicates if, - * for every iterator \p i in the range, either i == f or else - * *i == *(i-1). 
In the second, the test is an arbitrary - * \p BinaryPredicate \p binary_pred: the elements in [f, l) are - * duplicates if, for every iterator \p i in the range, either i == f - * or else binary_pred(*i, *(i-1)) is \p true. - * - * This version of \p unique_copy uses \c operator== to test for equality. - * - * \param first The beginning of the input range. - * \param last The end of the input range. - * \param result The beginning of the output range. - * \return The end of the unique range [result, result_end). - * - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is a model of Equality Comparable. - * \tparam OutputIterator is a model of Output Iterator and - * and \p InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. - * - * \pre The range [first,last) and the range [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p unique_copy to - * compact a sequence of numbers to remove consecutive duplicates. - * - * \code - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; - * int B[N]; - * int *result_end = thrust::unique_copy(A, A + N, B); - * // The first four values of B are now {1, 3, 2, 1} and (result_end - B) is 4 - * // Values beyond result_end are unspecified - * \endcode - * - * \see unique - * \see http://www.sgi.com/tech/stl/unique_copy.html - */ -template -OutputIterator unique_copy(InputIterator first, - InputIterator last, - OutputIterator result); - - -/*! \p unique_copy copies elements from the range [first, last) - * to a range beginning with \p result, except that in a consecutive group - * of duplicate elements only the first one is copied. The return value - * is the end of the range to which the elements are copied. - * - * This version of \p unique_copy uses the function object \c binary_pred - * to test for equality. 
- * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param first The beginning of the input range. - * \param last The end of the input range. - * \param result The beginning of the output range. - * \param binary_pred The binary predicate used to determine equality. - * \return The end of the unique range [result, result_end). - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is a model of Equality Comparable. - * \tparam OutputIterator is a model of Output Iterator and - * and \p InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. - * \tparam BinaryPredicate is a model of Binary Predicate. - * - * \pre The range [first,last) and the range [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p unique_copy to - * compact a sequence of numbers to remove consecutive duplicates using the \p thrust::host execution - * policy for parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; - * int B[N]; - * int *result_end = thrust::unique_copy(thrust::host, A, A + N, B, thrust::equal_to()); - * // The first four values of B are now {1, 3, 2, 1} and (result_end - B) is 4 - * // Values beyond result_end are unspecified. - * \endcode - * - * \see unique - * \see http://www.sgi.com/tech/stl/unique_copy.html - */ -template -OutputIterator unique_copy(const thrust::detail::execution_policy_base &exec, - InputIterator first, - InputIterator last, - OutputIterator result, - BinaryPredicate binary_pred); - - -/*! \p unique_copy copies elements from the range [first, last) - * to a range beginning with \p result, except that in a consecutive group - * of duplicate elements only the first one is copied. 
The return value - * is the end of the range to which the elements are copied. - * - * This version of \p unique_copy uses the function object \c binary_pred - * to test for equality. - * - * \param first The beginning of the input range. - * \param last The end of the input range. - * \param result The beginning of the output range. - * \param binary_pred The binary predicate used to determine equality. - * \return The end of the unique range [result, result_end). - * - * \tparam InputIterator is a model of Input Iterator, - * and \p InputIterator's \c value_type is a model of Equality Comparable. - * \tparam OutputIterator is a model of Output Iterator and - * and \p InputIterator's \c value_type is convertible to \c OutputIterator's \c value_type. - * \tparam BinaryPredicate is a model of Binary Predicate. - * - * \pre The range [first,last) and the range [result, result + (last - first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p unique_copy to - * compact a sequence of numbers to remove consecutive duplicates. - * - * \code - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; - * int B[N]; - * int *result_end = thrust::unique_copy(A, A + N, B, thrust::equal_to()); - * // The first four values of B are now {1, 3, 2, 1} and (result_end - B) is 4 - * // Values beyond result_end are unspecified. - * \endcode - * - * \see unique - * \see http://www.sgi.com/tech/stl/unique_copy.html - */ -template -OutputIterator unique_copy(InputIterator first, - InputIterator last, - OutputIterator result, - BinaryPredicate binary_pred); - - -/*! \p unique_by_key is a generalization of \p unique to key-value pairs. - * For each group of consecutive keys in the range [keys_first, keys_last) - * that are equal, \p unique_by_key removes all but the first element of - * the group. Similarly, the corresponding values in the range - * [values_first, values_first + (keys_last - keys_first)) - * are also removed. 
- * - * The return value is a \p pair of iterators (new_keys_last,new_values_last) - * such that no two consecutive elements in the range [keys_first, new_keys_last) - * are equal. - * - * This version of \p unique_by_key uses \c operator== to test for equality and - * \c project1st to reduce values with equal keys. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first The beginning of the key range. - * \param keys_last The end of the key range. - * \param values_first The beginning of the value range. - * \return A pair of iterators at end of the ranges [key_first, keys_new_last) and [values_first, values_new_last). - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator1 is a model of Forward Iterator, - * and \p ForwardIterator1 is mutable, - * and \p ForwardIterator's \c value_type is a model of Equality Comparable. - * \tparam ForwardIterator2 is a model of Forward Iterator, - * and \p ForwardIterator2 is mutable. - * - * \pre The range [keys_first, keys_last) and the range [values_first, values_first + (keys_last - keys_first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p unique_by_key to - * compact a sequence of key/value pairs to remove consecutive duplicates using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // keys - * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // values - * - * thrust::pair new_end; - * new_end = thrust::unique_by_key(thrust::host, A, A + N, B); - * - * // The first four keys in A are now {1, 3, 2, 1} and new_end.first - A is 4. - * // The first four values in B are now {9, 8, 5, 3} and new_end.second - B is 4. 
- * \endcode - * - * \see unique - * \see unique_by_key_copy - * \see reduce_by_key - */ -template - thrust::pair - unique_by_key(const thrust::detail::execution_policy_base &exec, - ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first); - - -/*! \p unique_by_key is a generalization of \p unique to key-value pairs. - * For each group of consecutive keys in the range [keys_first, keys_last) - * that are equal, \p unique_by_key removes all but the first element of - * the group. Similarly, the corresponding values in the range - * [values_first, values_first + (keys_last - keys_first)) - * are also removed. - * - * The return value is a \p pair of iterators (new_keys_last,new_values_last) - * such that no two consecutive elements in the range [keys_first, new_keys_last) - * are equal. - * - * This version of \p unique_by_key uses \c operator== to test for equality and - * \c project1st to reduce values with equal keys. - * - * \param keys_first The beginning of the key range. - * \param keys_last The end of the key range. - * \param values_first The beginning of the value range. - * \return A pair of iterators at end of the ranges [key_first, keys_new_last) and [values_first, values_new_last). - * - * \tparam ForwardIterator1 is a model of Forward Iterator, - * and \p ForwardIterator1 is mutable, - * and \p ForwardIterator's \c value_type is a model of Equality Comparable. - * \tparam ForwardIterator2 is a model of Forward Iterator, - * and \p ForwardIterator2 is mutable. - * - * \pre The range [keys_first, keys_last) and the range [values_first, values_first + (keys_last - keys_first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p unique_by_key to - * compact a sequence of key/value pairs to remove consecutive duplicates. - * - * \code - * #include - * ... 
- * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // keys - * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // values - * - * thrust::pair new_end; - * new_end = thrust::unique_by_key(A, A + N, B); - * - * // The first four keys in A are now {1, 3, 2, 1} and new_end.first - A is 4. - * // The first four values in B are now {9, 8, 5, 3} and new_end.second - B is 4. - * \endcode - * - * \see unique - * \see unique_by_key_copy - * \see reduce_by_key - */ -template - thrust::pair - unique_by_key(ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first); - - -/*! \p unique_by_key is a generalization of \p unique to key-value pairs. - * For each group of consecutive keys in the range [keys_first, keys_last) - * that are equal, \p unique_by_key removes all but the first element of - * the group. Similarly, the corresponding values in the range - * [values_first, values_first + (keys_last - keys_first)) - * are also removed. - * - * This version of \p unique_by_key uses the function object \c binary_pred - * to test for equality and \c project1st to reduce values with equal keys. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first The beginning of the key range. - * \param keys_last The end of the key range. - * \param values_first The beginning of the value range. - * \param binary_pred The binary predicate used to determine equality. - * \return The end of the unique range [first, new_last). - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam ForwardIterator1 is a model of Forward Iterator, - * and \p ForwardIterator1 is mutable, - * and \p ForwardIterator's \c value_type is a model of Equality Comparable. - * \tparam ForwardIterator2 is a model of Forward Iterator, - * and \p ForwardIterator2 is mutable. - * \tparam BinaryPredicate is a model of Binary Predicate. 
- * - * \pre The range [keys_first, keys_last) and the range [values_first, values_first + (keys_last - keys_first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p unique_by_key to - * compact a sequence of key/value pairs to remove consecutive duplicates using the \p thrust::host - * execution policy for parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // keys - * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // values - * - * thrust::pair new_end; - * thrust::equal_to binary_pred; - * new_end = thrust::unique_by_key(thrust::host, keys, keys + N, values, binary_pred); - * - * // The first four keys in A are now {1, 3, 2, 1} and new_end.first - A is 4. - * // The first four values in B are now {9, 8, 5, 3} and new_end.second - B is 4. - * \endcode - * - * \see unique - * \see unique_by_key_copy - * \see reduce_by_key - */ -template - thrust::pair - unique_by_key(const thrust::detail::execution_policy_base &exec, - ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first, - BinaryPredicate binary_pred); - - -/*! \p unique_by_key is a generalization of \p unique to key-value pairs. - * For each group of consecutive keys in the range [keys_first, keys_last) - * that are equal, \p unique_by_key removes all but the first element of - * the group. Similarly, the corresponding values in the range - * [values_first, values_first + (keys_last - keys_first)) - * are also removed. - * - * This version of \p unique_by_key uses the function object \c binary_pred - * to test for equality and \c project1st to reduce values with equal keys. - * - * \param keys_first The beginning of the key range. - * \param keys_last The end of the key range. - * \param values_first The beginning of the value range. - * \param binary_pred The binary predicate used to determine equality. - * \return The end of the unique range [first, new_last). 
- * - * \tparam ForwardIterator1 is a model of Forward Iterator, - * and \p ForwardIterator1 is mutable, - * and \p ForwardIterator's \c value_type is a model of Equality Comparable. - * \tparam ForwardIterator2 is a model of Forward Iterator, - * and \p ForwardIterator2 is mutable. - * \tparam BinaryPredicate is a model of Binary Predicate. - * - * \pre The range [keys_first, keys_last) and the range [values_first, values_first + (keys_last - keys_first)) shall not overlap. - * - * The following code snippet demonstrates how to use \p unique_by_key to - * compact a sequence of key/value pairs to remove consecutive duplicates. - * - * \code - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // keys - * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // values - * - * thrust::pair new_end; - * thrust::equal_to binary_pred; - * new_end = thrust::unique_by_key(keys, keys + N, values, binary_pred); - * - * // The first four keys in A are now {1, 3, 2, 1} and new_end.first - A is 4. - * // The first four values in B are now {9, 8, 5, 3} and new_end.second - B is 4. - * \endcode - * - * \see unique - * \see unique_by_key_copy - * \see reduce_by_key - */ -template - thrust::pair - unique_by_key(ForwardIterator1 keys_first, - ForwardIterator1 keys_last, - ForwardIterator2 values_first, - BinaryPredicate binary_pred); - - -/*! \p unique_by_key_copy is a generalization of \p unique_copy to key-value pairs. - * For each group of consecutive keys in the range [keys_first, keys_last) - * that are equal, \p unique_by_key_copy copies the first element of the group to - * a range beginning with \c keys_result and the corresponding values from the range - * [values_first, values_first + (keys_last - keys_first)) are copied to a range - * beginning with \c values_result. - * - * This version of \p unique_by_key_copy uses \c operator== to test for equality and - * \c project1st to reduce values with equal keys. 
- * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first The beginning of the input key range. - * \param keys_last The end of the input key range. - * \param values_first The beginning of the input value range. - * \param keys_result The beginning of the output key range. - * \param values_result The beginning of the output value range. - * \return A pair of iterators at end of the ranges [keys_result, keys_result_last) and [values_result, values_result_last). - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \tparam InputIterator2 is a model of Input Iterator, - * \tparam OutputIterator1 is a model of Output Iterator and - * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. - * \tparam OutputIterator2 is a model of Output Iterator and - * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. - * - * \pre The input ranges shall not overlap either output range. - * - * The following code snippet demonstrates how to use \p unique_by_key_copy to - * compact a sequence of key/value pairs and with equal keys using the \p thrust::host execution policy - * for parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys - * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values - * int C[N]; // output keys - * int D[N]; // output values - * - * thrust::pair new_end; - * new_end = thrust::unique_by_key_copy(thrust::host, A, A + N, B, C, D); - * - * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. - * // The first four values in D are now {9, 8, 5, 3} and new_end.second - D is 4. 
- * \endcode - * - * \see unique_copy - * \see unique_by_key - * \see reduce_by_key - */ -template - thrust::pair - unique_by_key_copy(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -/*! \p unique_by_key_copy is a generalization of \p unique_copy to key-value pairs. - * For each group of consecutive keys in the range [keys_first, keys_last) - * that are equal, \p unique_by_key_copy copies the first element of the group to - * a range beginning with \c keys_result and the corresponding values from the range - * [values_first, values_first + (keys_last - keys_first)) are copied to a range - * beginning with \c values_result. - * - * This version of \p unique_by_key_copy uses \c operator== to test for equality and - * \c project1st to reduce values with equal keys. - * - * \param keys_first The beginning of the input key range. - * \param keys_last The end of the input key range. - * \param values_first The beginning of the input value range. - * \param keys_result The beginning of the output key range. - * \param values_result The beginning of the output value range. - * \return A pair of iterators at end of the ranges [keys_result, keys_result_last) and [values_result, values_result_last). - * - * \tparam InputIterator1 is a model of Input Iterator, - * \tparam InputIterator2 is a model of Input Iterator, - * \tparam OutputIterator1 is a model of Output Iterator and - * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. - * \tparam OutputIterator2 is a model of Output Iterator and - * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. - * - * \pre The input ranges shall not overlap either output range. 
- * - * The following code snippet demonstrates how to use \p unique_by_key_copy to - * compact a sequence of key/value pairs and with equal keys. - * - * \code - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys - * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values - * int C[N]; // output keys - * int D[N]; // output values - * - * thrust::pair new_end; - * new_end = thrust::unique_by_key_copy(A, A + N, B, C, D); - * - * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. - * // The first four values in D are now {9, 8, 5, 3} and new_end.second - D is 4. - * \endcode - * - * \see unique_copy - * \see unique_by_key - * \see reduce_by_key - */ -template - thrust::pair - unique_by_key_copy(InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_result, - OutputIterator2 values_result); - - -/*! \p unique_by_key_copy is a generalization of \p unique_copy to key-value pairs. - * For each group of consecutive keys in the range [keys_first, keys_last) - * that are equal, \p unique_by_key_copy copies the first element of the group to - * a range beginning with \c keys_result and the corresponding values from the range - * [values_first, values_first + (keys_last - keys_first)) are copied to a range - * beginning with \c values_result. - * - * This version of \p unique_by_key_copy uses the function object \c binary_pred - * to test for equality and \c project1st to reduce values with equal keys. - * - * The algorithm's execution is parallelized as determined by \p exec. - * - * \param exec The execution policy to use for parallelization. - * \param keys_first The beginning of the input key range. - * \param keys_last The end of the input key range. - * \param values_first The beginning of the input value range. - * \param keys_result The beginning of the output key range. - * \param values_result The beginning of the output value range. 
- * \param binary_pred The binary predicate used to determine equality. - * \return A pair of iterators at end of the ranges [keys_result, keys_result_last) and [values_result, values_result_last). - * - * \tparam DerivedPolicy The name of the derived execution policy. - * \tparam InputIterator1 is a model of Input Iterator, - * \tparam InputIterator2 is a model of Input Iterator, - * \tparam OutputIterator1 is a model of Output Iterator and - * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. - * \tparam OutputIterator2 is a model of Output Iterator and - * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. - * \tparam BinaryPredicate is a model of Binary Predicate. - * - * \pre The input ranges shall not overlap either output range. - * - * The following code snippet demonstrates how to use \p unique_by_key_copy to - * compact a sequence of key/value pairs and with equal keys using the \p thrust::host execution policy for - * parallelization: - * - * \code - * #include - * #include - * ... - * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys - * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values - * int C[N]; // output keys - * int D[N]; // output values - * - * thrust::pair new_end; - * thrust::equal_to binary_pred; - * new_end = thrust::unique_by_key_copy(thrust::host, A, A + N, B, C, D, binary_pred); - * - * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. - * // The first four values in D are now {9, 8, 5, 3} and new_end.second - D is 4. - * \endcode - * - * \see unique_copy - * \see unique_by_key - * \see reduce_by_key - */ -template - thrust::pair - unique_by_key_copy(const thrust::detail::execution_policy_base &exec, - InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_result, - OutputIterator2 values_result, - BinaryPredicate binary_pred); - - -/*! 
\p unique_by_key_copy is a generalization of \p unique_copy to key-value pairs. - * For each group of consecutive keys in the range [keys_first, keys_last) - * that are equal, \p unique_by_key_copy copies the first element of the group to - * a range beginning with \c keys_result and the corresponding values from the range - * [values_first, values_first + (keys_last - keys_first)) are copied to a range - * beginning with \c values_result. - * - * This version of \p unique_by_key_copy uses the function object \c binary_pred - * to test for equality and \c project1st to reduce values with equal keys. - * - * \param keys_first The beginning of the input key range. - * \param keys_last The end of the input key range. - * \param values_first The beginning of the input value range. - * \param keys_result The beginning of the output key range. - * \param values_result The beginning of the output value range. - * \param binary_pred The binary predicate used to determine equality. - * \return A pair of iterators at end of the ranges [keys_result, keys_result_last) and [values_result, values_result_last). - * - * \tparam InputIterator1 is a model of Input Iterator, - * \tparam InputIterator2 is a model of Input Iterator, - * \tparam OutputIterator1 is a model of Output Iterator and - * and \p InputIterator1's \c value_type is convertible to \c OutputIterator1's \c value_type. - * \tparam OutputIterator2 is a model of Output Iterator and - * and \p InputIterator2's \c value_type is convertible to \c OutputIterator2's \c value_type. - * \tparam BinaryPredicate is a model of Binary Predicate. - * - * \pre The input ranges shall not overlap either output range. - * - * The following code snippet demonstrates how to use \p unique_by_key_copy to - * compact a sequence of key/value pairs and with equal keys. - * - * \code - * #include - * ... 
- * const int N = 7; - * int A[N] = {1, 3, 3, 3, 2, 2, 1}; // input keys - * int B[N] = {9, 8, 7, 6, 5, 4, 3}; // input values - * int C[N]; // output keys - * int D[N]; // output values - * - * thrust::pair new_end; - * thrust::equal_to binary_pred; - * new_end = thrust::unique_by_key_copy(A, A + N, B, C, D, binary_pred); - * - * // The first four keys in C are now {1, 3, 2, 1} and new_end.first - C is 4. - * // The first four values in D are now {9, 8, 5, 3} and new_end.second - D is 4. - * \endcode - * - * \see unique_copy - * \see unique_by_key - * \see reduce_by_key - */ -template - thrust::pair - unique_by_key_copy(InputIterator1 keys_first, - InputIterator1 keys_last, - InputIterator2 values_first, - OutputIterator1 keys_result, - OutputIterator2 values_result, - BinaryPredicate binary_pred); - - -/*! \} // end stream_compaction - */ - - -} // end namespace thrust - -#include - diff --git a/compat/thrust/version.h b/compat/thrust/version.h deleted file mode 100644 index 730997eecc..0000000000 --- a/compat/thrust/version.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright 2008-2012 NVIDIA Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/*! \file version.h - * \brief Compile-time macros encoding Thrust release version - * - * is the only Thrust header that is guaranteed to - * change with every thrust release. 
- */ - -#pragma once - -#include - -// This is the only thrust header that is guaranteed to -// change with every thrust release. -// -// THRUST_VERSION % 100 is the sub-minor version -// THRUST_VERSION / 100 % 1000 is the minor version -// THRUST_VERSION / 100000 is the major version - -/*! \def THRUST_VERSION - * \brief The preprocessor macro \p THRUST_VERSION encodes the version - * number of the Thrust library. - * - * THRUST_VERSION % 100 is the sub-minor version. - * THRUST_VERSION / 100 % 1000 is the minor version. - * THRUST_VERSION / 100000 is the major version. - */ -#define THRUST_VERSION 100700 - -/*! \def THRUST_MAJOR_VERSION - * \brief The preprocessor macro \p THRUST_MAJOR_VERSION encodes the - * major version number of the Thrust library. - */ -#define THRUST_MAJOR_VERSION (THRUST_VERSION / 100000) - -/*! \def THRUST_MINOR_VERSION - * \brief The preprocessor macro \p THRUST_MINOR_VERSION encodes the - * minor version number of the Thrust library. - */ -#define THRUST_MINOR_VERSION (THRUST_VERSION / 100 % 1000) - -/*! \def THRUST_SUBMINOR_VERSION - * \brief The preprocessor macro \p THRUST_SUBMINOR_VERSION encodes the - * sub-minor version number of the Thrust library. - */ -#define THRUST_SUBMINOR_VERSION (THRUST_VERSION % 100) - -// Declare these namespaces here for the purpose of Doxygenating them - -/*! \namespace thrust - * \brief \p thrust is the top-level namespace which contains all Thrust - * functions and types. - */ -namespace thrust -{ - -} - diff --git a/configure.ac b/configure.ac index 0cb83db9e1..b06c76e169 100644 --- a/configure.ac +++ b/configure.ac @@ -57,6 +57,7 @@ esac PTHREAD_FLAGS="-pthread" WS2_LIBS="" + case $target in *-*-mingw*) have_win32=true diff --git a/cpu-miner.c b/cpu-miner.c index 9e3c3b0ac3..b1cf7d8cee 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -7,19 +7,21 @@ * Software Foundation; either version 2 of the License, or (at your option) * any later version. See COPYING for more details. 
*/ - + #include "cpuminer-config.h" #define _GNU_SOURCE #include #include #include -#include +#include #include #include #include #include +#include #ifdef WIN32 + #include #else #include @@ -44,7 +46,7 @@ #pragma comment(lib, "winmm.lib") #endif -#define PROGRAM_NAME "minerd" +#define PROGRAM_NAME "ccminer djm edition" #define LP_SCANTIME 60 #define HEAVYCOIN_BLKHDR_SZ 84 #define MNR_BLKHDR_SZ 80 @@ -130,10 +132,24 @@ typedef enum { ALGO_JACKPOT, ALGO_QUARK, ALGO_ANIME, + ALGO_QUBIT, + ALGO_FRESH, ALGO_NIST5, ALGO_X11, ALGO_X13, + ALGO_X14, + ALGO_X15, + ALGO_X17, + ALGO_WH, + ALGO_KECCAK, + ALGO_M7, + ALGO_LYRA, + ALGO_NEOSCRYPT, + ALGO_PLUCK, + ALGO_DEEP, + ALGO_DOOM, ALGO_DMD_GR, + ALGO_GOAL, } sha256_algos; static const char *algo_names[] = { @@ -145,10 +161,24 @@ static const char *algo_names[] = { "jackpot", "quark", "anime", + "qubit", + "fresh", "nist5", "x11", "x13", + "x14", + "x15", + "x17", + "whirlcoin", + "keccak", + "m7", + "lyra2", + "neoscrypt", + "pluck", + "deep", + "doom", "dmd-gr", + "goalcoin", }; bool opt_debug = false; @@ -157,8 +187,12 @@ bool opt_benchmark = false; bool want_longpoll = true; bool have_longpoll = false; bool want_stratum = true; +bool have_gbt = true; bool have_stratum = false; +bool allow_getwork = true; +bool opt_redirect = true; static bool submit_old = false; +static char* lp_id; bool use_syslog = false; static bool opt_background = false; static bool opt_quiet = false; @@ -170,15 +204,19 @@ static json_t *opt_config; static const bool opt_time = true; static sha256_algos opt_algo = ALGO_HEAVY; static int opt_n_threads = 0; -static double opt_difficulty = 1; // CH +static double opt_difficulty = 1.; // CH bool opt_trust_pool = false; uint16_t opt_vote = 9999; static int num_processors; int device_map[8] = {0,1,2,3,4,5,6,7}; // CB char *device_name[8]; // CB +float tp_coef[8] = { -1.0}; static char *rpc_url; static char *rpc_userpass; static char *rpc_user, *rpc_pass; +static int pk_script_size; +static unsigned char 
pk_script[25]; +static char coinbase_sig[101] = ""; char *opt_cert; char *opt_proxy; long opt_proxy_type; @@ -188,6 +226,13 @@ int longpoll_thr_id = -1; int stratum_thr_id = -1; struct work_restart *work_restart = NULL; static struct stratum_ctx stratum; +//// m7 stuff +static unsigned char pblank[1]; +const void* ptr; + size_t sz; +uint32_t *m7buf; +//////////////// + pthread_mutex_t applog_lock; static pthread_mutex_t stats_lock; @@ -205,7 +250,7 @@ struct option { int *flag; int val; }; -#endif +#endif static char const usage[] = "\ Usage: " PROGRAM_NAME " [OPTIONS]\n\ @@ -219,14 +264,29 @@ Options:\n\ jackpot Jackpot hash\n\ quark Quark hash\n\ anime Animecoin hash\n\ + qubit qubitcoin hash\n\ + fresh freshcoin hash\n\ nist5 NIST5 (TalkCoin) hash\n\ x11 X11 (DarkCoin) hash\n\ x13 X13 (MaruCoin) hash\n\ + x14 X14 (MoronCoin) hash\n\ + x15 X15 (BitBlock) hash\n\ + x17 X17 (people currency coin) hash\n\ + whirlcoin whirlcoin (whirlcoin) hash\n\ + keccak keccak256 (maxcoin) hash\n\ + m7 m7 (crytonite) hash\n\ + lyra2 lyra2RE (VertCoin) hash\n\ + neoscrypt neoscrypt (FeatherCoin) hash\n\ + pluck pluck (SupCoin) hash\n\ + deep deep (deepcoin) hash\n\ + doom doomcoin hash\n\ dmd-gr Diamond-Groestl hash\n\ + goalcoin goalcoin hash\n\ -d, --devices takes a comma separated list of CUDA devices to use.\n\ Device IDs start counting from 0! 
Alternatively takes\n\ string names of your cards like gtx780ti or gt640#2\n\ (matching 2nd gt640 in the PC)\n\ + -F, --throughput coefficient to apply to the number of threads\n\ -f, --diff Divide difficulty by this factor (std is 1) \n\ -v, --vote=VOTE block reward vote (for HeavyCoin)\n\ -m, --trust-pool trust the max block reward vote (maxvote) sent by the pool\n\ @@ -270,8 +330,8 @@ static char const short_options[] = #ifdef HAVE_SYSLOG_H "S" #endif - "a:c:Dhp:Px:qr:R:s:t:T:o:u:O:Vd:f:mv:"; - + "a:c:Dhp:Px:qr:R:s:t:T:o:u:O:Vd:F:f:mv:"; + static struct option const options[] = { { "algo", 1, NULL, 'a' }, #ifndef WIN32 @@ -279,9 +339,13 @@ static struct option const options[] = { #endif { "benchmark", 0, NULL, 1005 }, { "cert", 1, NULL, 1001 }, + { "coinbase-addr", 1, NULL, 1013 }, + { "coinbase-sig", 1, NULL, 1015 }, { "config", 1, NULL, 'c' }, { "debug", 0, NULL, 'D' }, { "help", 0, NULL, 'h' }, + { "no-gbt", 0, NULL, 1011 }, + { "no-getwork", 0, NULL, 1010 }, { "no-longpoll", 0, NULL, 1003 }, { "no-stratum", 0, NULL, 1007 }, { "pass", 1, NULL, 'p' }, @@ -303,24 +367,62 @@ static struct option const options[] = { { "userpass", 1, NULL, 'O' }, { "version", 0, NULL, 'V' }, { "devices", 1, NULL, 'd' }, + { "throughput", 1, NULL, 'F'}, { "diff", 1, NULL, 'f' }, { 0, 0, 0, 0 } }; struct work { - uint32_t data[32]; + + union { + uint16_t data16[64]; + uint32_t data[32]; + uint64_t data64[16]; + }; uint32_t target[8]; uint32_t maxvote; - + uint32_t hash[8]; +int height; +char *txs; +char *workid; char job_id[128]; size_t xnonce2_len; unsigned char xnonce2[32]; }; - +/* +struct work7 { + CBlockHeader data; + uint32_t target[8],hash[8]; +}; +*/ static struct work g_work; static time_t g_work_time; static pthread_mutex_t g_work_lock; +static inline void work_free(struct work *w) +{ + free(w->txs); + free(w->workid); + free(w->job_id); + free(w->xnonce2); +} + +static inline void work_copy(struct work *dest, const struct work *src) +{ + memcpy(dest, src, sizeof(struct 
work)); + if (src->txs) + dest->txs = strdup(src->txs); + if (src->workid) + dest->workid = strdup(src->workid); +// if (src->job_id) +// dest->job_id = strdup(src->job_id); +// if (src->xnonce2) { +// dest->xnonce2 = (unsigned char*) malloc(src->xnonce2_len); +// memcpy(dest->xnonce2, src->xnonce2, src->xnonce2_len); +// } +} + + static bool jobj_binary(const json_t *obj, const char *key, void *buf, size_t buflen) { @@ -346,9 +448,22 @@ static bool jobj_binary(const json_t *obj, const char *key, static bool work_decode(const json_t *val, struct work *work) { int i; + if (opt_algo == ALGO_M7) { + // printf("\n work decode \n"); + + if (unlikely(!jobj_binary(val, "data", work->data, 122))) { + applog(LOG_ERR, "JSON invalid data"); + goto err_out; + } + if (unlikely(!jobj_binary(val, "target", work->target, sizeof(work->target)))) { + applog(LOG_ERR, "JSON invalid target"); + goto err_out; + } - if (unlikely(!jobj_binary(val, "data", work->data, sizeof(work->data)))) { - applog(LOG_ERR, "JSON inval data"); + + } else { + if (unlikely(!jobj_binary(val, "data", work->data, (opt_algo==ALGO_NEOSCRYPT)?84:sizeof(work->data)))) { + applog(LOG_ERR, "JSON inval data fucked up"); goto err_out; } if (unlikely(!jobj_binary(val, "target", work->target, sizeof(work->target)))) { @@ -360,18 +475,299 @@ static bool work_decode(const json_t *val, struct work *work) work->maxvote = 1024; } } else work->maxvote = 0; - - for (i = 0; i < ARRAY_SIZE(work->data); i++) +int data_size = (opt_algo == ALGO_NEOSCRYPT) ? 
21 : ARRAY_SIZE(work->data); + for (i = 0; i < data_size; i++) work->data[i] = le32dec(work->data + i); for (i = 0; i < ARRAY_SIZE(work->target); i++) work->target[i] = le32dec(work->target + i); - + } return true; err_out: return false; } +#define BLOCK_VERSION_MASK 0x000000ff +#define BLOCK_VERSION_CURRENT 3 + +static bool gbt_work_decode(const json_t *val, struct work *work) +{ + int i, n; + uint32_t version, curtime, bits; + uint32_t prevhash[8]; + uint32_t target[8]; + int cbtx_size; + unsigned char *cbtx = NULL; + int tx_count, tx_size; + unsigned char txc_vi[9]; + unsigned char(*merkle_tree)[32] = NULL; + bool coinbase_append = false; + bool submit_coinbase = false; + bool version_force = false; + bool version_reduce = false; + json_t *tmp, *txa; + bool rc = false; + + tmp = json_object_get(val, "mutable"); + if (tmp && json_is_array(tmp)) { + n = json_array_size(tmp); + for (i = 0; i < n; i++) { + const char *s = json_string_value(json_array_get(tmp, i)); + if (!s) + continue; + if (!strcmp(s, "coinbase/append")) + coinbase_append = true; + else if (!strcmp(s, "submit/coinbase")) + submit_coinbase = true; + else if (!strcmp(s, "version/force")) + version_force = true; + else if (!strcmp(s, "version/reduce")) + version_reduce = true; + } + } + + tmp = json_object_get(val, "height"); + if (!tmp || !json_is_integer(tmp)) { + applog(LOG_ERR, "JSON invalid height"); + goto out; + } + work->height = json_integer_value(tmp); + + tmp = json_object_get(val, "version"); + if (!tmp || !json_is_integer(tmp)) { + applog(LOG_ERR, "JSON invalid version"); + goto out; + } + version = json_integer_value(tmp); + if ((version & BLOCK_VERSION_MASK) > BLOCK_VERSION_CURRENT) { + if (version_reduce) { + version = (version & ~BLOCK_VERSION_MASK) | BLOCK_VERSION_CURRENT; + } + else if (!version_force) { + applog(LOG_ERR, "Unrecognized block version: %u", version); + goto out; + } + } + + if (unlikely(!jobj_binary(val, "previousblockhash", prevhash, sizeof(prevhash)))) { + 
applog(LOG_ERR, "JSON invalid previousblockhash"); + goto out; + } + + tmp = json_object_get(val, "curtime"); + if (!tmp || !json_is_integer(tmp)) { + applog(LOG_ERR, "JSON invalid curtime"); + goto out; + } + curtime = json_integer_value(tmp); + + if (unlikely(!jobj_binary(val, "bits", &bits, sizeof(bits)))) { + applog(LOG_ERR, "JSON invalid bits"); + goto out; + } + + /* find count and size of transactions */ + txa = json_object_get(val, "transactions"); + if (!txa || !json_is_array(txa)) { + applog(LOG_ERR, "JSON invalid transactions"); + goto out; + } + tx_count = json_array_size(txa); + tx_size = 0; + for (i = 0; i < tx_count; i++) { + const json_t *tx = json_array_get(txa, i); + const char *tx_hex = json_string_value(json_object_get(tx, "data")); + if (!tx_hex) { + applog(LOG_ERR, "JSON invalid transactions"); + goto out; + } + tx_size += strlen(tx_hex) / 2; + } + + /* build coinbase transaction */ + tmp = json_object_get(val, "coinbasetxn"); + if (tmp) { + const char *cbtx_hex = json_string_value(json_object_get(tmp, "data")); + cbtx_size = cbtx_hex ? strlen(cbtx_hex) / 2 : 0; + cbtx = (unsigned char*)malloc(cbtx_size + 100); + if (cbtx_size < 60 || !hex2bin(cbtx, cbtx_hex, cbtx_size)) { + applog(LOG_ERR, "JSON invalid coinbasetxn"); + goto out; + } + } + else { + int64_t cbvalue; + if (!pk_script_size) { + if (allow_getwork) { + applog(LOG_INFO, "No payout address provided, switching to getwork"); + have_gbt = false; + } + else + applog(LOG_ERR, "No payout address provided"); + goto out; + } + tmp = json_object_get(val, "coinbasevalue"); + if (!tmp || !json_is_number(tmp)) { + applog(LOG_ERR, "JSON invalid coinbasevalue"); + goto out; + } + cbvalue = json_is_integer(tmp) ? 
json_integer_value(tmp) : json_number_value(tmp); + cbtx = (unsigned char*) malloc(256); + le32enc((uint32_t *)cbtx, 1); /* version */ + cbtx[4] = 1; /* in-counter */ + memset(cbtx + 5, 0x00, 32); /* prev txout hash */ + le32enc((uint32_t *)(cbtx + 37), 0xffffffff); /* prev txout index */ + cbtx_size = 43; + /* BIP 34: height in coinbase */ + for (n = work->height; n; n >>= 8) + cbtx[cbtx_size++] = n & 0xff; + cbtx[42] = cbtx_size - 43; + cbtx[41] = cbtx_size - 42; /* scriptsig length */ + le32enc((uint32_t *)(cbtx + cbtx_size), 0xffffffff); /* sequence */ + cbtx_size += 4; + cbtx[cbtx_size++] = 1; /* out-counter */ + le32enc((uint32_t *)(cbtx + cbtx_size), (uint32_t)cbvalue); /* value */ + le32enc((uint32_t *)(cbtx + cbtx_size + 4), cbvalue >> 32); + cbtx_size += 8; + cbtx[cbtx_size++] = pk_script_size; /* txout-script length */ + memcpy(cbtx + cbtx_size, pk_script, pk_script_size); + cbtx_size += pk_script_size; + le32enc((uint32_t *)(cbtx + cbtx_size), 0); /* lock time */ + cbtx_size += 4; + coinbase_append = true; + } + if (coinbase_append) { + unsigned char xsig[100]; + int xsig_len = 0; + if (*coinbase_sig) { + n = strlen(coinbase_sig); + if (cbtx[41] + xsig_len + n <= 100) { + memcpy(xsig + xsig_len, coinbase_sig, n); + xsig_len += n; + } + else { + applog(LOG_WARNING, "Signature does not fit in coinbase, skipping"); + } + } + tmp = json_object_get(val, "coinbaseaux"); + if (tmp && json_is_object(tmp)) { + void *iter = json_object_iter(tmp); + while (iter) { + unsigned char buf[100]; + const char *s = json_string_value(json_object_iter_value(iter)); + n = s ? strlen(s) / 2 : 0; + if (!s || n > 100 || !hex2bin(buf, s, n)) { + applog(LOG_ERR, "JSON invalid coinbaseaux"); + break; + } + if (cbtx[41] + xsig_len + n <= 100) { + memcpy(xsig + xsig_len, buf, n); + xsig_len += n; + } + iter = json_object_iter_next(tmp, iter); + } + } + if (xsig_len) { + unsigned char *ssig_end = cbtx + 42 + cbtx[41]; + int push_len = cbtx[41] + xsig_len < 76 ? 
1 : + cbtx[41] + 2 + xsig_len > 100 ? 0 : 2; + n = xsig_len + push_len; + memmove(ssig_end + n, ssig_end, cbtx_size - 42 - cbtx[41]); + cbtx[41] += n; + if (push_len == 2) + *(ssig_end++) = 0x4c; /* OP_PUSHDATA1 */ + if (push_len) + *(ssig_end++) = xsig_len; + memcpy(ssig_end, xsig, xsig_len); + cbtx_size += n; + } + } + + n = varint_encode(txc_vi, 1 + tx_count); + work->txs = (char*)malloc(2 * (n + cbtx_size + tx_size) + 1); + abin2hex(work->txs, txc_vi, n); + abin2hex(work->txs + 2 * n, cbtx, cbtx_size); + + /* generate merkle root */ + merkle_tree = (unsigned char(*)[32]) malloc(32 * ((1 + tx_count + 1) & ~1)); + sha256d(merkle_tree[0], cbtx, cbtx_size); + for (i = 0; i < tx_count; i++) { + tmp = json_array_get(txa, i); + const char *tx_hex = json_string_value(json_object_get(tmp, "data")); + const int tx_size = tx_hex ? strlen(tx_hex) / 2 : 0; + unsigned char *tx = (unsigned char*)malloc(tx_size); + if (!tx_hex || !hex2bin(tx, tx_hex, tx_size)) { + applog(LOG_ERR, "JSON invalid transactions"); + free(tx); + goto out; + } + sha256d(merkle_tree[1 + i], tx, tx_size); + if (!submit_coinbase) + strcat(work->txs, tx_hex); + } + n = 1 + tx_count; + while (n > 1) { + if (n % 2) { + memcpy(merkle_tree[n], merkle_tree[n - 1], 32); + ++n; + } + n /= 2; + for (i = 0; i < n; i++) + sha256d(merkle_tree[i], merkle_tree[2 * i], 64); + } + + /* assemble block header */ + work->data[0] = swab32(version); + for (i = 0; i < 8; i++) + work->data[8 - i] = le32dec(prevhash + i); + for (i = 0; i < 8; i++) + work->data[9 + i] = be32dec((uint32_t *)merkle_tree[0] + i); + work->data[17] = swab32(curtime); + work->data[18] = le32dec(&bits); + memset(work->data + 19, 0x00, 52); + work->data[20] = 0x80000000; + work->data[31] = 0x00000280; + + if (unlikely(!jobj_binary(val, "target", target, sizeof(target)))) { + applog(LOG_ERR, "JSON invalid target"); + goto out; + } + for (i = 0; i < ARRAY_SIZE(work->target); i++) + work->target[7 - i] = be32dec(target + i); + + tmp = json_object_get(val, 
"workid"); + if (tmp) { + if (!json_is_string(tmp)) { + applog(LOG_ERR, "JSON invalid workid"); + goto out; + } + work->workid = strdup(json_string_value(tmp)); + } + + /* Long polling */ + tmp = json_object_get(val, "longpollid"); + if (want_longpoll && json_is_string(tmp)) { + free(lp_id); + lp_id = strdup(json_string_value(tmp)); + if (!have_longpoll) { + char *lp_uri; + tmp = json_object_get(val, "longpolluri"); + lp_uri = json_is_string(tmp) ? strdup(json_string_value(tmp)) : rpc_url; + have_longpoll = true; + tq_push(thr_info[longpoll_thr_id].q, lp_uri); + } + } + + rc = true; + +out: + free(cbtx); + free(merkle_tree); + return rc; +} + + +/* static void share_result(int result, const char *reason) { char s[345]; @@ -396,23 +792,90 @@ static void share_result(int result, const char *reason) if (opt_debug && reason) applog(LOG_DEBUG, "DEBUG: reject reason: %s", reason); } +*/ +int hashratessize=250; +double hashrates [250]= { }; +double totalhashrate = 0.; +double totalhashsquare =0.; +int hashcomplete=0; +int hashrow=0; +static void share_result(int result, const char *reason) +{ + char s[345]; + char s1[345]; + char s2[345]; + double hashrate; + int i; + double averagehashrate=0.; + double avsquare=0.; + double stddev=0.; + hashrate = 0.; + pthread_mutex_lock(&stats_lock); + for (i = 0; i < opt_n_threads; i++) + hashrate += thr_hashrates[i]; + result ? accepted_count++ : rejected_count++; + pthread_mutex_unlock(&stats_lock); + + sprintf(s, hashrate >= 1e6 ? "%.0f" : "%.2f", 1e-3 * hashrate); + totalhashrate+=(double) hashrate; + totalhashsquare+=pow((double)hashrate,2); + hashrow++; + averagehashrate=totalhashrate/(double)hashrow; + avsquare=totalhashsquare/(double)hashrow; + stddev = sqrt(avsquare-pow(averagehashrate,2)); + sprintf(s1, hashrate >= 1e6 ? "%.0f" : "%.2f", 1e-3 * averagehashrate); + sprintf(s2, hashrate >= 1e6 ? 
"%.0f" : "%.2f", 1e-3 * stddev); + + applog(LOG_INFO, "accepted: %lu/%lu (%.2f%%), %s kh/s (%s +/- %s) %s", + accepted_count, + accepted_count + rejected_count, + 100. * accepted_count / (accepted_count + rejected_count), + s,s1,s2, result ? "(yay!!!)" : "(booooo)"); + + if (opt_debug && reason) + applog(LOG_DEBUG, "DEBUG: reject reason: %s", reason); + +} static bool submit_upstream_work(CURL *curl, struct work *work) { char *str = NULL; json_t *val, *res, *reason; + char data_str[2 * sizeof(work->data) + 1]; char s[345]; int i; bool rc = false; /* pass if the previous hash is not the current previous hash */ + if (opt_algo == ALGO_M7) { + if (memcmp(work->data , g_work.data , 96)) { + if (opt_debug) + applog(LOG_DEBUG, "DEBUG: stale work detected, discarding"); + return true; + } + } else { if (memcmp(work->data + 1, g_work.data + 1, 32)) { if (opt_debug) applog(LOG_DEBUG, "DEBUG: stale work detected, discarding"); return true; } - + } if (have_stratum) { + if (opt_algo == ALGO_M7) { + + uint64_t ntime, nonce; + char *ntimestr, *noncestr, *xnonce2str; + + be64enc(&ntime, work->data64[12]); + be32enc(&nonce, work->data[29]); + ntimestr=bin2hex((const unsigned char *)(&ntime), 8); + noncestr=bin2hex((const unsigned char *)(&nonce), 4); + xnonce2str = bin2hex(work->xnonce2, work->xnonce2_len); + sprintf(s, + "{\"method\": \"mining.submit\", \"params\": [\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"id\":4}", + rpc_user, work->job_id, xnonce2str, ntimestr, noncestr); + free(xnonce2str); + } else { uint32_t ntime, nonce; uint16_t nvote; char *ntimestr, *noncestr, *xnonce2str, *nvotestr; @@ -438,30 +901,101 @@ static bool submit_upstream_work(CURL *curl, struct work *work) free(noncestr); free(xnonce2str); free(nvotestr); - + } if (unlikely(!stratum_send_line(&stratum, s))) { applog(LOG_ERR, "submit_upstream_work stratum_send_line failed"); goto out; } +// gbt + } + else if (work->txs) { + char *req; + + for (i = 0; i < ARRAY_SIZE(work->data); i++) + be32enc(work->data + 
i, work->data[i]); + abin2hex(data_str, (unsigned char *)work->data, 80); + if (work->workid) { + char *params; + val = json_object(); + json_object_set_new(val, "workid", json_string(work->workid)); + params = json_dumps(val, 0); + json_decref(val); + req = (char*)malloc(128 + 2 * 80 + strlen(work->txs) + strlen(params)); + sprintf(req, + "{\"method\": \"submitblock\", \"params\": [\"%s%s\", %s], \"id\":1}\r\n", + data_str, work->txs, params); + free(params); + } + else { + req = (char*)malloc(128 + 2 * 80 + strlen(work->txs)); + sprintf(req, + "{\"method\": \"submitblock\", \"params\": [\"%s%s\"], \"id\":1}\r\n", + data_str, work->txs); + } + val = json_rpc_call2(curl, rpc_url, rpc_userpass, req, NULL, 0); + free(req); + if (unlikely(!val)) { + applog(LOG_ERR, "submit_upstream_work json_rpc_call failed"); + goto out; + } + + res = json_object_get(val, "result"); + if (json_is_object(res)) { + char *res_str; + bool sumres = false; + void *iter = json_object_iter(res); + while (iter) { + if (json_is_null(json_object_iter_value(iter))) { + sumres = true; + break; + } + iter = json_object_iter_next(res, iter); + } + res_str = json_dumps(res, 0); + share_result(sumres, res_str); + free(res_str); + } + else + share_result(json_is_null(res), json_string_value(res)); + + json_decref(val); +/// } else { /* build hex string */ - - if (opt_algo != ALGO_HEAVY && opt_algo != ALGO_MJOLLNIR) { - for (i = 0; i < ARRAY_SIZE(work->data); i++) + if (opt_algo != ALGO_M7) { + if (opt_algo != ALGO_HEAVY && opt_algo != ALGO_MJOLLNIR && opt_algo) { + int data_size = (opt_algo == ALGO_NEOSCRYPT) ? 80 : sizeof(work->data); + for (i = 0; i < (data_size >>2); i++) le32enc(work->data + i, work->data[i]); } - str = bin2hex((unsigned char *)work->data, sizeof(work->data)); + int data_size = (opt_algo == ALGO_NEOSCRYPT) ? 
80 : sizeof(work->data); + str = bin2hex((unsigned char *)work->data,data_size); if (unlikely(!str)) { applog(LOG_ERR, "submit_upstream_work OOM"); goto out; - } + } + } else { + + + abin2hex(data_str,(unsigned char *)work->data, 122); + if (unlikely(!data_str)) { + applog(LOG_ERR, "submit_upstream_work OOM"); + goto out; + } + } // M7 + + if (opt_algo == ALGO_M7) { + sprintf(s, + "{\"method\": \"getwork\", \"params\": [ \"%s\" ], \"id\":1}\r\n", + data_str); + } else { /* build JSON-RPC request */ sprintf(s, "{\"method\": \"getwork\", \"params\": [ \"%s\" ], \"id\":1}\r\n", str); - + } /* issue JSON-RPC request */ val = json_rpc_call(curl, rpc_url, rpc_userpass, s, false, false, NULL); if (unlikely(!val)) { @@ -486,28 +1020,61 @@ static bool submit_upstream_work(CURL *curl, struct work *work) static const char *rpc_req = "{\"method\": \"getwork\", \"params\": [], \"id\":0}\r\n"; +#define GBT_CAPABILITIES "[\"coinbasetxn\", \"coinbasevalue\", \"longpoll\", \"workid\"]" +static const char *gbt_req = +"{\"method\": \"getblocktemplate\", \"params\": [{\"capabilities\": " +GBT_CAPABILITIES "}], \"id\":0}\r\n"; +static const char *gbt_lp_req = +"{\"method\": \"getblocktemplate\", \"params\": [{\"capabilities\": " +GBT_CAPABILITIES ", \"longpollid\": \"%s\"}], \"id\":0}\r\n"; + + static bool get_upstream_work(CURL *curl, struct work *work) { json_t *val; bool rc; + int err; struct timeval tv_start, tv_end, diff; - +start: gettimeofday(&tv_start, NULL); - val = json_rpc_call(curl, rpc_url, rpc_userpass, rpc_req, - want_longpoll, false, NULL); + val = json_rpc_call2(curl, rpc_url, rpc_userpass, + have_gbt ? gbt_req : rpc_req, + &err, have_gbt ? 
JSON_RPC_QUIET_404 : 0); +// val = json_rpc_call(curl, rpc_url, rpc_userpass, rpc_req, +// want_longpoll, false, NULL); gettimeofday(&tv_end, NULL); - + if (have_stratum) { if (val) json_decref(val); return true; } + if (!have_gbt && !allow_getwork) { + applog(LOG_ERR, "No usable protocol"); + if (val) + json_decref(val); + return false; + } + + if (have_gbt && allow_getwork && !val && err == CURLE_OK) { + applog(LOG_INFO, "getblocktemplate failed, falling back to getwork"); + have_gbt = false; + goto start; + } + if (!val) return false; + if (have_gbt) { + rc = gbt_work_decode(json_object_get(val, "result"), work); + if (!have_gbt) { + json_decref(val); + goto start; + } + } else { rc = work_decode(json_object_get(val, "result"), work); - + } if (opt_debug && rc) { timeval_subtract(&diff, &tv_end, &tv_start); applog(LOG_DEBUG, "DEBUG: got new work in %d ms", @@ -526,6 +1093,7 @@ static void workio_cmd_free(struct workio_cmd *wc) switch (wc->cmd) { case WC_SUBMIT_WORK: +// work_free(wc->u.work); free(wc->u.work); break; default: /* do nothing */ @@ -541,6 +1109,7 @@ static bool workio_get_work(struct workio_cmd *wc, CURL *curl) struct work *ret_work; int failures = 0; + ret_work = (struct work*)calloc(1, sizeof(*ret_work)); if (!ret_work) return false; @@ -597,7 +1166,7 @@ static void *workio_thread(void *userdata) applog(LOG_ERR, "CURL initialization failed"); return NULL; } - + // printf("workio thread\n"); while (ok) { struct workio_cmd *wc; @@ -645,7 +1214,7 @@ static bool get_work(struct thr_info *thr, struct work *work) memset(work->target, 0x00, sizeof(work->target)); return true; } - + /* fill out work request message */ wc = (struct workio_cmd *)calloc(1, sizeof(*wc)); if (!wc) @@ -653,13 +1222,13 @@ static bool get_work(struct thr_info *thr, struct work *work) wc->cmd = WC_GET_WORK; wc->thr = thr; - + /* send work request to workio thread */ if (!tq_push(thr_info[work_thr_id].q, wc)) { workio_cmd_free(wc); return false; } - + /* wait for response, a unit 
of work */ work_heap = (struct work *)tq_pop(thr->q, NULL); if (!work_heap) @@ -668,7 +1237,7 @@ static bool get_work(struct thr_info *thr, struct work *work) /* copy returned work into storage provided by caller */ memcpy(work, work_heap, sizeof(*work)); free(work_heap); - + // printf("getwork 4\n"); return true; } @@ -686,7 +1255,8 @@ static bool submit_work(struct thr_info *thr, const struct work *work_in) wc->cmd = WC_SUBMIT_WORK; wc->thr = thr; - memcpy(wc->u.work, work_in, sizeof(*work_in)); +// memcpy(wc->u.work, work_in, sizeof(*work_in)); + work_copy(wc->u.work, work_in); /* send solution to workio thread */ if (!tq_push(thr_info[work_thr_id].q, wc)) @@ -703,7 +1273,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work) { unsigned char merkle_root[64]; int i; - + // printf("\n stratum_gen_work\n "); pthread_mutex_lock(&sctx->work_lock); strcpy(work->job_id, sctx->job.job_id); @@ -714,7 +1284,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work) if (opt_algo == ALGO_HEAVY || opt_algo == ALGO_MJOLLNIR) heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size); else - if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL) + if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL || opt_algo == ALGO_WH || opt_algo == ALGO_KECCAK ) SHA256((unsigned char*)sctx->job.coinbase, sctx->job.coinbase_size, (unsigned char*)merkle_root); else sha256d(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size); @@ -768,28 +1338,70 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work) applog(LOG_DEBUG, "DEBUG: job_id='%s' extranonce2=%s ntime=%08x", work->job_id, xnonce2str, swab32(work->data[17])); free(xnonce2str); - } - - if (opt_algo == ALGO_JACKPOT) + } + + if (opt_algo == ALGO_JACKPOT || opt_algo == ALGO_NEOSCRYPT || opt_algo == ALGO_PLUCK) diff_to_target(work->target, sctx->job.diff / (65536.0 * opt_difficulty)); - else if (opt_algo == ALGO_FUGUE256 || opt_algo == 
ALGO_GROESTL || opt_algo == ALGO_DMD_GR) + else if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL || opt_algo == ALGO_DMD_GR || opt_algo == ALGO_FRESH) diff_to_target(work->target, sctx->job.diff / (256.0 * opt_difficulty)); + else if (opt_algo == ALGO_KECCAK ) // || opt_algo == ALGO_LYRA) + diff_to_target(work->target, sctx->job.diff / (128.0 * opt_difficulty)); // seems to work best, minimize rejected share else diff_to_target(work->target, sctx->job.diff / opt_difficulty); } +static void stratum_gen_work_m7(struct stratum_ctx *sctx, struct work *work) +{ + + pthread_mutex_lock(&sctx->work_lock); + strcpy(work->job_id, sctx->job.job_id); + work->xnonce2_len = sctx->xnonce2_size; + memcpy(work->xnonce2, sctx->job.xnonce2, sctx->xnonce2_size); + + /* Increment extranonce2 */ + for (int i = 0; i < (int) sctx->xnonce2_size && !++sctx->job.xnonce2[i]; i++); + + /* Assemble block header */ + memset(work->data, 0, 122); + memcpy(work->data, sctx->job.m7prevblock, 32); + memcpy(work->data + 8, sctx->job.m7accroot, 32); + memcpy(work->data + 16, sctx->job.m7merkleroot, 32); + work->data64[12] = be64dec(sctx->job.m7ntime); + work->data64[13] = be64dec(sctx->job.m7height); + unsigned char *xnonce_ptr = (unsigned char *)(work->data + 28); + for (int i = 0; i < (int) sctx->xnonce1_size; i++) { + *(xnonce_ptr + i) = sctx->xnonce1[i]; + } + for (int i = 0; i < (int) work->xnonce2_len; i++) { + *(xnonce_ptr + sctx->xnonce1_size + i) = work->xnonce2[i]; + } + work->data16[60] = be16dec(sctx->job.m7version); + + pthread_mutex_unlock(&sctx->work_lock); + + diff_to_target(work->target, sctx->job.diff / (65536.0* opt_difficulty)); + + if (opt_debug) { + char data_str[245], target_str[65]; + abin2hex(data_str, (unsigned char *)work->data, 122); + applog(LOG_DEBUG, "DEBUG: stratum_gen_work data %s", data_str); + abin2hex(target_str, (unsigned char *)work->target, 32); + applog(LOG_DEBUG, "DEBUG: stratum_gen_work target %s", target_str); + } +} + static void *miner_thread(void 
*userdata) { struct thr_info *mythr = (struct thr_info *)userdata; int thr_id = mythr->id; struct work work; uint32_t max_nonce; - uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20; + uint32_t end_nonce = (0xffffffffU) / opt_n_threads * (thr_id + 1) - 0x20; unsigned char *scratchbuf = NULL; char s[16]; - int i; - static int rounds = 0; + static int rounds = 0; + memset(&work, 0, sizeof(work)); // prevent work from being used uninitialized /* Set worker threads to nice 19 and then preferentially to SCHED_IDLE @@ -808,25 +1420,47 @@ static void *miner_thread(void *userdata) thr_id, thr_id % num_processors); affine_to_cpu(thr_id, thr_id % num_processors); } - + // printf("\n miner threads 2\n"); while (1) { unsigned long hashes_done; + struct timeval tv_start, tv_end, diff; int64_t max64; int rc; if (have_stratum) { + while (time(NULL) >= g_work_time + 120) sleep(1); pthread_mutex_lock(&g_work_lock); - if (work.data[19] >= end_nonce) - stratum_gen_work(&stratum, &g_work); + bool nonce_over; + if (opt_algo == ALGO_M7) { + nonce_over = work.data[29] >= end_nonce; + } else { + nonce_over = work.data[19] >= end_nonce; + } + // printf("nonce over %d\n",nonce_over); + if (opt_algo == ALGO_M7) { + if (work.data[29] >= end_nonce && !memcmp(work.data, g_work.data, 116)) + stratum_gen_work_m7(&stratum, &g_work); + + } else { + + if (work.data[19] >= end_nonce && !memcmp(work.data, g_work.data, 76)) + stratum_gen_work(&stratum, &g_work); + } } else { + int min_scantime = have_longpoll ? 
LP_SCANTIME : opt_scantime; /* obtain new work from internal workio thread */ pthread_mutex_lock(&g_work_lock); - if (!have_stratum && (!have_longpoll || - time(NULL) >= g_work_time + LP_SCANTIME*3/4 || - work.data[19] >= end_nonce)) { + bool nonce_over; + if (opt_algo == ALGO_M7) { + nonce_over = work.data[29] >= end_nonce; + } else { + nonce_over = work.data[19] >= end_nonce; + } + + if (!have_stratum && (time(NULL) - g_work_time >= min_scantime || nonce_over)) { if (unlikely(!get_work(mythr, &g_work))) { applog(LOG_ERR, "work retrieval failed, exiting " "mining thread %d", mythr->id); @@ -835,16 +1469,33 @@ static void *miner_thread(void *userdata) } g_work_time = have_stratum ? 0 : time(NULL); } + } +///weird stuff +/* if (have_stratum) { pthread_mutex_unlock(&g_work_lock); continue; } - } +*/ + if (opt_algo == ALGO_M7) { + + + if (memcmp(work.data, g_work.data, 116)) { + memcpy(&work, &g_work, sizeof(struct work)); +// work_free(&work); +// work_copy(&work, &g_work); + work.data[29] = (0xffffffffU) / opt_n_threads * thr_id; + } else + work.data[29]++; // todo + } else { if (memcmp(work.data, g_work.data, 76)) { memcpy(&work, &g_work, sizeof(struct work)); +// work_free(&work); +// work_copy(&work, &g_work); work.data[19] = 0xffffffffU / opt_n_threads * thr_id; } else work.data[19]++; + } pthread_mutex_unlock(&g_work_lock); work_restart[thr_id].restart = 0; @@ -855,12 +1506,36 @@ static void *miner_thread(void *userdata) max64 = g_work_time + (have_longpoll ? LP_SCANTIME : opt_scantime) - time(NULL); max64 *= (int64_t)thr_hashrates[thr_id]; - if (max64 <= 0) - max64 = (opt_algo == ALGO_JACKPOT) ? 
0x1fffLL : 0xfffffLL; - if ((int64_t)work.data[19] + max64 > end_nonce) - max_nonce = end_nonce; - else - max_nonce = (uint32_t)(work.data[19] + max64); + + if (max64 <= 0) { + switch (opt_algo) { + case ALGO_JACKPOT: + max64 = 0x1fffLL; + break; + case ALGO_NEOSCRYPT: + case ALGO_PLUCK: + max64 = 0xfffLL; + break; + case ALGO_M7: + max64 = 0x3ffffLL; + break; + default: + max64 = 0xfffffLL; + break; + } + } + if (opt_algo == ALGO_M7) { + if ((int64_t) work.data[29] + max64 > (int64_t) end_nonce) + max_nonce = end_nonce; + else + max_nonce = (uint32_t)(work.data[29] + max64); + } else { + if ((int64_t) work.data[19] + max64 > (int64_t) end_nonce) { + max_nonce = end_nonce;} + else { + max_nonce = (uint32_t) (work.data[19] + max64);} + } + hashes_done = 0; gettimeofday(&tv_start, NULL); @@ -908,7 +1583,18 @@ static void *miner_thread(void *userdata) rc = scanhash_anime(thr_id, work.data, work.target, max_nonce, &hashes_done); break; - + case ALGO_QUBIT: + rc = scanhash_qubit(thr_id, work.data, work.target, + max_nonce, &hashes_done); + break; + case ALGO_DOOM: + rc = scanhash_doom(thr_id, work.data, work.target, + max_nonce, &hashes_done); + break; + case ALGO_FRESH: + rc = scanhash_fresh(thr_id, work.data, work.target, + max_nonce, &hashes_done); + break; case ALGO_NIST5: rc = scanhash_nist5(thr_id, work.data, work.target, max_nonce, &hashes_done); @@ -923,35 +1609,75 @@ static void *miner_thread(void *userdata) rc = scanhash_x13(thr_id, work.data, work.target, max_nonce, &hashes_done); break; + case ALGO_X14: + rc = scanhash_x14(thr_id, work.data, work.target, + max_nonce, &hashes_done); + break; + + case ALGO_X15: + rc = scanhash_x15(thr_id, work.data, work.target, + max_nonce, &hashes_done); + break; + + case ALGO_X17: + rc = scanhash_x17(thr_id, work.data, work.target, + max_nonce, &hashes_done); + break; + case ALGO_M7: + + rc = scanhash_m7(thr_id,work.data, work.target,max_nonce, &hashes_done); + + break; + case ALGO_LYRA: + rc = 
scanhash_lyra(thr_id,work.data, work.target,max_nonce, &hashes_done); + break; + + case ALGO_PLUCK: + rc = scanhash_pluck(thr_id, work.data, work.target, max_nonce, &hashes_done); + break; + + + case ALGO_WH: + rc = scanhash_wh(thr_id, work.data, work.target, + max_nonce, &hashes_done); + break; + case ALGO_DEEP: + rc = scanhash_deep(thr_id, work.data, work.target, + max_nonce, &hashes_done); + break; + case ALGO_KECCAK: + rc = scanhash_keccak256(thr_id, work.data, work.target, + max_nonce, &hashes_done); + break; default: /* should never happen */ goto out; } -// if (opt_benchmark) -// if (++rounds == 1) exit(0); /* record scanhash elapsed time */ gettimeofday(&tv_end, NULL); + + timeval_subtract(&diff, &tv_end, &tv_start); if (diff.tv_usec || diff.tv_sec) { pthread_mutex_lock(&stats_lock); - thr_hashrates[thr_id] = - hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec); + thr_hashrates[thr_id] = hashes_done / (diff.tv_sec + 1e-6 * diff.tv_usec); pthread_mutex_unlock(&stats_lock); } + if (!opt_quiet) { sprintf(s, thr_hashrates[thr_id] >= 1e6 ? "%.0f" : "%.2f", 1e-3 * thr_hashrates[thr_id]); applog(LOG_INFO, "GPU #%d: %s, %s khash/s", device_map[thr_id], device_name[thr_id], s); -// applog(LOG_INFO, "thread %d: %lu hashes, %s khash/s", -// thr_id, hashes_done, s); } + if (opt_benchmark && thr_id == opt_n_threads - 1) { double hashrate = 0.; - for (i = 0; i < opt_n_threads && thr_hashrates[i]; i++) + int i; + for (i = 0; i < opt_n_threads && thr_hashrates[i]; i++) hashrate += thr_hashrates[i]; if (i == opt_n_threads) { sprintf(s, hashrate >= 1e6 ? 
"%.0f" : "%.2f", 1e-3 * hashrate); @@ -1018,11 +1744,22 @@ static void *longpoll_thread(void *userdata) applog(LOG_INFO, "Long-polling activated for %s", lp_url); while (1) { - json_t *val, *soval; + char *req = NULL; + json_t *val, *soval, *res; int err; + if (have_gbt) { + req = (char*)malloc(strlen(gbt_lp_req) + strlen(lp_id) + 1); + sprintf(req, gbt_lp_req, lp_id); + } +// val = json_rpc_call(curl, lp_url, rpc_userpass, rpc_req, +// false, true, &err); + + val = json_rpc_call2(curl, lp_url, rpc_userpass, + req ? req : rpc_req, &err, + JSON_RPC_LONGPOLL); + free(req); + - val = json_rpc_call(curl, lp_url, rpc_userpass, rpc_req, - false, true, &err); if (have_stratum) { if (val) json_decref(val); @@ -1030,15 +1767,27 @@ static void *longpoll_thread(void *userdata) } if (likely(val)) { if (!opt_quiet) applog(LOG_INFO, "LONGPOLL detected new block"); - soval = json_object_get(json_object_get(val, "result"), "submitold"); + res = json_object_get(val, "result"); + soval = json_object_get(res, "submitold"); submit_old = soval ? 
json_is_true(soval) : false; pthread_mutex_lock(&g_work_lock); + bool rc; + if (have_gbt) + rc = gbt_work_decode(res, &g_work); + else + rc = work_decode(res, &g_work); + if (rc) { + time(&g_work_time); + restart_threads(); + } +/* if (work_decode(json_object_get(val, "result"), &g_work)) { if (opt_debug) applog(LOG_DEBUG, "DEBUG: got new work"); time(&g_work_time); restart_threads(); } +*/ pthread_mutex_unlock(&g_work_lock); json_decref(val); } else { @@ -1103,7 +1852,7 @@ static void *stratum_thread(void *userdata) { struct thr_info *mythr = (struct thr_info *)userdata; char *s; - + // printf("coming here stratum thread"); stratum.url = (char*)tq_pop(mythr->q, NULL); if (!stratum.url) goto out; @@ -1135,7 +1884,11 @@ static void *stratum_thread(void *userdata) if (stratum.job.job_id && (strcmp(stratum.job.job_id, g_work.job_id) || !g_work_time)) { pthread_mutex_lock(&g_work_lock); - stratum_gen_work(&stratum, &g_work); + if (opt_algo == ALGO_M7) { + stratum_gen_work_m7(&stratum, &g_work); + } else { + stratum_gen_work(&stratum, &g_work); + } time(&g_work_time); pthread_mutex_unlock(&g_work_lock); if (stratum.job.clean) { @@ -1154,8 +1907,13 @@ static void *stratum_thread(void *userdata) applog(LOG_ERR, "Stratum connection interrupted"); continue; } + if (opt_algo == ALGO_M7) { + if (!stratum_handle_method_m7(&stratum, s)) + stratum_handle_response(s); + } else { if (!stratum_handle_method(&stratum, s)) stratum_handle_response(s); + } free(s); } @@ -1165,7 +1923,7 @@ static void *stratum_thread(void *userdata) static void show_version_and_exit(void) { - printf("%s\n%s\n", PACKAGE_STRING, curl_version()); + printf("%s\n%s\n", PACKAGE_STRING, curl_version()); exit(0); } @@ -1351,6 +2109,29 @@ static void parse_arg (int key, char *arg) case 1007: want_stratum = false; break; + case 1010: + allow_getwork = false; + break; + case 1011: + have_gbt = false; + break; + case 1013: /* --coinbase-addr */ + pk_script_size = address_to_script(pk_script, sizeof(pk_script), 
arg); + if (!pk_script_size) { +/* + fprintf(stderr, "%s: invalid address -- '%s'\n", + pname, arg); +*/ + show_usage_and_exit(1); + } + break; + case 1015: /* --coinbase-sig */ + if (strlen(arg) + 1 > sizeof(coinbase_sig)) { +// fprintf(stderr, "%s: coinbase signature too long\n", pname); + show_usage_and_exit(1); + } + strcpy(coinbase_sig, arg); + break; case 'S': use_syslog = true; break; @@ -1378,8 +2159,21 @@ static void parse_arg (int key, char *arg) } pch = strtok (NULL, ","); } - } + } break; + + case 'F': + { + char * pch = strtok (arg,","); + int tmp_n_threads = 0; + float last = 0; + while (pch != NULL) { + tp_coef[tmp_n_threads++] = last = atof(pch); + pch = strtok (NULL, ","); + } + while (tmp_n_threads < 8) tp_coef[tmp_n_threads++] = last; + } + break; case 'f': // CH - Divisor for Difficulty d = atof(arg); if (d == 0) /* sanity check */ @@ -1453,11 +2247,11 @@ static void parse_cmdline(int argc, char *argv[]) show_usage_and_exit(1); } - if (opt_algo == ALGO_HEAVY && opt_vote == 9999) { - fprintf(stderr, "%s: Heavycoin hash requires block reward vote parameter (see --vote)\n", - argv[0]); - show_usage_and_exit(1); - } + //if (opt_algo == ALGO_HEAVY && opt_vote == 9999) { + // fprintf(stderr, "%s: Heavycoin hash requires block reward vote parameter (see --vote)\n", + // argv[0]); + // show_usage_and_exit(1); + //} parse_config(); } @@ -1481,7 +2275,7 @@ static void signal_handler(int sig) } #endif -#define PROGRAM_VERSION "1.2" +#define PROGRAM_VERSION "djm34 pluck0.1" int main(int argc, char *argv[]) { struct thr_info *thr; @@ -1492,16 +2286,20 @@ int main(int argc, char *argv[]) SYSTEM_INFO sysinfo; #endif - printf(" *** ccMiner for nVidia GPUs by Christian Buchner and Christian H. 
***\n"); - printf("\t This is version "PROGRAM_VERSION" (beta)\n"); - printf("\t based on pooler-cpuminer 2.3.2 (c) 2010 Jeff Garzik, 2012 pooler\n"); - printf("\t based on pooler-cpuminer extension for HVC from\n\t https://github.com/heavycoin/cpuminer-heavycoin\n"); - printf("\t\t\tand\n\t http://hvc.1gh.com/\n"); - printf("\tCuda additions Copyright 2014 Christian Buchner, Christian H.\n"); - printf("\t LTC donation address: LKS1WDKGED647msBQfLBHV3Ls8sveGncnm\n"); - printf("\t BTC donation address: 16hJF5mceSojnTD3ZTUDqdRhDyPJzoRakM\n"); - printf("\t YAC donation address: Y87sptDEcpLkLeAuex6qZioDbvy1qXZEj4\n"); - + printf(" ***** ccMiner for nVidia GPUs by djm34 *****\n"); + printf("\t This is version "PROGRAM_VERSION" \n"); + printf(" based on original ccMiner by Christian Buchner and Christian H. 2014 ***\n"); + printf("\t based on pooler-cpuminer 2.3.2 (c) 2010 Jeff Garzik, 2012 pooler\n"); + printf("\t based on pooler-cpuminer extension for HVC from\n\t https://github.com/heavycoin/cpuminer-heavycoin\n"); + printf("\t\t\tand\n\t http://hvc.1gh.com/\n"); + printf("\tCuda additions Copyright 2014 Christian Buchner, Christian H.\n"); + printf("\tCuda additions Copyright 2014 DJM34\n"); + printf("\t FTC donation address: 6esbN82brbg3eai8fqzNGm5tmbpiYu3czM\n"); + printf("\t BTC donation address: 1NENYmxwZGHsKFmyjTc5WferTn5VTFb7Ze\n"); + printf("\t VTC donation address: VrLUQmH6Jk5gFii7fASc8vJ7eEgKJqhX11\n"); + + for (int i = 0; i<8; i++) {tp_coef[i]=-1;} + opt_difficulty = 1. 
; rpc_user = strdup(""); rpc_pass = strdup(""); @@ -1510,7 +2308,7 @@ int main(int argc, char *argv[]) /* parse command line */ parse_cmdline(argc, argv); - + cuda_devicenames(); if (!opt_benchmark && !rpc_url) { @@ -1579,7 +2377,7 @@ int main(int argc, char *argv[]) thr_hashrates = (double *) calloc(opt_n_threads, sizeof(double)); if (!thr_hashrates) return 1; - + /* init workio thread info */ work_thr_id = opt_n_threads; thr = &thr_info[work_thr_id]; diff --git a/cuda_helper.h b/cuda_helper.h index 8b0b3f6e90..843ad3a542 100644 --- a/cuda_helper.h +++ b/cuda_helper.h @@ -1,7 +1,20 @@ #ifndef CUDA_HELPER_H #define CUDA_HELPER_H -static __device__ unsigned long long MAKE_ULONGLONG(uint32_t LO, uint32_t HI) +#ifdef __INTELLISENSE__ +#define __launch_bounds__(x) +#define __byte_perm(x,y,z) +#endif + +static __device__ void LOHI(uint32_t &lo, uint32_t &hi, uint64_t x) +{ + asm("{\n\t" + "mov.b64 {%0,%1},%2; \n\t" + "}" + : "=r"(lo), "=r"(hi) : "l"(x)); +} + +static __device__ unsigned long long oMAKE_ULONGLONG(uint32_t LO, uint32_t HI) { #if __CUDA_ARCH__ >= 130 return __double_as_longlong(__hiloint2double(HI, LO)); @@ -10,8 +23,37 @@ static __device__ unsigned long long MAKE_ULONGLONG(uint32_t LO, uint32_t HI) #endif } +static __device__ unsigned long long MAKE_ULONGLONG(uint32_t LO, uint32_t HI) +{ +uint64_t result; +asm volatile ("{\n\t" + "mov.b64 %0,{%1,%2}; \n\t" + "}" + : "=l"(result) : "r"(LO) , "r"(HI)); +return result; +} +static __device__ uint32_t HIWORD(uint64_t x) +{ +uint32_t result; +asm volatile ("{\n\t" + ".reg .u32 xl; \n\t" + "mov.b64 {xl,%0},%1; \n\t" + "}" + : "=r"(result) : "l"(x)); +return result; +} +static __device__ uint32_t LOWORD(uint64_t x) +{ +uint32_t result; +asm volatile ("{\n\t" + ".reg .u32 xh; \n\t" + "mov.b64 {%0,xh},%1; \n\t" + "}" + : "=r"(result) : "l"(x)); +return result; +} // das Hi Word aus einem 64 Bit Typen extrahieren -static __device__ uint32_t HIWORD(const uint64_t &x) { +static __device__ uint32_t oHIWORD(const 
uint64_t &x) { #if __CUDA_ARCH__ >= 130 return (uint32_t)__double2hiint(__longlong_as_double(x)); #else @@ -19,13 +61,44 @@ static __device__ uint32_t HIWORD(const uint64_t &x) { #endif } +#if __CUDA_ARCH__ < 350 + // Kepler (Compute 3.0) + #define SPH_ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) + #define SPH_ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#else + // Kepler (Compute 3.5) + #define SPH_ROTL32(x, n) __funnelshift_l( (x), (x), (n) ) + #define SPH_ROTR32(x, n) __funnelshift_r( (x), (x), (n) ) +#endif + // das Hi Word in einem 64 Bit Typen ersetzen -static __device__ uint64_t REPLACE_HIWORD(const uint64_t &x, const uint32_t &y) { +static __device__ uint64_t oREPLACE_HIWORD(const uint64_t &x, const uint32_t &y) { return (x & 0xFFFFFFFFULL) | (((uint64_t)y) << 32ULL); } +static __device__ uint64_t REPLACE_HIWORD(uint64_t x, uint32_t y) { + asm volatile("{\n\t" + " .reg .u32 tl,th; \n\t" + "mov.b64 {tl,th},%0; \n\t" + "mov.b64 %0,{tl,%1}; \n\t" + "}" + : "+l"(x) : "r"(y) ); +return x; +} + + +static __device__ uint64_t REPLACE_LOWORD(uint64_t x, uint32_t y) { + asm volatile ("{\n\t" + " .reg .u32 tl,th; \n\t" + "mov.b64 {tl,th},%0; \n\t" + "mov.b64 %0,{%1,th}; \n\t" + "}" + : "+l"(x) : "r"(y) ); +return x; +} + // das Lo Word aus einem 64 Bit Typen extrahieren -static __device__ uint32_t LOWORD(const uint64_t &x) { +static __device__ uint32_t oLOWORD(const uint64_t &x) { #if __CUDA_ARCH__ >= 130 return (uint32_t)__double2loint(__longlong_as_double(x)); #else @@ -34,24 +107,37 @@ static __device__ uint32_t LOWORD(const uint64_t &x) { } // das Lo Word in einem 64 Bit Typen ersetzen -static __device__ uint64_t REPLACE_LOWORD(const uint64_t &x, const uint32_t &y) { +static __device__ uint64_t oREPLACE_LOWORD(const uint64_t &x, const uint32_t &y) { return (x & 0xFFFFFFFF00000000ULL) | ((uint64_t)y); } // Endian Drehung für 32 Bit Typen static __device__ uint32_t cuda_swab32(uint32_t x) { - return __byte_perm(x, x, 0x0123); + return __byte_perm(x, 0, 
0x0123); } +static __device__ uint64_t swap2ll(uint32_t lo, uint32_t hi) +{ +return(MAKE_ULONGLONG(cuda_swab32(lo),cuda_swab32(hi))); +} + + // Endian Drehung für 64 Bit Typen static __device__ uint64_t cuda_swab64(uint64_t x) { return MAKE_ULONGLONG(cuda_swab32(HIWORD(x)), cuda_swab32(LOWORD(x))); } +static __device__ uint64_t cuda_swab32ll(uint64_t x) { + return MAKE_ULONGLONG(cuda_swab32(LOWORD(x)), cuda_swab32(HIWORD(x))); +} + + + + // diese 64 Bit Rotates werden unter Compute 3.5 (und besser) mit dem Funnel Shifter beschleunigt #if __CUDA_ARCH__ >= 350 -__forceinline__ __device__ uint64_t ROTR64(const uint64_t value, const int offset) { +__forceinline__ __device__ uint64_t oROTR64(const uint64_t value, const int offset) { uint2 result; if(offset < 32) { asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset)); @@ -63,12 +149,12 @@ __forceinline__ __device__ uint64_t ROTR64(const uint64_t value, const int offse return __double_as_longlong(__hiloint2double(result.y, result.x)); } #else -#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#define oROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) #endif // diese 64 Bit Rotates werden unter Compute 3.5 (und besser) mit dem Funnel Shifter beschleunigt #if __CUDA_ARCH__ >= 350 -__forceinline__ __device__ uint64_t ROTL64(const uint64_t value, const int offset) { +__forceinline__ __device__ uint64_t oROTL64(const uint64_t value, const int offset) { uint2 result; if(offset >= 32) { asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset)); @@ -80,7 +166,525 @@ __forceinline__ __device__ uint64_t ROTL64(const uint64_t value, const int offse return __double_as_longlong(__hiloint2double(result.y, result.x)); } #else +#define oROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#endif + +// Wolf0 
Rotate +#if __CUDA_ARCH__ >= 350 +__forceinline__ __device__ uint64_t ROTR64(const uint64_t x, const int y) +{ + uint64_t res; + + asm("{\n\t" + ".reg .u32 tl,th,vl,vh;\n\t" + ".reg .pred p;\n\t" + "mov.b64 {tl,th}, %1;\n\t" + "shf.r.wrap.b32 vl, tl, th, %2;\n\t" + "shf.r.wrap.b32 vh, th, tl, %2;\n\t" + "setp.lt.u32 p, %2, 32;\n\t" + "@p mov.b64 %0, {vl,vh};\n\t" + "@!p mov.b64 %0, {vh,vl};\n\t" + "}" : "=l"(res) : "l"(x) , "r"(y)); + + return res; +} +#else +#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#endif + +#if __CUDA_ARCH__ >= 350 +__forceinline__ __device__ uint64_t ROTL64(const uint64_t x, const int y) +{ + uint64_t res; + + asm("{\n\t" + ".reg .u32 tl,th,vl,vh;\n\t" + ".reg .pred p;\n\t" + "mov.b64 {tl,th}, %1;\n\t" + "shf.l.wrap.b32 vl, tl, th, %2;\n\t" + "shf.l.wrap.b32 vh, th, tl, %2;\n\t" + "setp.lt.u32 p, %2, 32;\n\t" + "@!p mov.b64 %0, {vl,vh};\n\t" + "@p mov.b64 %0, {vh,vl};\n\t" + "}" : "=l"(res) : "l"(x) , "r"(y)); + + return res; +} +#else #define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) #endif +__forceinline__ __device__ uint64_t xor1(uint64_t a, uint64_t b) { + uint64_t result; + asm("xor.b64 %0, %1, %2;" : "=l"(result) : "l"(a) ,"l"(b)); + return result; +} +__forceinline__ __device__ uint32_t xor1b(uint32_t a, uint32_t b) { + uint32_t result; + asm("xor.b32 %0, %1, %2;" : "=r"(result) : "r"(a) ,"r"(b)); + return result; +} + +__forceinline__ __device__ uint64_t xor3(uint64_t a, uint64_t b, uint64_t c) { + uint64_t result; + asm("{\n\t" + " .reg .u64 t1;\n\t" + "xor.b64 t1, %2, %3;\n\t" + "xor.b64 %0, %1, t1;\n\t" + "}" + : "=l"(result) : "l"(a) ,"l"(b),"l"(c)); + return result; +} + +__forceinline__ __device__ uint32_t xor3b(uint32_t a, uint32_t b, uint32_t c) { + uint32_t result; + asm("{\n\t" + " .reg .u32 t1;\n\t" + "xor.b32 t1, %2, %3;\n\t" + "xor.b32 %0, %1, t1;\n\t" + "}" + : "=r"(result) : "r"(a) ,"r"(b),"r"(c)); + return result; +} +__forceinline__ __device__ uint64_t xor5(uint64_t a, uint64_t b, uint64_t c, 
uint64_t d, uint64_t e) { + uint64_t result; + asm("{\n\t" + " .reg .u64 t1,t2,t3;\n\t" + "xor.b64 t1, %1, %2;\n\t" + "xor.b64 t2, %3, %4;\n\t" + "xor.b64 t3, t1, t2;\n\t" + "xor.b64 %0, t3,%5;\n\t" + "}" + : "=l"(result) : "l"(a) ,"l"(b), "l"(c), "l"(d) ,"l"(e)); + return result; +} + + + +__forceinline__ __device__ uint64_t xor8(uint64_t a, uint64_t b, uint64_t c, uint64_t d,uint64_t e,uint64_t f,uint64_t g, uint64_t h) { + uint64_t result; + asm volatile ("xor.b64 %0, %1, %2;" : "=l"(result) : "l"(g) ,"l"(h)); + asm volatile ("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(f)); + asm volatile ("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(e)); + asm volatile ("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(d)); + asm volatile ("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(c)); + asm volatile ("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(b)); + asm volatile ("xor.b64 %0, %0, %1;" : "+l"(result) : "l"(a)); + return result; +} + +__forceinline__ __device__ uint32_t xandx(uint32_t a, uint32_t b, uint32_t c) +{ + uint32_t result; + asm("{\n\t" + ".reg .u32 m,n;\n\t" + "xor.b32 m, %2,%3;\n\t" + "and.b32 n, m,%1;\n\t" + "xor.b32 %0, n,%3;\n\t" + "}\n\t" + : "=r"(result) : "r"(a), "r"(b), "r"(c)); + return result; + +} +__forceinline__ __device__ uint64_t xandx64(uint64_t a, uint64_t b, uint64_t c) +{ + uint64_t result; + asm("{\n\t" + ".reg .u64 m,n;\n\t" + "xor.b64 m, %2,%3;\n\t" + "and.b64 n, m,%1;\n\t" + "xor.b64 %0, n,%3;\n\t" + "}\n\t" + : "=l"(result) : "l"(a), "l"(b), "l"(c)); + return result; + +} + +__forceinline__ __device__ uint64_t xornot64(uint64_t a, uint64_t b, uint64_t c) +{ + uint64_t result; + asm("{\n\t" + ".reg .u64 m,n;\n\t" + "not.b64 m,%2; \n\t" + "or.b64 n, %1,m;\n\t" + "xor.b64 %0, n,%3;\n\t" + "}\n\t" + : "=l"(result) : "l"(a), "l"(b), "l"(c)); + return result; + +} + +__forceinline__ __device__ void chi(uint64_t &s0, uint64_t &s1, uint64_t &s2, uint64_t &s3, uint64_t &s4) +{ + asm("{\n\t" + ".reg .u64 m0,m1,m2,m3,m4;\n\t" + ".reg .u64 z0,z1,z2,z3,z4;\n\t" + 
"not.b64 m0,%0; \n\t" + "not.b64 m1,%1; \n\t" + "not.b64 m2,%2; \n\t" + "not.b64 m3,%3; \n\t" + "not.b64 m4,%4; \n\t" + "and.b64 z1,m1,%2;\n\t" + "and.b64 z2,m2,%3;\n\t" + "and.b64 z3,m3,%4;\n\t" + "and.b64 z4,m4,%0;\n\t" + "and.b64 z0,m0,%1;\n\t" + "xor.b64 %0,%0,z1;\n\t" + "xor.b64 %1,%1,z2;\n\t" + "xor.b64 %2,%2,z3;\n\t" + "xor.b64 %3,%3,z4;\n\t" + "xor.b64 %4,%4,z0;\n\t" + "}\n\t" + : "+l"(s0),"+l"(s1),"+l"(s2),"+l"(s3),"+l"(s4)); +} +__forceinline__ __device__ uint64_t xornt64(uint64_t a, uint64_t b, uint64_t c) +{ + uint64_t result; + asm("{\n\t" + ".reg .u64 m,n;\n\t" + "not.b64 m,%3; \n\t" + "or.b64 n, %2,m;\n\t" + "xor.b64 %0, %1,n;\n\t" + "}\n\t" + : "=l"(result) : "l"(a), "l"(b), "l"(c)); + return result; + +} +__forceinline__ __device__ uint64_t sph_t64(uint64_t x) +{ +uint64_t result; + asm("{\n\t" + "and.b64 %0,%1,0xFFFFFFFFFFFFFFFF;\n\t" + "}\n\t" + : "=l"(result) : "l"(x)); + return result; +} +__forceinline__ __device__ uint32_t sph_t32(uint32_t x) +{ +uint32_t result; + asm("{\n\t" + "and.b32 %0,%1,0xFFFFFFFF;\n\t" + "}\n\t" + : "=r"(result) : "r"(x)); + return result; +} + +__forceinline__ __device__ uint64_t andor(uint64_t a, uint64_t b, uint64_t c) +{ + uint64_t result; + asm("{\n\t" + ".reg .u64 m,n,o;\n\t" + "and.b64 m, %1, %2;\n\t" + " or.b64 n, %1, %2;\n\t" + "and.b64 o, n, %3;\n\t" + " or.b64 %0, m, o ;\n\t" + "}\n\t" + : "=l"(result) : "l"(a), "l"(b), "l"(c)); + return result; + +} +__forceinline__ __device__ uint32_t andor32(uint32_t a, uint32_t b, uint32_t c) +{ + uint32_t result; + asm("{\n\t" + ".reg .u32 m,n,o;\n\t" + "and.b32 m, %1, %2;\n\t" + " or.b32 n, %1, %2;\n\t" + "and.b32 o, n, %3;\n\t" + " or.b32 %0, m, o ;\n\t" + "}\n\t" + : "=r"(result) : "r"(a), "r"(b), "r"(c)); + return result; + +} +__forceinline__ __device__ uint64_t shr_t64(uint64_t x,uint32_t n) +{ +uint64_t result; +asm("{\n\t" + "shr.b64 %0,%1,%2;\n\t" + "}\n\t" + : "=l"(result) : "l"(x), "r"(n)); + return result; +} +__forceinline__ __device__ uint64_t 
shl_t64(uint64_t x,uint32_t n) +{ +uint64_t result; +asm("{\n\t" + "shl.b64 %0,%1,%2;\n\t" + "}\n\t" + : "=l"(result) : "l"(x), "r"(n)); + return result; +} +__forceinline__ __device__ uint32_t shr_t32(uint32_t x,uint32_t n) +{ +uint32_t result; +asm("{\n\t" + "shr.b32 %0,%1,%2;\n\t" + "}\n\t" + : "=r"(result) : "r"(x), "r"(n)); + return result; +} +__forceinline__ __device__ uint32_t shl_t32(uint32_t x,uint32_t n) +{ +uint32_t result; +asm("{\n\t" + "shl.b32 %0,%1,%2;\n\t" + "}\n\t" + : "=r"(result) : "r"(x), "r"(n)); + return result; +} +__forceinline__ __device__ void and64(uint64_t &d,uint64_t a,uint64_t b) +{ +asm("and.b64 %0,%1,%2;" : "=l"(d) : "l"(a), "l"(b)); +} + +__forceinline__ __device__ void sbox(uint32_t &a, uint32_t &b,uint32_t &c,uint32_t &d) +{ +uint32_t t; +t = a; +asm("and.b32 %0,%0,%1;" : "+r"(a) : "r"(c)); +asm("xor.b32 %0,%0,%1;" : "+r"(a) : "r"(d)); +asm("xor.b32 %0,%0,%1;" : "+r"(c) : "r"(b)); +asm("xor.b32 %0,%0,%1;" : "+r"(c) : "r"(a)); +asm( "or.b32 %0,%0,%1;" : "+r"(d) : "r"(t)); +asm("xor.b32 %0,%0,%1;" : "+r"(d) : "r"(b)); +asm("xor.b32 %0,%0,%1;" : "+r"(t) : "r"(c)); +b=d; +asm( "or.b32 %0,%0,%1;" : "+r"(d) : "r"(t)); +asm("xor.b32 %0,%0,%1;" : "+r"(d) : "r"(a)); +asm("and.b32 %0,%0,%1;" : "+r"(a) : "r"(b)); +asm("xor.b32 %0,%0,%1;" : "+r"(t) : "r"(a)); +asm("xor.b32 %0,%0,%1;" : "+r"(b) : "r"(d)); +asm("xor.b32 %0,%0,%1;" : "+r"(b) : "r"(t)); +a=c; +c=b; +b=d; +asm("not.b32 %0,%1;" : "=r"(d) : "r"(t)); +} + + + + +__forceinline__ __device__ void muladd128(uint64_t &u,uint64_t &v,uint64_t a, uint64_t b,uint64_t &c,uint64_t &e) +{ + + asm("{\n\t" + ".reg .b32 al,ah,bl,bh; \n\t" + ".reg .b32 x1,x2,x3,x4; \n\t" + ".reg .b32 cl,ch,el,eh; \n\t" + "mov.b64 {al,ah},%2; \n\t" + "mov.b64 {bl,bh},%3; \n\t" + "mov.b64 {cl,ch},%4; \n\t" + "mov.b64 {el,eh},%5; \n\t" + "add.cc.u32 x1,cl,el; \n\t" + "addc.cc.u32 x2,ch,eh; \n\t" + "addc.u32 x3,0,0; \n\t" + "mad.lo.cc.u32 x1,bl,al,x1; \n\t" + "madc.hi.cc.u32 x2,bl,al,x2; \n\t" + "addc.u32 x3,x3,0; 
\n\t" + "mad.lo.cc.u32 x2,bh,al,x2; \n\t" + "madc.hi.cc.u32 x3,bh,al,x3; \n\t" + "addc.u32 x4,0,0; \n\t" + "mad.lo.cc.u32 x2,bl,ah,x2; \n\t" + "madc.hi.cc.u32 x3,bl,ah,x3; \n\t" + "addc.u32 x4,x4,0; \n\t" + "mad.lo.cc.u32 x3,bh,ah,x3; \n\t" + "madc.hi.u32 x4,bh,ah,x4; \n\t" + "mov.b64 %1,{x1,x2}; \n\t" + "mov.b64 %0,{x3,x4}; \n\t" + "}\n\t" + : "=l"(u), "=l"(v) : "l"(a), "l"(b), "l"(c), "l"(e)); + +} + + + +__forceinline__ __device__ uint64_t mul(uint64_t a,uint64_t b) +{ +uint64_t result; +asm("{\n\t" + "mul.lo.u64 %0,%1,%2; \n\t" + "}\n\t" + : "=l"(result) : "l"(a) , "l"(b)); +return result; +} + +__device__ __forceinline__ uint64_t shfl(uint64_t x, int lane) +{ +uint32_t lo,hi; +asm volatile("mov.b64 {%0,%1},%2;" : "=r"(lo), "=r"(hi) : "l"(x)); +lo = __shfl(lo, lane); +hi = __shfl(hi, lane); +asm volatile("mov.b64 %0,{%1,%2};" : "=l"(x) : "r"(lo) , "r"(hi)); +return x; +} + + +///uint2 method + +#if __CUDA_ARCH__ >= 350 +__inline__ __device__ uint2 ROR2(const uint2 a, const int offset) { + uint2 result; + if (offset < 32) { + asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.x), "r"(a.y), "r"(offset)); + asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.y), "r"(a.x), "r"(offset)); + } + else { + asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.y), "r"(a.x), "r"(offset)); + asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset)); + + } + return result; +} +#else +__inline__ __device__ uint2 ROR2(const uint2 v, const int a) { + uint2 result; + int n = 64 -a; //lazy + if (n <= 32) { + result.y = ((v.y << (n)) | (v.x >> (32 - n))); + result.x = ((v.x << (n)) | (v.y >> (32 - n))); + } + else { + result.y = ((v.x << (n - 32)) | (v.y >> (64 - n))); + result.x = ((v.y << (n - 32)) | (v.x >> (64 - n))); + } + return result; + } +#endif + + +#if __CUDA_ARCH__ >= 350 +__inline__ __device__ uint2 ROL2(const uint2 a, const int offset) { + uint2 result; + if (offset >= 32) { + asm("shf.l.wrap.b32 %0, %1, 
%2, %3;" : "=r"(result.x) : "r"(a.x), "r"(a.y), "r"(offset)); + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.y), "r"(a.x), "r"(offset)); + } + else { + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.y), "r"(a.x), "r"(offset)); + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset)); + } +return result; +} +#else +__inline__ __device__ uint2 ROL2(const uint2 v, const int n) { + uint2 result; + if (n == 32) {result.x = v.y;result.y=v.x;} + if (n < 32) { + result.y = ((v.y << (n)) | (v.x >> (32 - n))); + result.x = ((v.x << (n)) | (v.y >> (32 - n))); + } + else { + result.y = ((v.x << (n - 32)) | (v.y >> (64 - n))); + result.x = ((v.y << (n - 32)) | (v.x >> (64 - n))); + } + return result; + } +#endif + +static __forceinline__ __device__ uint64_t devectorize(uint2 v) { return MAKE_ULONGLONG(v.x, v.y); } +static __forceinline__ __device__ uint2 vectorize(uint64_t v) { + uint2 result; + LOHI(result.x, result.y, v); + return result; +} + +static __forceinline__ __device__ uint2 operator^ (uint2 a, uint2 b) { return make_uint2(a.x ^ b.x, a.y ^ b.y); } +static __forceinline__ __device__ uint2 operator& (uint2 a, uint2 b) { return make_uint2(a.x & b.x, a.y & b.y); } +static __forceinline__ __device__ uint2 operator| (uint2 a, uint2 b) { return make_uint2(a.x | b.x, a.y | b.y); } +static __forceinline__ __device__ uint2 operator~ (uint2 a) { return make_uint2(~a.x, ~a.y); } +static __forceinline__ __device__ void operator^= (uint2 &a, uint2 b) { a = a ^ b; } +static __forceinline__ __device__ uint2 operator+ (uint2 a, uint2 b) +{ + uint2 result; + asm("{\n\t" + "add.cc.u32 %0,%2,%4; \n\t" + "addc.u32 %1,%3,%5; \n\t" + "}\n\t" + : "=r"(result.x), "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(b.x), "r"(b.y)); + return result; +} +static __forceinline__ __device__ void operator+= (uint2 &a, uint2 b) { a = a + b; } + +static __forceinline__ __device__ uint2 operator* (uint2 a, uint2 b) +{ //basic multiplication 
between 64bit no carry outside that range (ie mul.lo.b64(a*b)) + //(what does uint64 "*" operator) + uint2 result; + asm("{\n\t" + "mul.lo.u32 %0,%2,%4; \n\t" + "mul.hi.u32 %1,%2,%4; \n\t" + "mad.lo.cc.u32 %1,%3,%4,%1; \n\t" + "madc.lo.u32 %1,%3,%5,%1; \n\t" + "}\n\t" + : "=r"(result.x), "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(b.x), "r"(b.y)); + return result; +} +#if __CUDA_ARCH__ >= 350 +static __forceinline__ __device__ uint2 shiftl2(uint2 a, int offset) +{ + uint2 result; + if (offset<32) { + asm("{\n\t" + "shf.l.clamp.b32 %1,%2,%3,%4; \n\t" + "shl.b32 %0,%2,%4; \n\t" + "}\n\t" + : "=r"(result.x), "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset)); + } + else { + asm("{\n\t" + "shf.l.clamp.b32 %1,%2,%3,%4; \n\t" + "shl.b32 %0,%2,%4; \n\t" + "}\n\t" + : "=r"(result.x), "=r"(result.y) : "r"(a.y), "r"(a.x), "r"(offset)); + } + return result; +} +static __forceinline__ __device__ uint2 shiftr2(uint2 a, int offset) +{ + uint2 result; + if (offset<32) { + asm("{\n\t" + "shf.r.clamp.b32 %0,%2,%3,%4; \n\t" + "shr.b32 %1,%3,%4; \n\t" + "}\n\t" + : "=r"(result.x), "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset)); + } + else { + asm("{\n\t" + "shf.l.clamp.b32 %0,%2,%3,%4; \n\t" + "shl.b32 %1,%3,%4; \n\t" + "}\n\t" + : "=r"(result.x), "=r"(result.y) : "r"(a.y), "r"(a.x), "r"(offset)); + } + return result; +} +#else +static __forceinline__ __device__ uint2 shiftl2(uint2 a, int offset) +{ + uint2 result; + asm("{\n\t" + ".reg .b64 u,v; \n\t" + "mov.b64 v,{%2,%3}; \n\t" + "shl.b64 u,v,%4; \n\t" + "mov.b64 {%0,%1},v; \n\t" + "}\n\t" + : "=r"(result.x), "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset)); + return result; +} +static __forceinline__ __device__ uint2 shiftr2(uint2 a, int offset) +{ + uint2 result; + asm("{\n\t" + ".reg .b64 u,v; \n\t" + "mov.b64 v,{%2,%3}; \n\t" + "shr.b64 u,v,%4; \n\t" + "mov.b64 {%0,%1},v; \n\t" + "}\n\t" + : "=r"(result.x), "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset)); + return result; +} +#endif 
+/////////////////////////////////////////////////////////////////////////////////// + + #endif // #ifndef CUDA_HELPER_H diff --git a/cuda_vector.h b/cuda_vector.h new file mode 100644 index 0000000000..0dad00de09 --- /dev/null +++ b/cuda_vector.h @@ -0,0 +1,256 @@ +#ifndef CUDA_VECTOR_H +#define CUDA_VECTOR_H + + +/////////////////////////////////////////////////////////////////////////////////// +#if (defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__) +#define __LDG_PTR "l" +#else +#define __LDG_PTR "r" +#endif +#include "cuda_helper.h" + +//typedef __device_builtin__ struct ulong16 ulong16; + +typedef struct __align__(32) uint8 +{ + unsigned int s0, s1, s2, s3, s4, s5, s6, s7; +} uint8; + +typedef struct __align__(64) uint16 +{ + union { + struct {unsigned int s0, s1, s2, s3, s4, s5, s6, s7;}; + uint8 lo; + }; + union { + struct {unsigned int s8, s9, sa, sb, sc, sd, se, sf;}; + uint8 hi;}; +} uint16; + + + + +static __inline__ __host__ __device__ uint16 make_uint16( + unsigned int s0, unsigned int s1, unsigned int s2, unsigned int s3, unsigned int s4, unsigned int s5, unsigned int s6, unsigned int s7, + unsigned int s8, unsigned int s9, unsigned int sa, unsigned int sb, unsigned int sc, unsigned int sd, unsigned int se, unsigned int sf) +{ + uint16 t; t.s0 = s0; t.s1 = s1; t.s2 = s2; t.s3 = s3; t.s4 = s4; t.s5 = s5; t.s6 = s6; t.s7 = s7; + t.s8 = s8; t.s9 = s9; t.sa = sa; t.sb = sb; t.sc = sc; t.sd = sd; t.se = se; t.sf = sf; + return t; +} +static __inline__ __host__ __device__ uint16 make_uint16(const uint8 &a, const uint8 &b) +{ +uint16 t; t.lo=a; t.hi=b; return t; +} +static __inline__ __host__ __device__ uint8 make_uint8( + unsigned int s0, unsigned int s1, unsigned int s2, unsigned int s3, unsigned int s4, unsigned int s5, unsigned int s6, unsigned int s7) +{ + uint8 t; t.s0 = s0; t.s1 = s1; t.s2 = s2; t.s3 = s3; t.s4 = s4; t.s5 = s5; t.s6 = s6; t.s7 = s7; + return t; +} + + + + +static __forceinline__ __device__ uchar4 operator^ (uchar4 a, uchar4 
b) { return make_uchar4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w); } +static __forceinline__ __device__ uchar4 operator+ (uchar4 a, uchar4 b) { return make_uchar4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); } + + +static __forceinline__ __device__ uint4 operator^ (uint4 a, uint4 b) { return make_uint4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w); } +static __forceinline__ __device__ uint4 operator+ (uint4 a, uint4 b) { return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); } + + +static __forceinline__ __device__ ulonglong4 operator^ (ulonglong4 a, ulonglong4 b) { return make_ulonglong4(a.x ^ b.x, a.y ^ b.y, a.z ^ b.z, a.w ^ b.w); } +static __forceinline__ __device__ ulonglong4 operator+ (ulonglong4 a, ulonglong4 b) { return make_ulonglong4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); } +static __forceinline__ __device__ ulonglong2 operator^ (ulonglong2 a, ulonglong2 b) { return make_ulonglong2(a.x ^ b.x, a.y ^ b.y); } + + +static __forceinline__ __device__ __host__ uint8 operator^ (const uint8 &a, const uint8 &b) { return make_uint8(a.s0 ^ b.s0, a.s1 ^ b.s1, a.s2 ^ b.s2, a.s3 ^ b.s3, a.s4 ^ b.s4, a.s5 ^ b.s5, a.s6 ^ b.s6, a.s7 ^ b.s7); } + +static __forceinline__ __device__ __host__ uint8 operator+ (const uint8 &a, const uint8 &b) { return make_uint8(a.s0 + b.s0, a.s1 + b.s1, a.s2 + b.s2, a.s3 + b.s3, a.s4 + b.s4, a.s5 + b.s5, a.s6 + b.s6, a.s7 + b.s7); } + +static __forceinline__ __device__ __host__ uint16 operator^ (const uint16 &a, const uint16 &b) { + return make_uint16(a.s0 ^ b.s0, a.s1 ^ b.s1, a.s2 ^ b.s2, a.s3 ^ b.s3, a.s4 ^ b.s4, a.s5 ^ b.s5, a.s6 ^ b.s6, a.s7 ^ b.s7, + a.s8 ^ b.s8, a.s9 ^ b.s9, a.sa ^ b.sa, a.sb ^ b.sb, a.sc ^ b.sc, a.sd ^ b.sd, a.se ^ b.se, a.sf ^ b.sf); +} + + + +static __forceinline__ __device__ __host__ uint16 operator+ (const uint16 &a, const uint16 &b) { + return make_uint16(a.s0 + b.s0, a.s1 + b.s1, a.s2 + b.s2, a.s3 + b.s3, a.s4 + b.s4, a.s5 + b.s5, a.s6 + b.s6, a.s7 + b.s7, + a.s8 + b.s8, a.s9 + b.s9, a.sa + b.sa, a.sb 
+ b.sb, a.sc + b.sc, a.sd + b.sd, a.se + b.se, a.sf + b.sf); +} + + + +static __forceinline__ __device__ void operator^= (uint4 &a, uint4 b) { a = a ^ b; } +static __forceinline__ __device__ void operator^= (uchar4 &a, uchar4 b) { a = a ^ b; } +static __forceinline__ __device__ __host__ void operator^= (uint8 &a, const uint8 &b) { a = a ^ b; } +static __forceinline__ __device__ __host__ void operator^= (uint16 &a, const uint16 &b) { a = a ^ b; } + + +static __forceinline__ __device__ void operator^= (ulonglong4 &a, const ulonglong4 &b) { a = a ^ b; } +static __forceinline__ __device__ void operator^= (ulonglong2 &a, const ulonglong2 &b) { a = a ^ b; } + +static __forceinline__ __device__ void operator+= (uint4 &a, uint4 b) { a = a + b; } +static __forceinline__ __device__ void operator+= (uchar4 &a, uchar4 b) { a = a + b; } +static __forceinline__ __device__ __host__ void operator+= (uint8 &a, const uint8 &b) { a = a + b; } +static __forceinline__ __device__ __host__ void operator+= (uint16 &a, const uint16 &b) { a = a + b; } + + +static __forceinline__ __device__ uint32_t rotate(uint32_t vec4, uint32_t shift) +{ + uint32_t ret; + asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(vec4), "r"(vec4), "r"(shift)); + return ret; +} + + +static __forceinline__ __device__ uint32_t rotateR(uint32_t vec4, uint32_t shift) +{ + uint32_t ret; + asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(vec4), "r"(vec4), "r"(shift)); + return ret; +} + + +static __device__ __inline__ uint8 __ldg8(const uint8_t *ptr) +{ + + uint8 test; + asm volatile ("ld.global.nc.v4.u32 {%0,%1,%2,%3},[%4];" : "=r"(test.s0), "=r"(test.s1), "=r"(test.s2), "=r"(test.s3) : __LDG_PTR(ptr)); + asm volatile ("ld.global.nc.v4.u32 {%0,%1,%2,%3},[%4+16];" : "=r"(test.s4), "=r"(test.s5), "=r"(test.s6), "=r"(test.s7) : __LDG_PTR(ptr)); + + return (test); +} + + +static __device__ __inline__ uint32_t __ldgtoint(const uint8_t *ptr) +{ + + uint32_t test; + asm volatile ("ld.global.nc.u32 {%0},[%1];" : 
"=r"(test) : __LDG_PTR(ptr)); + return (test); +} +/* Loads one 64-bit word from ptr with ld.global.nc and returns all 64 bits (the former uint32_t return type truncated the loaded value). */ +static __device__ __inline__ uint64_t __ldgtoint64(const uint8_t *ptr) +{ + + uint64_t test; + asm volatile ("ld.global.nc.u64 {%0},[%1];" : "=l"(test) : __LDG_PTR(ptr)); + return (test); +} + + +static __device__ __inline__ uint32_t __ldgtoint_unaligned(const uint8_t *ptr) +{ + + uint32_t test; + asm volatile ("{\n\t" + ".reg .u8 a,b,c,d; \n\t" + "ld.global.nc.u8 a,[%1]; \n\t" + "ld.global.nc.u8 b,[%1+1]; \n\t" + "ld.global.nc.u8 c,[%1+2]; \n\t" + "ld.global.nc.u8 d,[%1+3]; \n\t" + "mov.b32 %0,{a,b,c,d}; }\n\t" +: "=r"(test) : __LDG_PTR(ptr)); + return (test); +} + +static __device__ __inline__ uint64_t __ldgtoint64_unaligned(const uint8_t *ptr) +{ + uint64_t test; + asm volatile ("{\n\t" + ".reg .u8 a,b,c,d,e,f,g,h; \n\t" + ".reg .u32 i,j; \n\t" + "ld.global.nc.u8 a,[%1]; \n\t" + "ld.global.nc.u8 b,[%1+1]; \n\t" + "ld.global.nc.u8 c,[%1+2]; \n\t" + "ld.global.nc.u8 d,[%1+3]; \n\t" + "ld.global.nc.u8 e,[%1+4]; \n\t" + "ld.global.nc.u8 f,[%1+5]; \n\t" + "ld.global.nc.u8 g,[%1+6]; \n\t" + "ld.global.nc.u8 h,[%1+7]; \n\t" + "mov.b32 i,{a,b,c,d}; \n\t" + "mov.b32 j,{e,f,g,h}; \n\t" + "mov.b64 %0,{i,j}; }\n\t" + : "=l"(test) : __LDG_PTR(ptr)); + return (test); +} + +/* Byte-wise load of 4 bytes at ptr into the low 32 bits of the result; the high 32 bits are taken from asm operand %2 (zero), not %1 (the pointer). */ +static __device__ __inline__ uint64_t __ldgtoint64_trunc(const uint8_t *ptr) +{ + uint32_t zero = 0; + + uint64_t test; + asm volatile ("{\n\t" + ".reg .u8 a,b,c,d; \n\t" + ".reg .u32 i; \n\t" + "ld.global.nc.u8 a,[%1]; \n\t" + "ld.global.nc.u8 b,[%1+1]; \n\t" + "ld.global.nc.u8 c,[%1+2]; \n\t" + "ld.global.nc.u8 d,[%1+3]; \n\t" + "mov.b32 i,{a,b,c,d}; \n\t" + "mov.b64 %0,{i,%2}; }\n\t" + : "=l"(test) : __LDG_PTR(ptr), "r"(zero)); + return (test); +} + + + +static __device__ __inline__ uint32_t __ldgtoint_unaligned2(const uint8_t *ptr) +{ + + uint32_t test; + asm("{\n\t" + ".reg .u8 e,b,c,d; \n\t" + "ld.global.nc.u8 e,[%1]; \n\t" + "ld.global.nc.u8 b,[%1+1]; \n\t" + "ld.global.nc.u8 c,[%1+2]; \n\t" + "ld.global.nc.u8 d,[%1+3]; \n\t" + "mov.b32 
%0,{e,b,c,d}; }\n\t" + : "=r"(test) : __LDG_PTR(ptr)); + return (test); +} + + + + +static __forceinline__ __device__ uint8 swapvec(const uint8 *buf) +{ + uint8 vec; + vec.s0 = cuda_swab32(buf[0].s0); + vec.s1 = cuda_swab32(buf[0].s1); + vec.s2 = cuda_swab32(buf[0].s2); + vec.s3 = cuda_swab32(buf[0].s3); + vec.s4 = cuda_swab32(buf[0].s4); + vec.s5 = cuda_swab32(buf[0].s5); + vec.s6 = cuda_swab32(buf[0].s6); + vec.s7 = cuda_swab32(buf[0].s7); + return vec; +} + +static __forceinline__ __device__ uint16 swapvec(const uint16 *buf) +{ + uint16 vec; + vec.s0 = cuda_swab32(buf[0].s0); + vec.s1 = cuda_swab32(buf[0].s1); + vec.s2 = cuda_swab32(buf[0].s2); + vec.s3 = cuda_swab32(buf[0].s3); + vec.s4 = cuda_swab32(buf[0].s4); + vec.s5 = cuda_swab32(buf[0].s5); + vec.s6 = cuda_swab32(buf[0].s6); + vec.s7 = cuda_swab32(buf[0].s7); + vec.s8 = cuda_swab32(buf[0].s8); + vec.s9 = cuda_swab32(buf[0].s9); + vec.sa = cuda_swab32(buf[0].sa); + vec.sb = cuda_swab32(buf[0].sb); + vec.sc = cuda_swab32(buf[0].sc); + vec.sd = cuda_swab32(buf[0].sd); + vec.se = cuda_swab32(buf[0].se); + vec.sf = cuda_swab32(buf[0].sf); + return vec; +} +#endif // #ifndef CUDA_HELPER_H diff --git a/heavy/heavy.cu b/heavy/heavy.cu index 98728dc222..37b98e4f47 100644 --- a/heavy/heavy.cu +++ b/heavy/heavy.cu @@ -167,6 +167,9 @@ extern "C" int cuda_num_devices() // Gerätenamen holen extern char *device_name[8]; extern int device_map[8]; +int device_major[8]; +int device_minor[8]; +int compute_version[8]; extern "C" void cuda_devicenames() { @@ -185,6 +188,9 @@ extern "C" void cuda_devicenames() cudaGetDeviceProperties(&props, device_map[i]); device_name[i] = strdup(props.name); + device_major[i] = props.major; + device_minor[i] = props.minor; + compute_version[i]= props.major*10+props.minor; } } diff --git a/lyra2/cuda_lyra2.cu b/lyra2/cuda_lyra2.cu new file mode 100644 index 0000000000..2c8327f592 --- /dev/null +++ b/lyra2/cuda_lyra2.cu @@ -0,0 +1,451 @@ +/* + * lyra2 kernel implementation. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 djm34 + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author djm34 + */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + + +#include +#include +#include + + +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); +extern int compute_version[8]; + +#include "cuda_helper.h" + + +static __constant__ uint2 blake2b_IV[8] = +{ + { 0xf3bcc908, 0x6a09e667 }, + { 0x84caa73b, 0xbb67ae85 }, + { 0xfe94f82b, 0x3c6ef372 }, + { 0x5f1d36f1, 0xa54ff53a }, + { 0xade682d1, 0x510e527f }, + { 0x2b3e6c1f, 0x9b05688c }, + { 0xfb41bd6b, 0x1f83d9ab }, + { 0x137e2179, 0x5be0cd19 } +}; + +#define reduceDuplexRowSetup(rowIn, rowInOut, rowOut) \ + { \ + for (int i = 0; i < 8; i++) \ + { \ +\ + for (int j = 0; j < 12; j++) {state[j] ^= Matrix[12 * i + j][rowIn] + Matrix[12 * i + j][rowInOut];} \ + round_lyra_v35(state); \ + for (int j = 0; j < 12; j++) {Matrix[j + 84 - 12 * i][rowOut] = Matrix[12 * i + j][rowIn] ^ state[j];} \ +\ + Matrix[0 + 12 * i][rowInOut] ^= state[11]; \ + Matrix[1 + 12 * i][rowInOut] ^= state[0]; \ + Matrix[2 + 12 * i][rowInOut] ^= state[1]; \ + Matrix[3 + 12 * i][rowInOut] ^= state[2]; \ + Matrix[4 + 12 * i][rowInOut] ^= state[3]; \ + Matrix[5 + 12 * i][rowInOut] ^= state[4]; \ + Matrix[6 + 12 * i][rowInOut] ^= state[5]; \ + Matrix[7 + 12 * i][rowInOut] ^= state[6]; \ + Matrix[8 + 12 * i][rowInOut] ^= state[7]; \ + Matrix[9 + 12 * i][rowInOut] ^= state[8]; \ + Matrix[10 + 12 * i][rowInOut] ^= state[9]; \ + Matrix[11 + 12 * i][rowInOut] ^= state[10]; \ + } \ + \ + } + +#define reduceDuplexRow(rowIn, rowInOut, rowOut) \ + { \ + for (int i = 0; i < 8; i++) \ + { \ + for (int j = 0; j < 12; j++) \ + state[j] ^= Matrix[12 * i + j][rowIn] + Matrix[12 * i + j][rowInOut]; \ + \ + round_lyra_v35(state); \ + for (int j = 0; j < 12; j++) {Matrix[j + 12 * i][rowOut] ^= state[j];} \ +\ + Matrix[0 + 12 * i][rowInOut] ^= state[11]; \ + Matrix[1 + 12 * i][rowInOut] ^= state[0]; \ + Matrix[2 + 
12 * i][rowInOut] ^= state[1]; \ + Matrix[3 + 12 * i][rowInOut] ^= state[2]; \ + Matrix[4 + 12 * i][rowInOut] ^= state[3]; \ + Matrix[5 + 12 * i][rowInOut] ^= state[4]; \ + Matrix[6 + 12 * i][rowInOut] ^= state[5]; \ + Matrix[7 + 12 * i][rowInOut] ^= state[6]; \ + Matrix[8 + 12 * i][rowInOut] ^= state[7]; \ + Matrix[9 + 12 * i][rowInOut] ^= state[8]; \ + Matrix[10 + 12 * i][rowInOut] ^= state[9]; \ + Matrix[11 + 12 * i][rowInOut] ^= state[10]; \ + } \ + \ + } +#define absorbblock(in) { \ + state[0] ^= Matrix[0][in]; \ + state[1] ^= Matrix[1][in]; \ + state[2] ^= Matrix[2][in]; \ + state[3] ^= Matrix[3][in]; \ + state[4] ^= Matrix[4][in]; \ + state[5] ^= Matrix[5][in]; \ + state[6] ^= Matrix[6][in]; \ + state[7] ^= Matrix[7][in]; \ + state[8] ^= Matrix[8][in]; \ + state[9] ^= Matrix[9][in]; \ + state[10] ^= Matrix[10][in]; \ + state[11] ^= Matrix[11][in]; \ + round_lyra_v35(state); \ + round_lyra_v35(state); \ + round_lyra_v35(state); \ + round_lyra_v35(state); \ + round_lyra_v35(state); \ + round_lyra_v35(state); \ + round_lyra_v35(state); \ + round_lyra_v35(state); \ + round_lyra_v35(state); \ + round_lyra_v35(state); \ + round_lyra_v35(state); \ + round_lyra_v35(state); \ + } + +//// compute 30 version +#define reduceDuplexRowSetup_v30(rowIn, rowInOut, rowOut) \ + { \ + for (int i = 0; i < 8; i++) \ + { \ +\ + for (int j = 0; j < 12; j++) {state[j] ^= Matrix[12 * i + j][rowIn] + Matrix[12 * i + j][rowInOut];} \ + round_lyra_v30(state); \ + for (int j = 0; j < 12; j++) {Matrix[j + 84 - 12 * i][rowOut] = Matrix[12 * i + j][rowIn] ^ state[j];} \ +\ + Matrix[0 + 12 * i][rowInOut] ^= state[11]; \ + Matrix[1 + 12 * i][rowInOut] ^= state[0]; \ + Matrix[2 + 12 * i][rowInOut] ^= state[1]; \ + Matrix[3 + 12 * i][rowInOut] ^= state[2]; \ + Matrix[4 + 12 * i][rowInOut] ^= state[3]; \ + Matrix[5 + 12 * i][rowInOut] ^= state[4]; \ + Matrix[6 + 12 * i][rowInOut] ^= state[5]; \ + Matrix[7 + 12 * i][rowInOut] ^= state[6]; \ + Matrix[8 + 12 * i][rowInOut] ^= state[7]; \ + Matrix[9 
+ 12 * i][rowInOut] ^= state[8]; \ + Matrix[10 + 12 * i][rowInOut] ^= state[9]; \ + Matrix[11 + 12 * i][rowInOut] ^= state[10]; \ + } \ + \ + } + +#define reduceDuplexRow_v30(rowIn, rowInOut, rowOut) \ + { \ + for (int i = 0; i < 8; i++) \ + { \ + for (int j = 0; j < 12; j++) \ + state[j] ^= Matrix[12 * i + j][rowIn] + Matrix[12 * i + j][rowInOut]; \ + \ + round_lyra_v30(state); \ + for (int j = 0; j < 12; j++) {Matrix[j + 12 * i][rowOut] ^= state[j];} \ +\ + Matrix[0 + 12 * i][rowInOut] ^= state[11]; \ + Matrix[1 + 12 * i][rowInOut] ^= state[0]; \ + Matrix[2 + 12 * i][rowInOut] ^= state[1]; \ + Matrix[3 + 12 * i][rowInOut] ^= state[2]; \ + Matrix[4 + 12 * i][rowInOut] ^= state[3]; \ + Matrix[5 + 12 * i][rowInOut] ^= state[4]; \ + Matrix[6 + 12 * i][rowInOut] ^= state[5]; \ + Matrix[7 + 12 * i][rowInOut] ^= state[6]; \ + Matrix[8 + 12 * i][rowInOut] ^= state[7]; \ + Matrix[9 + 12 * i][rowInOut] ^= state[8]; \ + Matrix[10 + 12 * i][rowInOut] ^= state[9]; \ + Matrix[11 + 12 * i][rowInOut] ^= state[10]; \ + } \ + \ + } +#define absorbblock_v30(in) { \ + state[0] ^= Matrix[0][in]; \ + state[1] ^= Matrix[1][in]; \ + state[2] ^= Matrix[2][in]; \ + state[3] ^= Matrix[3][in]; \ + state[4] ^= Matrix[4][in]; \ + state[5] ^= Matrix[5][in]; \ + state[6] ^= Matrix[6][in]; \ + state[7] ^= Matrix[7][in]; \ + state[8] ^= Matrix[8][in]; \ + state[9] ^= Matrix[9][in]; \ + state[10] ^= Matrix[10][in]; \ + state[11] ^= Matrix[11][in]; \ + round_lyra_v30(state); \ + round_lyra_v30(state); \ + round_lyra_v30(state); \ + round_lyra_v30(state); \ + round_lyra_v30(state); \ + round_lyra_v30(state); \ + round_lyra_v30(state); \ + round_lyra_v30(state); \ + round_lyra_v30(state); \ + round_lyra_v30(state); \ + round_lyra_v30(state); \ + round_lyra_v30(state); \ + } + + + + + static __device__ __forceinline__ void Gfunc_v35(uint2 & a, uint2 &b, uint2 &c, uint2 &d) + { + a += b; d ^= a; d = ROR2(d, 32); + c += d; b ^= c; b = ROR2(b, 24); + a += b; d ^= a; d = ROR2(d, 16); + c += d; b ^= c; b = 
ROR2(b, 63); + } + + + static __device__ __forceinline__ void Gfunc_v30(uint64_t & a, uint64_t &b, uint64_t &c, uint64_t &d) + { + a += b; d ^= a; d = ROTR64(d, 32); + c += d; b ^= c; b = ROTR64(b, 24); + a += b; d ^= a; d = ROTR64(d, 16); + c += d; b ^= c; b = ROTR64(b, 63); + } + + +static __device__ __forceinline__ void round_lyra_v35(uint2 *s) +{ + Gfunc_v35(s[0], s[4], s[8], s[12]); + Gfunc_v35(s[1], s[5], s[9], s[13]); + Gfunc_v35(s[2], s[6], s[10], s[14]); + Gfunc_v35(s[3], s[7], s[11], s[15]); + Gfunc_v35(s[0], s[5], s[10], s[15]); + Gfunc_v35(s[1], s[6], s[11], s[12]); + Gfunc_v35(s[2], s[7], s[8], s[13]); + Gfunc_v35(s[3], s[4], s[9], s[14]); +} + +static __device__ __forceinline__ void round_lyra_v30(uint64_t *s) +{ + Gfunc_v30(s[0], s[4], s[8], s[12]); + Gfunc_v30(s[1], s[5], s[9], s[13]); + Gfunc_v30(s[2], s[6], s[10], s[14]); + Gfunc_v30(s[3], s[7], s[11], s[15]); + Gfunc_v30(s[0], s[5], s[10], s[15]); + Gfunc_v30(s[1], s[6], s[11], s[12]); + Gfunc_v30(s[2], s[7], s[8], s[13]); + Gfunc_v30(s[3], s[4], s[9], s[14]); +} + + + +__global__ void __launch_bounds__(160, 1) lyra2_gpu_hash_32_v30(int threads, uint32_t startNounce, uint64_t *outputHash) +{ + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + uint64_t state[16]; +#pragma unroll + for (int i = 0; i<4; i++) { state[i] = outputHash[threads*i + thread]; } //password +#pragma unroll + for (int i = 0; i<4; i++) { state[i + 4] = state[i]; } //salt +#pragma unroll + for (int i = 0; i<8; i++) { state[i + 8] = devectorize(blake2b_IV[i]); } + + // blake2blyra x2 +#pragma unroll 24 + for (int i = 0; i<24; i++) { round_lyra_v30(state); } //because 12 is not enough + + uint64_t Matrix[96][8]; // not cool + /// reducedSqueezeRow0 +#pragma unroll 8 + for (int i = 0; i < 8; i++) + { +int idx = 84-12*i; +#pragma unroll 12 + for (int j = 0; j<12; j++) { Matrix[j + idx][0] = state[j]; } + round_lyra_v30(state); + } + + /// reducedSqueezeRow1 +#pragma unroll 8 + for (int i = 0; i < 
8; i++) + { +int idx0= 12*i; +int idx1= 84-idx0; +#pragma unroll 12 + for (int j = 0; j<12; j++) { state[j] ^= Matrix[j + idx0][0]; } + round_lyra_v30(state); +#pragma unroll 12 + for (int j = 0; j<12; j++) { Matrix[j + idx1][1] = Matrix[j + idx0][0] ^ state[j]; } + } + + + reduceDuplexRowSetup_v30(1, 0, 2); + reduceDuplexRowSetup_v30(2, 1, 3); + reduceDuplexRowSetup_v30(3, 0, 4); + reduceDuplexRowSetup_v30(4, 3, 5); + reduceDuplexRowSetup_v30(5, 2, 6); + reduceDuplexRowSetup_v30(6, 1, 7); + + + + uint64_t rowa; + rowa = state[0] & 7; + reduceDuplexRow_v30(7, rowa, 0); + rowa = state[0] & 7; + reduceDuplexRow_v30(0, rowa, 3); + rowa = state[0] & 7; + reduceDuplexRow_v30(3, rowa, 6); + rowa = state[0] & 7; + reduceDuplexRow_v30(6, rowa, 1); + rowa = state[0] & 7; + reduceDuplexRow_v30(1, rowa, 4); + rowa = state[0] & 7; + reduceDuplexRow_v30(4, rowa, 7); + rowa = state[0] & 7; + reduceDuplexRow_v30(7, rowa, 2); + rowa = state[0] & 7; + reduceDuplexRow_v30(2, rowa, 5); + + absorbblock_v30(rowa); + + +#pragma unroll + for (int i = 0; i<4; i++) { + outputHash[threads*i + thread] = state[i]; + } //password + + + } //thread +} + + +__global__ void __launch_bounds__(160, 1) lyra2_gpu_hash_32(int threads, uint32_t startNounce, uint64_t *outputHash) +{ + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + uint2 state[16]; +#pragma unroll + for (int i = 0; i<4; i++) { LOHI(state[i].x, state[i].y, outputHash[threads*i + thread]); } //password +#pragma unroll + for (int i = 0; i<4; i++) { state[i + 4] = state[i]; } //salt +#pragma unroll + for (int i = 0; i<8; i++) { state[i + 8] = blake2b_IV[i]; } + + // blake2blyra x2 +#pragma unroll 24 + for (int i = 0; i<24; i++) { round_lyra_v35(state); } //because 12 is not enough + + uint2 Matrix[96][8]; // not cool + + /// reducedSqueezeRow0 +#pragma unroll 8 + for (int i = 0; i < 8; i++) + { +#pragma unroll 12 + for (int j = 0; j<12; j++) { Matrix[j + 84 - 12 * i][0] = state[j]; } + 
round_lyra_v35(state); + } + + /// reducedSqueezeRow1 +#pragma unroll 8 + for (int i = 0; i < 8; i++) + { +#pragma unroll 12 + for (int j = 0; j<12; j++) { state[j] ^= Matrix[j + 12 * i][0]; } + round_lyra_v35(state); +#pragma unroll 12 + for (int j = 0; j<12; j++) { Matrix[j + 84 - 12 * i][1] = Matrix[j + 12 * i][0] ^ state[j]; } + } + + reduceDuplexRowSetup(1, 0, 2); + reduceDuplexRowSetup(2, 1, 3); + reduceDuplexRowSetup(3, 0, 4); + reduceDuplexRowSetup(4, 3, 5); + reduceDuplexRowSetup(5, 2, 6); + reduceDuplexRowSetup(6, 1, 7); + + + + uint32_t rowa; + rowa = state[0].x & 7; + reduceDuplexRow(7, rowa, 0); + rowa = state[0].x & 7; + reduceDuplexRow(0, rowa, 3); + rowa = state[0].x & 7; + reduceDuplexRow(3, rowa, 6); + rowa = state[0].x & 7; + reduceDuplexRow(6, rowa, 1); + rowa = state[0].x & 7; + reduceDuplexRow(1, rowa, 4); + rowa = state[0].x & 7; + reduceDuplexRow(4, rowa, 7); + rowa = state[0].x & 7; + reduceDuplexRow(7, rowa, 2); + rowa = state[0].x & 7; + reduceDuplexRow(2, rowa, 5); + + absorbblock(rowa); + + +#pragma unroll + for (int i = 0; i<4; i++) { + outputHash[threads*i + thread] = devectorize(state[i]); + } //password + + + } //thread +} + + +void lyra2_cpu_init(int thr_id, int threads) +{ +//not used +} + +/* Host launcher: runs the Lyra2 kernel over `threads` hashes in d_outputHash with 160 threads per block; uses the uint2 kernel when compute_version[thr_id] >= 35 and the uint64_t (_v30) kernel otherwise, then calls MyStreamSynchronize. */ +__host__ void lyra2_cpu_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order) +{ + + const int threadsperblock = 160; + + // compute how many thread blocks we need (ceil-div) + dim3 grid((threads + threadsperblock - 1) / threadsperblock); + dim3 block(threadsperblock); + + size_t shared_size = 0; + + if (compute_version[thr_id]>=35) { + lyra2_gpu_hash_32 <<<grid, block, shared_size>>>(threads, startNounce, d_outputHash); + } + else { // kernel for compute30 card + lyra2_gpu_hash_32_v30 <<<grid, block, shared_size>>>(threads, startNounce, d_outputHash); + } + + MyStreamSynchronize(NULL, order, thr_id); + +} + diff --git a/lyra2/lyra2RE.cu b/lyra2/lyra2RE.cu new file mode 100644 index 0000000000..440c2d197c --- /dev/null +++ b/lyra2/lyra2RE.cu @@ -0,0 +1,160 
@@ + +extern "C" +{ +#include "sph/sph_blake.h" +#include "sph/sph_groestl.h" +#include "sph/sph_skein.h" +#include "sph/sph_keccak.h" +#include "sph/Lyra2.h" + +#include "miner.h" +} + +#include + +// aus cpu-miner.c +extern int device_map[8]; + +// Speicher für Input/Output der verketteten Hashfunktionen +static uint64_t *d_hash[8]; + + + +extern void quark_check_cpu_init(int thr_id, int threads); +extern void quark_check_cpu_setTarget(const void *ptarget); +extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order); +extern uint32_t quark_check_cpu_hash_64_2(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint64_t *d_inputHash, int order); + + +extern void blake256_cpu_init(int thr_id, int threads); +extern void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint64_t *Hash, int order); +extern void blake256_cpu_setBlock_80(uint32_t *pdata); +extern void keccak256_cpu_hash_32(int thr_id, int threads, uint32_t startNonce, uint64_t *d_outputHash, int order); +extern void keccak256_cpu_init(int thr_id, int threads); +extern void skein256_cpu_hash_32(int thr_id, int threads, uint32_t startNonce, uint64_t *d_outputHash, int order); +extern void skein256_cpu_init(int thr_id, int threads); + +extern void lyra2_cpu_hash_32(int thr_id, int threads, uint32_t startNonce, uint64_t *d_outputHash, int order); +extern void lyra2_cpu_init(int thr_id, int threads); + +extern void groestl256_setTarget(const void *ptarget); +extern uint32_t groestl256_cpu_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order); +extern void groestl256_cpu_init(int thr_id, int threads); +extern uint32_t groestl256_cpu64_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order); +extern void groestl256_cpu64_init(int thr_id, int threads); + + +// X11 Hashfunktion +inline void 
lyra_hash(void *state, const void *input) +{ + // blake1-bmw2-grs3-skein4-jh5-keccak6-luffa7-cubehash8-shavite9-simd10-echo11 + sph_blake256_context ctx_blake; + sph_groestl256_context ctx_groestl; + sph_keccak256_context ctx_keccak; + sph_skein256_context ctx_skein; + + uint32_t hashA[8], hashB[8], hash[8]; + uint32_t * data = (uint32_t*)input; +// for (int i = 0; i<10; i++) { printf("cpu data %d %08x %08x\n", i, data[2*i],data[2*i+1]); } + sph_blake256_init(&ctx_blake); + sph_blake256(&ctx_blake, input, 80); + sph_blake256_close(&ctx_blake, hashA); + + sph_keccak256_init(&ctx_keccak); + sph_keccak256(&ctx_keccak, hashA, 32); + sph_keccak256_close(&ctx_keccak, hashB); + + LYRA2(hashA, 32, hashB, 32, hashB, 32, 1, 8, 8); + + sph_skein256_init(&ctx_skein); + sph_skein256(&ctx_skein, hashA, 32); + sph_skein256_close(&ctx_skein, hashB); + + sph_groestl256_init(&ctx_groestl); + sph_groestl256(&ctx_groestl, hashB, 32); + sph_groestl256_close(&ctx_groestl, hash); +//for (int i = 0; i<4; i++) { printf("cpu groestl %d %08x %08x\n", i, hash[2 * i], hash[2 * i + 1]); } + memcpy(state, hash, 32); +} + +extern float tp_coef[8]; +extern bool opt_benchmark; + +extern "C" int scanhash_lyra(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done) +{ + const uint32_t first_nonce = pdata[19]; + + if (opt_benchmark) + ((uint32_t*)ptarget)[7] = 0x0000ff; + + const uint32_t Htarg = ptarget[7]; + if (tp_coef[thr_id]<0) { tp_coef[thr_id] = 4.; } + const int throughput = (int) (256*256*tp_coef[thr_id]); + + static bool init[8] = {0,0,0,0,0,0,0,0}; + if (!init[thr_id]) + { + cudaSetDevice(device_map[thr_id]); + cudaDeviceReset(); + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); + // Konstanten kopieren, Speicher belegen + cudaMalloc(&d_hash[thr_id], 8 * sizeof(uint32_t) * throughput); + blake256_cpu_init(thr_id, throughput); + keccak256_cpu_init(thr_id,throughput); + 
skein256_cpu_init(thr_id, throughput); + lyra2_cpu_init(thr_id, throughput); + groestl256_cpu_init(thr_id, throughput); + init[thr_id] = true; + } + + uint32_t endiandata[20]; + for (int k=0; k < 20; k++) + be32enc(&endiandata[k], ((uint32_t*)pdata)[k]); + blake256_cpu_setBlock_80(pdata); + groestl256_setTarget(ptarget); + + do { + int order = 0; + + // first hash (Blake-256) with CUDA + blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + + keccak256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + + lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + + + skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + + // scan for winning hashes on the GPU + uint32_t foundNonce = groestl256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++); +//foundNonce = pdata[19]+10; + if (foundNonce != 0xffffffff) + { + uint32_t vhash64[8]; + be32enc(&endiandata[19], foundNonce); + //pdata[19]=foundNonce; +// lyra_hash(vhash64, endiandata); + +// if ( ((uint64_t*)vhash64)[3] <= ((uint64_t*)ptarget)[3]) { // && fulltest(vhash64, ptarget)) { +// printf("target %08x %08x %08x %08x\n", ptarget[0], ptarget[1], ptarget[2], ptarget[3]); +// printf("target %08x %08x %08x %08x\n", ptarget[4], ptarget[5], ptarget[6], ptarget[7]); + + pdata[19] = foundNonce; + *hashes_done = foundNonce - first_nonce + 1; + return 1; +// } else { +// applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce); +// } + } + + if ((uint64_t)pdata[19] + (uint64_t)throughput >= (uint64_t)max_nonce) { pdata[19] = max_nonce; break; } /* 64-bit bound check: a 32-bit add could wrap past max_nonce and restart the scan at low nonces */ + pdata[19] += throughput; + } while (!work_restart[thr_id].restart); + + *hashes_done = pdata[19] - first_nonce + 1; + return 0; +} diff --git a/miner.h b/miner.h index 0e205188fa..75f124c0f3 100644 --- a/miner.h +++ b/miner.h @@ -168,6 +168,59 @@ static inline void le16enc(void *pp, uint16_t x) } #endif +#if !HAVE_DECL_BE64DEC +static inline uint64_t be64dec(const void *pp) +{ + const uint8_t *p = (uint8_t const 
*)pp; + return ((uint64_t)(p[7]) + ((uint64_t)(p[6]) << 8) + + ((uint64_t)(p[5]) << 16) + ((uint64_t)(p[4]) << 24) + + ((uint64_t)(p[3]) << 32) + ((uint64_t)(p[2]) << 40) + + ((uint64_t)(p[1]) << 48) + ((uint64_t)(p[0]) << 56)); +} +#endif + +#if !HAVE_DECL_LE64DEC +static inline uint64_t le64dec(const void *pp) +{ + const uint8_t *p = (uint8_t const *)pp; + return ((uint64_t)(p[0]) + ((uint64_t)(p[1]) << 8) + + ((uint64_t)(p[2]) << 16) + ((uint64_t)(p[3]) << 24) + + ((uint64_t)(p[4]) << 32) + ((uint64_t)(p[5]) << 40) + + ((uint64_t)(p[6]) << 48) + ((uint64_t)(p[7]) << 56)); +} +#endif + +#if !HAVE_DECL_BE64ENC +static inline void be64enc(void *pp, uint64_t x) +{ + uint8_t *p = (uint8_t *)pp; + p[7] = x & 0xff; + p[6] = (x >> 8) & 0xff; + p[5] = (x >> 16) & 0xff; + p[4] = (x >> 24) & 0xff; + p[3] = (x >> 32) & 0xff; + p[2] = (x >> 40) & 0xff; + p[1] = (x >> 48) & 0xff; + p[0] = (x >> 56) & 0xff; +} +#endif + +#if !HAVE_DECL_LE64ENC +static inline void le64enc(void *pp, uint64_t x) +{ + uint8_t *p = (uint8_t *)pp; + p[0] = x & 0xff; + p[1] = (x >> 8) & 0xff; + p[2] = (x >> 16) & 0xff; + p[3] = (x >> 24) & 0xff; + p[4] = (x >> 32) & 0xff; + p[5] = (x >> 40) & 0xff; + p[6] = (x >> 48) & 0xff; + p[7] = (x >> 56) & 0xff; +} +#endif + + #if JANSSON_MAJOR_VERSION >= 2 #define JSON_LOADS(str, err_ptr) json_loads((str), 0, (err_ptr)) #else @@ -231,10 +284,18 @@ extern int scanhash_anime(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done); +extern int scanhash_qubit(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done); + extern int scanhash_nist5(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done); +extern int scanhash_fresh(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done); + extern int scanhash_x11(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t 
max_nonce, unsigned long *hashes_done); @@ -243,6 +304,55 @@ extern int scanhash_x13(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done); +extern int scanhash_x14(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done); + +extern int scanhash_x15(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done); + +extern int scanhash_x17(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done); + +extern int scanhash_goal(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done); + +extern int scanhash_m7(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done); + +extern int scanhash_deep(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done); + +extern int scanhash_lyra(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done); + +extern int scanhash_neoscrypt(bool stratum,int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done); + +extern int scanhash_pluck(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done); + + +extern int scanhash_keccak256(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done); + +extern int scanhash_wh(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done); + +extern int scanhash_doom(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done); + extern void fugue256_hash(unsigned char* output, const unsigned char* input, int len); extern void heavycoin_hash(unsigned char* output, const unsigned char* input, int 
len); extern void groestlcoin_hash(unsigned char* output, const unsigned char* input, int len); @@ -258,6 +368,7 @@ struct work_restart { char padding[128 - sizeof(unsigned long)]; }; + extern bool opt_debug; extern bool opt_protocol; extern int opt_timeout; @@ -277,11 +388,23 @@ extern struct work_restart *work_restart; extern bool opt_trust_pool; extern uint16_t opt_vote; +#define JSON_RPC_LONGPOLL (1 << 0) +#define JSON_RPC_QUIET_404 (1 << 1) +extern bool opt_redirect; +extern bool have_gbt; +extern bool allow_getwork; +extern bool opt_redirect; + extern void applog(int prio, const char *fmt, ...); extern json_t *json_rpc_call(CURL *curl, const char *url, const char *userpass, const char *rpc_req, bool, bool, int *); +extern json_t *json_rpc_call2(CURL *curl, const char *url, const char *userpass, + const char *rpc_req, int *curl_err, int flags); extern char *bin2hex(const unsigned char *p, size_t len); +extern void abin2hex(char *s, const unsigned char *p, size_t len); extern bool hex2bin(unsigned char *p, const char *hexstr, size_t len); +extern int varint_encode(unsigned char *p, uint64_t n); +extern size_t address_to_script(unsigned char *out, size_t outsz, const char *addr); extern int timeval_subtract(struct timeval *result, struct timeval *x, struct timeval *y); extern bool fulltest(const uint32_t *hash, const uint32_t *target); @@ -301,6 +424,13 @@ struct stratum_job { bool clean; unsigned char nreward[2]; double diff; + + unsigned char m7prevblock[32]; + unsigned char m7accroot[32]; + unsigned char m7merkleroot[32]; + unsigned char m7height[8]; + unsigned char m7ntime[8]; + unsigned char m7version[2]; }; struct stratum_ctx { @@ -332,6 +462,7 @@ void stratum_disconnect(struct stratum_ctx *sctx); bool stratum_subscribe(struct stratum_ctx *sctx); bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *pass); bool stratum_handle_method(struct stratum_ctx *sctx, const char *s); +bool stratum_handle_method_m7(struct stratum_ctx *sctx, 
const char *s); struct thread_q; diff --git a/pluck/cuda_pluck.cu b/pluck/cuda_pluck.cu new file mode 100644 index 0000000000..d17248e93d --- /dev/null +++ b/pluck/cuda_pluck.cu @@ -0,0 +1,632 @@ +/* + * "pluck" kernel implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2015 djm34 + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author djm34 + */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + + +#include +#include +#include + + + +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); +extern int compute_version[8]; + +__device__ uint8_t * hashbuffer; +uint32_t *d_PlNonce[8]; +__constant__ uint32_t pTarget[8]; +__constant__ uint32_t c_data[20]; +#include "cuda_vector.h" + + +#define HASH_MEMORY_8bit 131072 +#define HASH_MEMORY_32bit 32768 +#define HASH_MEMORY 4096 + +static __constant__ uint32_t H256[8] = { + 0x6A09E667, 0xBB67AE85, 0x3C6EF372, + 0xA54FF53A, 0x510E527F, 0x9B05688C, + 0x1F83D9AB, 0x5BE0CD19 +}; + +static __constant__ uint32_t Ksha[64] = { + 0x428A2F98, 0x71374491, + 0xB5C0FBCF, 0xE9B5DBA5, + 0x3956C25B, 0x59F111F1, + 0x923F82A4, 0xAB1C5ED5, + 0xD807AA98, 0x12835B01, + 0x243185BE, 0x550C7DC3, + 0x72BE5D74, 0x80DEB1FE, + 0x9BDC06A7, 0xC19BF174, + 0xE49B69C1, 0xEFBE4786, + 0x0FC19DC6, 0x240CA1CC, + 0x2DE92C6F, 0x4A7484AA, + 0x5CB0A9DC, 0x76F988DA, + 0x983E5152, 0xA831C66D, + 0xB00327C8, 0xBF597FC7, + 0xC6E00BF3, 0xD5A79147, + 0x06CA6351, 0x14292967, + 0x27B70A85, 0x2E1B2138, + 0x4D2C6DFC, 0x53380D13, + 0x650A7354, 0x766A0ABB, + 0x81C2C92E, 0x92722C85, + 0xA2BFE8A1, 0xA81A664B, + 0xC24B8B70, 0xC76C51A3, + 0xD192E819, 0xD6990624, + 0xF40E3585, 0x106AA070, + 0x19A4C116, 0x1E376C08, + 0x2748774C, 0x34B0BCB5, + 0x391C0CB3, 0x4ED8AA4A, + 0x5B9CCA4F, 0x682E6FF3, + 0x748F82EE, 0x78A5636F, + 0x84C87814, 0x8CC70208, + 0x90BEFFFA, 0xA4506CEB, + 0xBEF9A3F7, 0xC67178F2 +}; + + +#define SALSA(a,b,c,d) { \ + t =a+d; b^=rotate(t, 7); \ + t =b+a; c^=rotate(t, 9); \ + t =c+b; d^=rotate(t, 13); \ + t =d+c; a^=rotate(t, 18); \ +} + + +#define SALSA_CORE(state) { \ +\ +SALSA(state.s0,state.s4,state.s8,state.sc); \ +SALSA(state.s5,state.s9,state.sd,state.s1); \ +SALSA(state.sa,state.se,state.s2,state.s6); \ 
+SALSA(state.sf,state.s3,state.s7,state.sb); \ +SALSA(state.s0,state.s1,state.s2,state.s3); \ +SALSA(state.s5,state.s6,state.s7,state.s4); \ +SALSA(state.sa,state.sb,state.s8,state.s9); \ +SALSA(state.sf,state.sc,state.sd,state.se); \ + } + + +static __device__ __forceinline__ uint16 xor_salsa8(const uint16 &Bx) +{ + uint32_t t; + uint16 state = Bx; + SALSA_CORE(state); + SALSA_CORE(state); + SALSA_CORE(state); + SALSA_CORE(state); + return(state+Bx); +} + + + +// sha256 + +static __device__ __forceinline__ uint32_t bsg2_0(const uint32_t x) +{ + uint32_t r1 = SPH_ROTR32(x, 2); + uint32_t r2 = SPH_ROTR32(x, 13); + uint32_t r3 = SPH_ROTR32(x, 22); + return xor3b(r1, r2, r3); +} + +static __device__ __forceinline__ uint32_t bsg2_1(const uint32_t x) +{ + uint32_t r1 = SPH_ROTR32(x, 6); + uint32_t r2 = SPH_ROTR32(x, 11); + uint32_t r3 = SPH_ROTR32(x, 25); + return xor3b(r1, r2, r3); +} + +static __device__ __forceinline__ uint32_t ssg2_0(const uint32_t x) +{ + uint64_t r1 = SPH_ROTR32(x, 7); + uint64_t r2 = SPH_ROTR32(x, 18); + uint64_t r3 = shr_t32(x, 3); + return xor3b(r1, r2, r3); +} + +static __device__ __forceinline__ uint32_t ssg2_1(const uint32_t x) +{ + uint64_t r1 = SPH_ROTR32(x, 17); + uint64_t r2 = SPH_ROTR32(x, 19); + uint64_t r3 = shr_t32(x, 10); + return xor3b(r1, r2, r3); +} + +static __device__ __forceinline__ void sha2_step1(const uint32_t a, const uint32_t b, const uint32_t c, uint32_t &d, const uint32_t e, +const uint32_t f, const uint32_t g, uint32_t &h, const uint32_t in, const uint32_t Kshared) +{ + uint32_t t1, t2; + uint32_t vxandx = xandx(e, f, g); + uint32_t bsg21 = bsg2_1(e); + uint32_t bsg20 = bsg2_0(a); + uint32_t andorv = andor32(a, b, c); + + t1 = h + bsg21 + vxandx + Kshared + in; + t2 = bsg20 + andorv; + d = d + t1; + h = t1 + t2; +} + +static __device__ __forceinline__ void sha2_step2(const uint32_t a, const uint32_t b, const uint32_t c, uint32_t &d, const uint32_t e, +const uint32_t f, const uint32_t g, uint32_t &h, uint32_t* in, const 
uint32_t pc, const uint32_t Kshared) +{ + uint32_t t1, t2; + + int pcidx1 = (pc - 2) & 0xF; + int pcidx2 = (pc - 7) & 0xF; + int pcidx3 = (pc - 15) & 0xF; + uint32_t inx0 = in[pc]; + uint32_t inx1 = in[pcidx1]; + uint32_t inx2 = in[pcidx2]; + uint32_t inx3 = in[pcidx3]; + + + uint32_t ssg21 = ssg2_1(inx1); + uint32_t ssg20 = ssg2_0(inx3); + uint32_t vxandx = xandx(e, f, g); + uint32_t bsg21 = bsg2_1(e); + uint32_t bsg20 = bsg2_0(a); + uint32_t andorv = andor32(a, b, c); + + in[pc] = ssg21 + inx2 + ssg20 + inx0; + + t1 = h + bsg21 + vxandx + Kshared + in[pc]; + t2 = bsg20 + andorv; + d = d + t1; + h = t1 + t2; + +} + + +static __device__ __forceinline__ void sha2_round_body(uint32_t* in, uint32_t* r) +{ + uint32_t a = r[0]; + uint32_t b = r[1]; + uint32_t c = r[2]; + uint32_t d = r[3]; + uint32_t e = r[4]; + uint32_t f = r[5]; + uint32_t g = r[6]; + uint32_t h = r[7]; + + sha2_step1(a, b, c, d, e, f, g, h, in[0], Ksha[0]); + sha2_step1(h, a, b, c, d, e, f, g, in[1], Ksha[1]); + sha2_step1(g, h, a, b, c, d, e, f, in[2], Ksha[2]); + sha2_step1(f, g, h, a, b, c, d, e, in[3], Ksha[3]); + sha2_step1(e, f, g, h, a, b, c, d, in[4], Ksha[4]); + sha2_step1(d, e, f, g, h, a, b, c, in[5], Ksha[5]); + sha2_step1(c, d, e, f, g, h, a, b, in[6], Ksha[6]); + sha2_step1(b, c, d, e, f, g, h, a, in[7], Ksha[7]); + sha2_step1(a, b, c, d, e, f, g, h, in[8], Ksha[8]); + sha2_step1(h, a, b, c, d, e, f, g, in[9], Ksha[9]); + sha2_step1(g, h, a, b, c, d, e, f, in[10], Ksha[10]); + sha2_step1(f, g, h, a, b, c, d, e, in[11], Ksha[11]); + sha2_step1(e, f, g, h, a, b, c, d, in[12], Ksha[12]); + sha2_step1(d, e, f, g, h, a, b, c, in[13], Ksha[13]); + sha2_step1(c, d, e, f, g, h, a, b, in[14], Ksha[14]); + sha2_step1(b, c, d, e, f, g, h, a, in[15], Ksha[15]); + +#pragma unroll 3 + for (int i = 0; i<3; i++) { + + sha2_step2(a, b, c, d, e, f, g, h, in, 0, Ksha[16 + 16 * i]); + sha2_step2(h, a, b, c, d, e, f, g, in, 1, Ksha[17 + 16 * i]); + sha2_step2(g, h, a, b, c, d, e, f, in, 2, Ksha[18 + 16 * 
i]); + sha2_step2(f, g, h, a, b, c, d, e, in, 3, Ksha[19 + 16 * i]); + sha2_step2(e, f, g, h, a, b, c, d, in, 4, Ksha[20 + 16 * i]); + sha2_step2(d, e, f, g, h, a, b, c, in, 5, Ksha[21 + 16 * i]); + sha2_step2(c, d, e, f, g, h, a, b, in, 6, Ksha[22 + 16 * i]); + sha2_step2(b, c, d, e, f, g, h, a, in, 7, Ksha[23 + 16 * i]); + sha2_step2(a, b, c, d, e, f, g, h, in, 8, Ksha[24 + 16 * i]); + sha2_step2(h, a, b, c, d, e, f, g, in, 9, Ksha[25 + 16 * i]); + sha2_step2(g, h, a, b, c, d, e, f, in, 10, Ksha[26 + 16 * i]); + sha2_step2(f, g, h, a, b, c, d, e, in, 11, Ksha[27 + 16 * i]); + sha2_step2(e, f, g, h, a, b, c, d, in, 12, Ksha[28 + 16 * i]); + sha2_step2(d, e, f, g, h, a, b, c, in, 13, Ksha[29 + 16 * i]); + sha2_step2(c, d, e, f, g, h, a, b, in, 14, Ksha[30 + 16 * i]); + sha2_step2(b, c, d, e, f, g, h, a, in, 15, Ksha[31 + 16 * i]); + + } + + + + r[0] += a; + r[1] += b; + r[2] += c; + r[3] += d; + r[4] += e; + r[5] += f; + r[6] += g; + r[7] += h; +} + + +static __device__ __forceinline__ uint8 sha256_64(uint32_t *data) +{ + + uint32_t __align__(64) in[16]; + uint32_t __align__(32) buf[8]; + + ((uint16 *)in)[0] = swapvec((uint16*)data); + + ((uint8*)buf)[0] = ((uint8*)H256)[0]; + + sha2_round_body(in, buf); + +#pragma unroll 14 + for (int i = 0; i<14; i++) { in[i + 1] = 0; } + in[0] = 0x80000000; + in[15] = 0x200; + + + sha2_round_body(in, buf); + return swapvec((uint8*)buf); +} + + +static __device__ __forceinline__ uint8 sha256_80(uint32_t nonce) +{ + +// uint32_t in[16], buf[8]; + uint32_t __align__(64) in[16]; + uint32_t __align__(32) buf[8]; + ((uint16 *)in)[0] = swapvec((uint16*)c_data); + + ((uint8*)buf)[0] = ((uint8*)H256)[0]; + + sha2_round_body(in, buf); + + +#pragma unroll 3 + for (int i = 0; i<3; i++) { in[i] = cuda_swab32(c_data[i + 16]); } +// in[3] = cuda_swab32(nonce); + in[3] = nonce; + in[4] = 0x80000000; + in[15] = 0x280; + +#pragma unroll 10 + for (int i = 5; i<15; i++) { in[i] = 0; } + + sha2_round_body(in, buf); + return swapvec((uint8*)buf); +} 
+ + +#define SHIFT 32 * 1024 * 4 +__global__ __launch_bounds__(256, 1) void pluck_gpu_hash0_v50(int threads, uint32_t startNonce) +{ + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + const uint32_t nonce = startNonce + thread; + + int shift = SHIFT * thread; //uint32_t + ((uint8*)(hashbuffer + shift))[0] = sha256_80(nonce); + ((uint8*)(hashbuffer + shift))[1] = make_uint8(0, 0, 0, 0, 0, 0, 0, 0); + for (int i = 2; i < 5; i++) + { + uint32_t randmax = i * 32 - 4; + uint32_t randseed[16]; + uint32_t randbuffer[16]; + uint32_t joint[16]; + uint8 Buffbuffer[2]; + + ((uint8*)randseed)[0] = __ldg8(&(hashbuffer + shift)[32 * i - 64]); + ((uint8*)randseed)[1] = __ldg8(&(hashbuffer + shift)[32 * i - 32]); + + + + ((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]); + +// ((uint8*)joint)[0] = __ldg8(&(hashbuffer + shift)[(i - 1) << 5]); + ((uint8*)joint)[0] = ((uint8*)randseed)[1]; +#pragma unroll + for (int j = 0; j < 8; j++) + { + uint32_t rand = randbuffer[j] % (randmax - 32); + joint[j + 8] = __ldgtoint_unaligned(&(hashbuffer + shift)[rand]); + } + + uint8 truc = sha256_64(joint); + ((uint8*)(hashbuffer + shift))[i] = truc; + ((uint8*)randseed)[0] = ((uint8*)joint)[0]; + ((uint8*)randseed)[1] = truc; + + + ((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]); + + + for (int j = 0; j < 32; j += 2) + { + + uint32_t rand = randbuffer[j / 2] % randmax; + (hashbuffer + shift)[rand] = __ldg(&(hashbuffer + shift)[randmax + j]); + (hashbuffer + shift)[rand + 1] = __ldg(&(hashbuffer + shift)[randmax + j + 1]); + (hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]); + (hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]); + } + + } // main loop + +} +} +__global__ __launch_bounds__(256, 1) void pluck_gpu_hash_v50(int threads, uint32_t startNonce, uint32_t *nonceVector) +{ + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + const uint32_t 
nonce = startNonce + thread; + + int shift = SHIFT * thread; //uint32_t + + for (int i = 5; i < HASH_MEMORY - 1; i++) + { + uint32_t randmax = i*32-4; + uint32_t randseed[16]; + uint32_t randbuffer[16]; + uint32_t joint[16]; + uint8 Buffbuffer[2]; + + ((uint8*)randseed)[0] = __ldg8(&(hashbuffer + shift)[32*i-64]); + ((uint8*)randseed)[1] = __ldg8(&(hashbuffer + shift)[32*i-32]); + + + Buffbuffer[0] = __ldg8(&(hashbuffer + shift)[32*i - 128]); + Buffbuffer[1] = __ldg8(&(hashbuffer + shift)[32*i - 96]); + ((uint16*)randseed)[0] ^= ((uint16*)Buffbuffer)[0]; + + ((uint16*)randbuffer)[0]= xor_salsa8(((uint16*)randseed)[0]); + + ((uint8*)joint)[0] = __ldg8(&(hashbuffer + shift)[(i-1)<<5]); + +#pragma unroll + for (int j = 0; j < 8; j++) + { + uint32_t rand = randbuffer[j] % (randmax - 32); + joint[j+8] = __ldgtoint_unaligned(&(hashbuffer + shift)[rand]); + } + + uint8 truc = sha256_64(joint); + ((uint8*)(hashbuffer + shift))[i] = truc; + ((uint8*)randseed)[0] = ((uint8*)joint)[0]; + ((uint8*)randseed)[1] = truc; + + + ((uint16*)randseed)[0] ^= ((uint16*)Buffbuffer)[0]; + + + ((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]); + + + for (int j = 0; j < 32; j += 2) + { + + uint32_t rand = randbuffer[j / 2] % randmax; + + (hashbuffer+shift)[rand] = __ldg(&(hashbuffer+shift)[randmax+j]); + (hashbuffer + shift)[rand + 1] = __ldg(&(hashbuffer + shift)[randmax + j + 1]); + (hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]); + (hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]); + } + + } // main loop + + uint32_t outbuf = __ldgtoint(&(hashbuffer + shift)[28]); + + if (outbuf <= pTarget[7]) { + nonceVector[0] = nonce; + } + + } +} + +__global__ __launch_bounds__(128, 3) void pluck_gpu_hash0(int threads, uint32_t startNonce) +{ + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + const uint32_t nonce = startNonce + thread; + + int shift = SHIFT * thread; //uint32_t + 
((uint8*)(hashbuffer + shift))[0] = sha256_80(nonce); + ((uint8*)(hashbuffer + shift))[1] = make_uint8(0, 0, 0, 0, 0, 0, 0, 0); + for (int i = 2; i < 5; i++) + { + uint32_t randmax = i * 32 - 4; + uint32_t randseed[16]; + uint32_t randbuffer[16]; + uint32_t joint[16]; + uint8 Buffbuffer[2]; + + ((uint8*)randseed)[0] = __ldg8(&(hashbuffer + shift)[32 * i - 64]); + ((uint8*)randseed)[1] = __ldg8(&(hashbuffer + shift)[32 * i - 32]); + + + + ((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]); + + // ((uint8*)joint)[0] = __ldg8(&(hashbuffer + shift)[(i - 1) << 5]); + ((uint8*)joint)[0] = ((uint8*)randseed)[1]; +#pragma unroll + for (int j = 0; j < 8; j++) + { + uint32_t rand = randbuffer[j] % (randmax - 32); + joint[j + 8] = __ldgtoint_unaligned(&(hashbuffer + shift)[rand]); + } + + uint8 truc = sha256_64(joint); + ((uint8*)(hashbuffer + shift))[i] = truc; + ((uint8*)randseed)[0] = ((uint8*)joint)[0]; + ((uint8*)randseed)[1] = truc; + + + ((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]); + + + for (int j = 0; j < 32; j += 2) + { + + uint32_t rand = randbuffer[j / 2] % randmax; + (hashbuffer + shift)[rand] = __ldg(&(hashbuffer + shift)[randmax + j]); + (hashbuffer + shift)[rand + 1] = __ldg(&(hashbuffer + shift)[randmax + j + 1]); + (hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]); + (hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]); + } + + } // main loop + + } +} +__global__ __launch_bounds__(128, 3) void pluck_gpu_hash(int threads, uint32_t startNonce, uint32_t *nonceVector) +{ + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + const uint32_t nonce = startNonce + thread; + + int shift = SHIFT * thread; //uint32_t + + for (int i = 5; i < HASH_MEMORY - 1; i++) + { + uint32_t randmax = i * 32 - 4; + uint32_t randseed[16]; + uint32_t randbuffer[16]; + uint32_t joint[16]; + uint8 Buffbuffer[2]; + + ((uint8*)randseed)[0] = __ldg8(&(hashbuffer + shift)[32 * 
i - 64]); + ((uint8*)randseed)[1] = __ldg8(&(hashbuffer + shift)[32 * i - 32]); + + + Buffbuffer[0] = __ldg8(&(hashbuffer + shift)[32 * i - 128]); + Buffbuffer[1] = __ldg8(&(hashbuffer + shift)[32 * i - 96]); + ((uint16*)randseed)[0] ^= ((uint16*)Buffbuffer)[0]; + + ((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]); + + ((uint8*)joint)[0] = __ldg8(&(hashbuffer + shift)[(i - 1) << 5]); + +#pragma unroll + for (int j = 0; j < 8; j++) + { + uint32_t rand = randbuffer[j] % (randmax - 32); + joint[j + 8] = __ldgtoint_unaligned(&(hashbuffer + shift)[rand]); + } + + uint8 truc = sha256_64(joint); + ((uint8*)(hashbuffer + shift))[i] = truc; + ((uint8*)randseed)[0] = ((uint8*)joint)[0]; + ((uint8*)randseed)[1] = truc; + + + ((uint16*)randseed)[0] ^= ((uint16*)Buffbuffer)[0]; + + + ((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]); + + + for (int j = 0; j < 32; j += 2) + { + + uint32_t rand = randbuffer[j / 2] % randmax; + + (hashbuffer + shift)[rand] = __ldg(&(hashbuffer + shift)[randmax + j]); + (hashbuffer + shift)[rand + 1] = __ldg(&(hashbuffer + shift)[randmax + j + 1]); + (hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]); + (hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]); + } + + } // main loop + + uint32_t outbuf = __ldgtoint(&(hashbuffer + shift)[28]); + + if (outbuf <= pTarget[7]) { + nonceVector[0] = nonce; + } + + } +} + + +void pluck_cpu_init(int thr_id, int threads, uint32_t* hash) +{ + + cudaMemcpyToSymbol(hashbuffer, &hash, sizeof(hash), 0, cudaMemcpyHostToDevice); + cudaMalloc(&d_PlNonce[thr_id], sizeof(uint32_t)); + +} + + +__host__ uint32_t pluck_cpu_hash(int thr_id, int threads, uint32_t startNounce, int order) +{ + uint32_t result[8] = {0xffffffff}; + cudaMemset(d_PlNonce[thr_id], 0xffffffff, sizeof(uint32_t)); + + + const int threadsperblock = 128; + + + dim3 grid((threads + threadsperblock - 1) / threadsperblock); + dim3 block(threadsperblock); + dim3 grid50((threads + 
256 - 1) / 256); + dim3 block50(256); + + if (compute_version[thr_id]==50) { + pluck_gpu_hash0_v50 << > >(threads, startNounce); + pluck_gpu_hash_v50 << > >(threads, startNounce, d_PlNonce[thr_id]); + } + else { + pluck_gpu_hash0 << > >(threads, startNounce); + pluck_gpu_hash << > >(threads, startNounce, d_PlNonce[thr_id]); + } + + MyStreamSynchronize(NULL, order, thr_id); + cudaMemcpy(&result[thr_id], d_PlNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost); + +return result[thr_id]; +} + + + +__host__ void pluck_setBlockTarget(const void *pdata, const void *ptarget) +{ + unsigned char PaddedMessage[80]; + memcpy(PaddedMessage, pdata, 80); + cudaMemcpyToSymbol(c_data, PaddedMessage, 10 * sizeof(uint64_t), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(pTarget, ptarget, 8 * sizeof(uint32_t), 0, cudaMemcpyHostToDevice); +} \ No newline at end of file diff --git a/pluck/pluck.cu b/pluck/pluck.cu new file mode 100644 index 0000000000..a9a7d07bc9 --- /dev/null +++ b/pluck/pluck.cu @@ -0,0 +1,288 @@ + +extern "C" +{ +//#include "sph/neoscrypt.h" +#include "miner.h" +} + +#include + +// aus cpu-miner.c +extern int device_map[8]; + +// Speicher für Input/Output der verketteten Hashfunktionen + +static uint32_t *d_hash[8] ; + + +extern void pluck_setBlockTarget(const void* data, const void *ptarget); +extern void pluck_cpu_init(int thr_id, int threads, uint32_t *d_outputHash); +extern uint32_t pluck_cpu_hash(int thr_id, int threads, uint32_t startNounce, int order); + + +extern float tp_coef[8]; +extern bool opt_benchmark; + +#define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b)))) +//note, this is 64 bytes +static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16]) +{ +#define ROTL(a, b) (((a) << (b)) | ((a) >> (32 - (b)))) + uint32_t x00, x01, x02, x03, x04, x05, x06, x07, x08, x09, x10, x11, x12, x13, x14, x15; + int i; + + x00 = (B[0] ^= Bx[0]); + x01 = (B[1] ^= Bx[1]); + x02 = (B[2] ^= Bx[2]); + x03 = (B[3] ^= Bx[3]); + x04 = (B[4] ^= Bx[4]); + x05 = 
(B[5] ^= Bx[5]); + x06 = (B[6] ^= Bx[6]); + x07 = (B[7] ^= Bx[7]); + x08 = (B[8] ^= Bx[8]); + x09 = (B[9] ^= Bx[9]); + x10 = (B[10] ^= Bx[10]); + x11 = (B[11] ^= Bx[11]); + x12 = (B[12] ^= Bx[12]); + x13 = (B[13] ^= Bx[13]); + x14 = (B[14] ^= Bx[14]); + x15 = (B[15] ^= Bx[15]); + for (i = 0; i < 8; i += 2) { + /* Operate on columns. */ + x04 ^= ROTL(x00 + x12, 7); x09 ^= ROTL(x05 + x01, 7); + x14 ^= ROTL(x10 + x06, 7); x03 ^= ROTL(x15 + x11, 7); + + x08 ^= ROTL(x04 + x00, 9); x13 ^= ROTL(x09 + x05, 9); + x02 ^= ROTL(x14 + x10, 9); x07 ^= ROTL(x03 + x15, 9); + + x12 ^= ROTL(x08 + x04, 13); x01 ^= ROTL(x13 + x09, 13); + x06 ^= ROTL(x02 + x14, 13); x11 ^= ROTL(x07 + x03, 13); + + x00 ^= ROTL(x12 + x08, 18); x05 ^= ROTL(x01 + x13, 18); + x10 ^= ROTL(x06 + x02, 18); x15 ^= ROTL(x11 + x07, 18); + + /* Operate on rows. */ + x01 ^= ROTL(x00 + x03, 7); x06 ^= ROTL(x05 + x04, 7); + x11 ^= ROTL(x10 + x09, 7); x12 ^= ROTL(x15 + x14, 7); + + x02 ^= ROTL(x01 + x00, 9); x07 ^= ROTL(x06 + x05, 9); + x08 ^= ROTL(x11 + x10, 9); x13 ^= ROTL(x12 + x15, 9); + + x03 ^= ROTL(x02 + x01, 13); x04 ^= ROTL(x07 + x06, 13); + x09 ^= ROTL(x08 + x11, 13); x14 ^= ROTL(x13 + x12, 13); + + x00 ^= ROTL(x03 + x02, 18); x05 ^= ROTL(x04 + x07, 18); + x10 ^= ROTL(x09 + x08, 18); x15 ^= ROTL(x14 + x13, 18); + } + B[0] += x00; + B[1] += x01; + B[2] += x02; + B[3] += x03; + B[4] += x04; + B[5] += x05; + B[6] += x06; + B[7] += x07; + B[8] += x08; + B[9] += x09; + B[10] += x10; + B[11] += x11; + B[12] += x12; + B[13] += x13; + B[14] += x14; + B[15] += x15; +#undef ROTL +} + +void sha256_hash(unsigned char *hash, const unsigned char *data, int len) +{ + uint32_t S[16], T[16]; + int i, r; + + sha256_init(S); + for (r = len; r > -9; r -= 64) { + if (r < 64) + memset(T, 0, 64); + memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 
0 : r)); + if (r >= 0 && r < 64) + ((unsigned char *)T)[r] = 0x80; + for (i = 0; i < 16; i++) + T[i] = be32dec(T + i); + + if (r < 56) + T[15] = 8 * len; + sha256_transform(S, T, 0); + } + for (i = 0; i < 8; i++) + be32enc((uint32_t *)hash + i, S[i]); +} + +void sha256_hash512(unsigned char *hash, const unsigned char *data) +{ + uint32_t S[16], T[16]; + int i; + + sha256_init(S); + + memcpy(T, data, 64); + for (i = 0; i < 16; i++) + T[i] = be32dec(T + i); + sha256_transform(S, T, 0); + + memset(T, 0, 64); + //memcpy(T, data + 64, 0); + ((unsigned char *)T)[0] = 0x80; + for (i = 0; i < 16; i++) + T[i] = be32dec(T + i); + T[15] = 8 * 64; + sha256_transform(S, T, 0); + + for (i = 0; i < 8; i++) + be32enc((uint32_t *)hash + i, S[i]); +} + +inline void pluck(uint32_t *hash, uint32_t *input) +{ + + uint32_t data[20]; + + //uint32_t midstate[8]; +// printf("coming here\n"); + const int HASH_MEMORY = 128 * 1024; + uint8_t * scratchbuf = (uint8_t*)malloc(HASH_MEMORY); + + + for (int k = 0; k<20; k++) { data[k] = input[k]; } + + + uint8_t *hashbuffer = scratchbuf; //don't allocate this on stack, since it's huge.. 
+ int size = HASH_MEMORY; +// int size = 224+64; + memset(hashbuffer, 0, 64); + +// for (int k = 0; k<10; k++) { +// printf("cpu init data %d %08x %08x\n", k, ((uint32_t*)(data))[2 * k], ((uint32_t*)(data))[2 * k + 1]);} + sha256_hash(&hashbuffer[0], (uint8_t*)data, 80); +// for (int k = 0; k<8; k++) { printf("cpu hash %d %08x \n", k, ((uint32_t*)hashbuffer)[k]); } + + for (int i = 64; i < size - 32; i += 32) + { + //i-4 because we use integers for all references against this, and we don't want to go 3 bytes over the defined area + int randmax = i - 4; //we could use size here, but then it's probable to use 0 as the value in most cases + uint32_t joint[16]; + uint32_t randbuffer[16]; + + uint32_t randseed[16]; + memcpy(randseed, &hashbuffer[i - 64], 64); + if (i>128) + { + memcpy(randbuffer, &hashbuffer[i - 128], 64); + } + else + { + memset(&randbuffer, 0, 64); + } + + xor_salsa8(randbuffer, randseed); + + memcpy(joint, &hashbuffer[i - 32], 32); + //use the last hash value as the seed + for (int j = 32; j < 64; j += 4) + { + uint32_t rand = randbuffer[(j - 32) / 4] % (randmax - 32); //randmax - 32 as otherwise we go beyond memory that's already been written to + joint[j / 4] = *((uint32_t*)&hashbuffer[rand]); + } + sha256_hash512(&hashbuffer[i], (uint8_t*)joint); +// for (int k = 0; k<8; k++) { printf("sha hashbuffer %d %08x\n", k, ((uint32_t*)(hashbuffer+i))[k]); } + memcpy(randseed, &hashbuffer[i - 32], 64); //use last hash value and previous hash value(post-mixing) + if (i>128) + { + memcpy(randbuffer, &hashbuffer[i - 128], 64); + } + else + { + memset(randbuffer, 0, 64); + } + xor_salsa8(randbuffer, randseed); + for (int j = 0; j < 32; j += 2) + { + uint32_t rand = randbuffer[j / 2] % randmax; + *((uint32_t*)&hashbuffer[rand]) = *((uint32_t*)&hashbuffer[j + i - 4]); + } + } + +// for (int k = 0; k<8; k++) { printf("cpu final hash %d %08x\n", k, ((uint32_t*)hashbuffer)[k]); } + + //note: off-by-one error is likely here... 
+/* + for (int i = size - 64 - 1; i >= 64; i -= 64) + { + sha256_hash512(&hashbuffer[i - 64], &hashbuffer[i]); + } + + for (int k = 0; k<8; k++) { printf("cpu after of by one final hash %d %08x\n", k, ((uint32_t*)hashbuffer)[k]); } +*/ + memcpy((unsigned char*)hash, hashbuffer, 32); +} + +extern "C" int scanhash_pluck(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done) +{ + const uint32_t first_nonce = pdata[19]; + + if (opt_benchmark) + ((uint32_t*)ptarget)[7] = 0x0000ff; + + const uint32_t Htarg = ptarget[7]; + if (tp_coef[thr_id]<0) { tp_coef[thr_id]=2.45; } + const int throughput = (uint32_t)((float)(32*1*64*tp_coef[thr_id])); + static bool init[8] = {0,0,0,0,0,0,0,0}; + if (!init[thr_id]) + { + cudaSetDevice(device_map[thr_id]); + cudaDeviceReset(); + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); + // Konstanten kopieren, Speicher belegen + cudaMalloc(&d_hash[thr_id], 32 * 1024 * sizeof(uint32_t) * throughput); + + + pluck_cpu_init(thr_id, throughput,d_hash[thr_id]); + init[thr_id] = true; + } + + uint32_t endiandata[20]; + + for (int k = 0; k < 20; k++) + be32enc(&endiandata[k], ((uint32_t*)pdata)[k]); + + pluck_setBlockTarget(endiandata,ptarget); + + do { + int order = 0; + uint32_t foundNonce = pluck_cpu_hash(thr_id, throughput, pdata[19], order++); +// foundNonce = pdata[19]; + if (foundNonce != 0xffffffff) + { + uint32_t vhash64[8]; + +// be32enc(&endiandata[19], foundNonce); +// pluck(vhash64,endiandata); +// printf("target %08x vhash64 %08x", ptarget[7], vhash64[7]); +// if ( vhash64[7] <= ptarget[7]) { // && fulltest(vhash64, ptarget)) { + pdata[19] = foundNonce; + *hashes_done = foundNonce - first_nonce + 1; + return 1; +// } else { +// *hashes_done = foundNonce - first_nonce + 1; // keeps hashrate calculation happy +// applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce); +// } + } + + pdata[19] 
+= throughput; + + } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = pdata[19] - first_nonce + 1; + return 0; +} diff --git a/qubit/deep.cu b/qubit/deep.cu new file mode 100644 index 0000000000..94ff57f11e --- /dev/null +++ b/qubit/deep.cu @@ -0,0 +1,151 @@ +/* + * deepcoin algorithm + * + */ + +extern "C" +{ + +#include "sph/sph_luffa.h" +#include "sph/sph_cubehash.h" +#include "sph/sph_shavite.h" +#include "sph/sph_simd.h" +#include "sph/sph_echo.h" + +#include "miner.h" +} + +// aus cpu-miner.c +extern int device_map[8]; + +// Speicher für Input/Output der verketteten Hashfunktionen +static uint32_t *d_hash[8]; + + +extern void qubit_luffa512_cpu_init(int thr_id, int threads); +extern void qubit_luffa512_cpu_setBlock_80(void *pdata); +extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget); +extern uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); + +extern void x11_cubehash512_cpu_init(int thr_id, int threads); +extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_echo512_cpu_init(int thr_id, int threads); +extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_check_cpu_init(int thr_id, int threads); +extern void quark_check_cpu_setTarget(const void *ptarget); +extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order); + + +// X13 Hashfunktion +inline void deephash(void *state, const void *input) +{ + // luffa1-cubehash2-shavite3-simd4-echo5 + + sph_luffa512_context ctx_luffa; + sph_cubehash512_context ctx_cubehash; + 
sph_echo512_context ctx_echo; + + + uint32_t hash[16]; + + sph_luffa512_init(&ctx_luffa); + // ZBLAKE; + sph_luffa512 (&ctx_luffa, input, 80); + sph_luffa512_close(&ctx_luffa, (void*) hash); + + sph_cubehash512_init(&ctx_cubehash); + // ZCUBEHASH; + sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64); + sph_cubehash512_close(&ctx_cubehash, (void*) hash); + + sph_echo512_init(&ctx_echo); + // ZECHO + sph_echo512 (&ctx_echo, (const void*) hash, 64); + sph_echo512_close(&ctx_echo, (void*) hash); + + memcpy(state, hash, 32); +} + + +extern bool opt_benchmark; + +extern "C" int scanhash_deep(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done) +{ + const uint32_t first_nonce = pdata[19]; + + if (opt_benchmark) + ((uint32_t*)ptarget)[7] = 0x0000ff; + + const uint32_t Htarg = ptarget[7]; + + const int throughput = 256*256*8*8; + + static bool init[8] = {0,0,0,0,0,0,0,0}; + if (!init[thr_id]) + { + cudaSetDevice(device_map[thr_id]); + + // Konstanten kopieren, Speicher belegen + cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); + qubit_luffa512_cpu_init(thr_id, throughput); + x11_cubehash512_cpu_init(thr_id, throughput); + x11_echo512_cpu_init(thr_id, throughput); + + + + quark_check_cpu_init(thr_id, throughput); + + init[thr_id] = true; + } + + //unsigned char echobefore[64], echoafter[64]; + + uint32_t endiandata[20]; + for (int k=0; k < 20; k++) + be32enc(&endiandata[k], ((uint32_t*)pdata)[k]); + + qubit_luffa512_cpufinal_setBlock_80((void*)endiandata,ptarget); + quark_check_cpu_setTarget(ptarget); + + do { + int order = 0; + + // erstes luffa512 Hash mit CUDA + qubit_luffa512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + + // das ist der unbedingte Branch für Cubehash512 + x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + + // das ist der unbedingte Branch für ECHO512 + x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, 
d_hash[thr_id], order++); + + // Scan nach Gewinner Hashes auf der GPU + uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + if (foundNonce != 0xffffffff) + { + uint32_t vhash64[8]; + be32enc(&endiandata[19], foundNonce); + deephash(vhash64, endiandata); + + if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) { + + pdata[19] = foundNonce; + *hashes_done = foundNonce - first_nonce + 1; + return 1; + } else { + applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce); + } + } + + pdata[19] += throughput; + + } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = pdata[19] - first_nonce + 1; + return 0; +} diff --git a/qubit/doom.cu b/qubit/doom.cu new file mode 100644 index 0000000000..e9557a02f3 --- /dev/null +++ b/qubit/doom.cu @@ -0,0 +1,110 @@ +/* + * qubit algorithm + * + */ + +extern "C" +{ + +#include "sph/sph_luffa.h" + +#include "miner.h" +} + +// aus cpu-miner.c +extern int device_map[8]; + +// Speicher für Input/Output der verketteten Hashfunktionen +static uint32_t *d_hash[8]; + + +extern void qubit_luffa512_cpu_init(int thr_id, int threads); +extern void qubit_luffa512_cpu_setBlock_80(void *pdata); +extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget); +extern uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); + +extern void quark_check_cpu_init(int thr_id, int threads); +extern void quark_check_cpu_setTarget(const void *ptarget); +extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order); + +inline void doomhash(void *state, const void *input) +{ + // luffa512 + + sph_luffa512_context ctx_luffa; + + + uint32_t hash[16]; + + 
sph_luffa512_init(&ctx_luffa); + sph_luffa512 (&ctx_luffa, input, 80); + sph_luffa512_close(&ctx_luffa, (void*) hash); + + memcpy(state, hash, 32); +} + + +extern bool opt_benchmark; + +extern "C" int scanhash_doom(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done) +{ + const uint32_t first_nonce = pdata[19]; + + if (opt_benchmark) + ((uint32_t*)ptarget)[7] = 0x0000ff; + + const uint32_t Htarg = ptarget[7]; + + const int throughput = 256*256*8*8; + + static bool init[8] = {0,0,0,0,0,0,0,0}; + if (!init[thr_id]) + { + cudaSetDevice(device_map[thr_id]); + + // Konstanten kopieren, Speicher belegen + cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); + qubit_luffa512_cpu_init(thr_id, throughput); + + init[thr_id] = true; + } + + //unsigned char echobefore[64], echoafter[64]; + + uint32_t endiandata[20]; + for (int k=0; k < 20; k++) + be32enc(&endiandata[k], ((uint32_t*)pdata)[k]); + + qubit_luffa512_cpufinal_setBlock_80((void*)endiandata,ptarget); + + + do { + int order = 0; + + uint32_t foundNonce = qubit_luffa512_cpu_finalhash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + if (foundNonce != 0xffffffff) + { + uint32_t vhash64[8]; + be32enc(&endiandata[19], foundNonce); + doomhash(vhash64, endiandata); + + if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) { + + pdata[19] = foundNonce; + *hashes_done = foundNonce - first_nonce + 1; + return 1; + } else { + applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce); + } + } + + pdata[19] += throughput; + + } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = pdata[19] - first_nonce + 1; + return 0; +} diff --git a/qubit/qubit.cu b/qubit/qubit.cu new file mode 100644 index 0000000000..7fe1c485a4 --- /dev/null +++ b/qubit/qubit.cu @@ -0,0 +1,180 @@ +/* + * qubit algorithm + * + */ + +extern "C" +{ + +#include "sph/sph_luffa.h" +#include "sph/sph_cubehash.h" 
+#include "sph/sph_shavite.h" +#include "sph/sph_simd.h" +#include "sph/sph_echo.h" + +#include "miner.h" +} + +// aus cpu-miner.c +extern int device_map[8]; + +// Speicher für Input/Output der verketteten Hashfunktionen +static uint32_t *d_hash[8]; + + +extern void qubit_luffa512_cpu_init(int thr_id, int threads); +extern void qubit_luffa512_cpu_setBlock_80(void *pdata); +extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); + +extern void x11_cubehash512_cpu_init(int thr_id, int threads); +extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_shavite512_cpu_init(int thr_id, int threads); +extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_simd512_cpu_init(int thr_id, int threads); +extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_echo512_cpu_init(int thr_id, int threads); +extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_check_cpu_init(int thr_id, int threads); +extern void quark_check_cpu_setTarget(const void *ptarget); +extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order); + +extern void quark_compactTest_cpu_init(int thr_id, int threads); +extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, + uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, + int order); + +// X13 Hashfunktion +inline void qubithash(void *state, const void *input) +{ + // luffa1-cubehash2-shavite3-simd4-echo5 + + 
sph_luffa512_context ctx_luffa; + sph_cubehash512_context ctx_cubehash; + sph_shavite512_context ctx_shavite; + sph_simd512_context ctx_simd; + sph_echo512_context ctx_echo; + + + uint32_t hash[16]; + + sph_luffa512_init(&ctx_luffa); + // ZBLAKE; + sph_luffa512 (&ctx_luffa, input, 80); + sph_luffa512_close(&ctx_luffa, (void*) hash); + + sph_cubehash512_init(&ctx_cubehash); + // ZCUBEHASH; + sph_cubehash512 (&ctx_cubehash, (const void*) hash, 64); + sph_cubehash512_close(&ctx_cubehash, (void*) hash); + + sph_shavite512_init(&ctx_shavite); + // ZSHAVITE; + sph_shavite512 (&ctx_shavite, (const void*) hash, 64); + sph_shavite512_close(&ctx_shavite, (void*) hash); + + sph_simd512_init(&ctx_simd); + // ZSIMD + sph_simd512 (&ctx_simd, (const void*) hash, 64); + sph_simd512_close(&ctx_simd, (void*) hash); + + sph_echo512_init(&ctx_echo); + // ZECHO + sph_echo512 (&ctx_echo, (const void*) hash, 64); + sph_echo512_close(&ctx_echo, (void*) hash); + + memcpy(state, hash, 32); +} + + +extern bool opt_benchmark; + +extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done) +{ + const uint32_t first_nonce = pdata[19]; + + if (opt_benchmark) + ((uint32_t*)ptarget)[7] = 0x0000ff; + + const uint32_t Htarg = ptarget[7]; + + const int throughput = 256*256*8; + + static bool init[8] = {0,0,0,0,0,0,0,0}; + if (!init[thr_id]) + { + cudaSetDevice(device_map[thr_id]); + + // Konstanten kopieren, Speicher belegen + cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); + qubit_luffa512_cpu_init(thr_id, throughput); + x11_cubehash512_cpu_init(thr_id, throughput); + x11_shavite512_cpu_init(thr_id, throughput); + x11_simd512_cpu_init(thr_id, throughput); + x11_echo512_cpu_init(thr_id, throughput); + + + + quark_check_cpu_init(thr_id, throughput); + + init[thr_id] = true; + } + + //unsigned char echobefore[64], echoafter[64]; + + uint32_t endiandata[20]; + for (int k=0; k < 20; k++) + be32enc(&endiandata[k], 
((uint32_t*)pdata)[k]); + + qubit_luffa512_cpu_setBlock_80((void*)endiandata); + quark_check_cpu_setTarget(ptarget); + + do { + int order = 0; + + // erstes luffa512 Hash mit CUDA + qubit_luffa512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + + // das ist der unbedingte Branch für Cubehash512 + x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + + // das ist der unbedingte Branch für Shavite512 + x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + + // das ist der unbedingte Branch für SIMD512 + x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + + // das ist der unbedingte Branch für ECHO512 + x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + + + // Scan nach Gewinner Hashes auf der GPU + uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + if (foundNonce != 0xffffffff) + { + uint32_t vhash64[8]; + be32enc(&endiandata[19], foundNonce); + qubithash(vhash64, endiandata); + + if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) { + + pdata[19] = foundNonce; + *hashes_done = foundNonce - first_nonce + 1; + return 1; + } else { + applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce); + } + } + + pdata[19] += throughput; + + } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = pdata[19] - first_nonce + 1; + return 0; +} diff --git a/qubit/qubit_luffa512.cu b/qubit/qubit_luffa512.cu new file mode 100644 index 0000000000..324f87df26 --- /dev/null +++ b/qubit/qubit_luffa512.cu @@ -0,0 +1,512 @@ +/* + * luffa_for_32.c + * Version 2.0 (Sep 15th 2009) + * + * Copyright (C) 2008-2009 Hitachi, Ltd. All rights reserved. + * + * Hitachi, Ltd. is the owner of this software and hereby grant + * the U.S. 
Government and any interested party the right to use + * this software for the purposes of the SHA-3 evaluation process, + * notwithstanding that this software is copyrighted. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + + +#include +#include +#include +// aus heavy.cu +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); + +typedef unsigned char BitSequence; + + +#include "cuda_helper.h" +__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding) +__constant__ uint32_t pTarget[8]; +uint32_t *d_lnounce[8]; +uint32_t *d_LNonce[8]; + +typedef struct { + uint32_t buffer[8]; /* Buffer to be hashed */ + uint32_t chainv[40]; /* Chaining values */ +} hashState; + + +static __device__ __forceinline__ uint32_t BYTES_SWAP32(uint32_t x) +{ + return __byte_perm(x, x, 0x0123); +} + +#define MULT2(a,j)\ + tmp = a[7+(8*j)];\ + a[7+(8*j)] = a[6+(8*j)];\ + a[6+(8*j)] = a[5+(8*j)];\ + a[5+(8*j)] = a[4+(8*j)];\ + a[4+(8*j)] = a[3+(8*j)] ^ tmp;\ + a[3+(8*j)] = a[2+(8*j)] ^ tmp;\ + a[2+(8*j)] = a[1+(8*j)];\ + a[1+(8*j)] = a[0+(8*j)] ^ tmp;\ + a[0+(8*j)] = tmp; + +#define TWEAK(a0,a1,a2,a3,j)\ + a0 = (a0<<(j))|(a0>>(32-j));\ + a1 = (a1<<(j))|(a1>>(32-j));\ + a2 = (a2<<(j))|(a2>>(32-j));\ + a3 = (a3<<(j))|(a3>>(32-j)); + +#define STEP(c0,c1)\ + SUBCRUMB(chainv[0],chainv[1],chainv[2],chainv[3],tmp);\ + SUBCRUMB(chainv[5],chainv[6],chainv[7],chainv[4],tmp);\ + 
MIXWORD(chainv[0],chainv[4]);\ + MIXWORD(chainv[1],chainv[5]);\ + MIXWORD(chainv[2],chainv[6]);\ + MIXWORD(chainv[3],chainv[7]);\ + ADD_CONSTANT(chainv[0],chainv[4],c0,c1); + +#define SUBCRUMB(a0,a1,a2,a3,a4)\ + a4 = a0;\ + a0 |= a1;\ + a2 ^= a3;\ + a1 = ~a1;\ + a0 ^= a3;\ + a3 &= a4;\ + a1 ^= a3;\ + a3 ^= a2;\ + a2 &= a0;\ + a0 = ~a0;\ + a2 ^= a1;\ + a1 |= a3;\ + a4 ^= a1;\ + a3 ^= a2;\ + a2 &= a1;\ + a1 ^= a0;\ + a0 = a4; + +#define MIXWORD(a0,a4)\ + a4 ^= a0;\ + a0 = (a0<<2) | (a0>>(30));\ + a0 ^= a4;\ + a4 = (a4<<14) | (a4>>(18));\ + a4 ^= a0;\ + a0 = (a0<<10) | (a0>>(22));\ + a0 ^= a4;\ + a4 = (a4<<1) | (a4>>(31)); + +#define ADD_CONSTANT(a0,b0,c0,c1)\ + a0 ^= c0;\ + b0 ^= c1; + +/* initial values of chaining variables */ +__constant__ uint32_t c_IV[40]; +const uint32_t h2_IV[40] = { + 0x6d251e69,0x44b051e0,0x4eaa6fb4,0xdbf78465, + 0x6e292011,0x90152df4,0xee058139,0xdef610bb, + 0xc3b44b95,0xd9d2f256,0x70eee9a0,0xde099fa3, + 0x5d9b0557,0x8fc944b3,0xcf1ccf0e,0x746cd581, + 0xf7efc89d,0x5dba5781,0x04016ce5,0xad659c05, + 0x0306194f,0x666d1836,0x24aa230a,0x8b264ae7, + 0x858075d5,0x36d79cce,0xe571f7d7,0x204b1f67, + 0x35870c6a,0x57e9e923,0x14bcb808,0x7cde72ce, + 0x6c68e9be,0x5ec41e22,0xc825b7c7,0xaffb4363, + 0xf5df3999,0x0fc688f1,0xb07224cc,0x03e86cea}; + +__constant__ uint32_t c_CNS[80]; +uint32_t h2_CNS[80] = { + 0x303994a6,0xe0337818,0xc0e65299,0x441ba90d, + 0x6cc33a12,0x7f34d442,0xdc56983e,0x9389217f, + 0x1e00108f,0xe5a8bce6,0x7800423d,0x5274baf4, + 0x8f5b7882,0x26889ba7,0x96e1db12,0x9a226e9d, + 0xb6de10ed,0x01685f3d,0x70f47aae,0x05a17cf4, + 0x0707a3d4,0xbd09caca,0x1c1e8f51,0xf4272b28, + 0x707a3d45,0x144ae5cc,0xaeb28562,0xfaa7ae2b, + 0xbaca1589,0x2e48f1c1,0x40a46f3e,0xb923c704, + 0xfc20d9d2,0xe25e72c1,0x34552e25,0xe623bb72, + 0x7ad8818f,0x5c58a4a4,0x8438764a,0x1e38e2e7, + 0xbb6de032,0x78e38b9d,0xedb780c8,0x27586719, + 0xd9847356,0x36eda57f,0xa2c78434,0x703aace7, + 0xb213afa5,0xe028c9bf,0xc84ebe95,0x44756f91, + 0x4e608a22,0x7e8fce32,0x56d858fe,0x956548be, + 
0x343b138f,0xfe191be2,0xd0ec4e3d,0x3cb226e5, + 0x2ceb4882,0x5944a28e,0xb3ad2208,0xa1c4c355, + 0xf0d2e9e3,0x5090d577,0xac11d7fa,0x2d1925ab, + 0x1bcb66f2,0xb46496ac,0x6f2d9bc9,0xd1925ab0, + 0x78602649,0x29131ab6,0x8edae952,0x0fc053c3, + 0x3b6ba548,0x3f014f0c,0xedae9520,0xfc053c31}; + + +/***************************************************/ +__device__ __forceinline__ void rnd512(hashState *state) +{ + int i,j; + uint32_t t[40]; + uint32_t chainv[8]; + uint32_t tmp; + +#pragma unroll 8 + for(i=0;i<8;i++) { + t[i]=0; +#pragma unroll 5 + for(j=0;j<5;j++) { + t[i] ^= state->chainv[i+8*j]; + } + } + + MULT2(t, 0); + +#pragma unroll 5 + for(j=0;j<5;j++) { +#pragma unroll 8 + for(i=0;i<8;i++) { + state->chainv[i+8*j] ^= t[i]; + } + } + +#pragma unroll 5 + for(j=0;j<5;j++) { +#pragma unroll 8 + for(i=0;i<8;i++) { + t[i+8*j] = state->chainv[i+8*j]; + } + } + +#pragma unroll 5 + for(j=0;j<5;j++) { + MULT2(state->chainv, j); + } + +#pragma unroll 5 + for(j=0;j<5;j++) { +#pragma unroll 8 + for(i=0;i<8;i++) { + state->chainv[8*j+i] ^= t[8*((j+1)%5)+i]; + } + } + +#pragma unroll 5 + for(j=0;j<5;j++) { +#pragma unroll 8 + for(i=0;i<8;i++) { + t[i+8*j] = state->chainv[i+8*j]; + } + } + +#pragma unroll 5 + for(j=0;j<5;j++) { + MULT2(state->chainv, j); + } + +#pragma unroll 5 + for(j=0;j<5;j++) { +#pragma unroll 8 + for(i=0;i<8;i++) { + state->chainv[8*j+i] ^= t[8*((j+4)%5)+i]; + } + } + +#pragma unroll 5 + for(j=0;j<5;j++) { +#pragma unroll 8 + for(i=0;i<8;i++) { + state->chainv[i+8*j] ^= state->buffer[i]; + } + MULT2(state->buffer, 0); + } + +#pragma unroll 8 + for(i=0;i<8;i++) { + chainv[i] = state->chainv[i]; + } + +#pragma unroll 8 + for(i=0;i<8;i++) { + STEP(c_CNS[(2*i)],c_CNS[(2*i)+1]); + } + +#pragma unroll 8 + for(i=0;i<8;i++) { + state->chainv[i] = chainv[i]; + chainv[i] = state->chainv[i+8]; + } + + TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],1); + +#pragma unroll 8 + for(i=0;i<8;i++) { + STEP(c_CNS[(2*i)+16],c_CNS[(2*i)+16+1]); + } + +#pragma unroll 8 + for(i=0;i<8;i++) 
{ + state->chainv[i+8] = chainv[i]; + chainv[i] = state->chainv[i+16]; + } + + TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],2); + +#pragma unroll 8 + for(i=0;i<8;i++) { + STEP(c_CNS[(2*i)+32],c_CNS[(2*i)+32+1]); + } + +#pragma unroll 8 + for(i=0;i<8;i++) { + state->chainv[i+16] = chainv[i]; + chainv[i] = state->chainv[i+24]; + } + + TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],3); + +#pragma unroll 8 + for(i=0;i<8;i++) { + STEP(c_CNS[(2*i)+48],c_CNS[(2*i)+48+1]); + } + +#pragma unroll 8 + for(i=0;i<8;i++) { + state->chainv[i+24] = chainv[i]; + chainv[i] = state->chainv[i+32]; + } + + TWEAK(chainv[4],chainv[5],chainv[6],chainv[7],4); + +#pragma unroll 8 + for(i=0;i<8;i++) { + STEP(c_CNS[(2*i)+64],c_CNS[(2*i)+64+1]); + } + +#pragma unroll 8 + for(i=0;i<8;i++) { + state->chainv[i+32] = chainv[i]; + } +} + + +__device__ __forceinline__ void Update512(hashState *state, const BitSequence *data) +{ +#pragma unroll 8 + for(int i=0;i<8;i++) state->buffer[i] = BYTES_SWAP32(((uint32_t*)data)[i]); + rnd512(state); + +#pragma unroll 8 + for(int i=0;i<8;i++) state->buffer[i] = BYTES_SWAP32(((uint32_t*)(data+32))[i]); + rnd512(state); +#pragma unroll 4 + for(int i=0;i<4;i++) state->buffer[i] = BYTES_SWAP32(((uint32_t*)(data+64))[i]); +} + + +/***************************************************/ +__device__ __forceinline__ void finalization512(hashState *state, uint32_t *b) +{ + int i,j; + + state->buffer[4] = 0x80000000; +#pragma unroll 3 + for(int i=5;i<8;i++) state->buffer[i] = 0; + rnd512(state); + + /*---- blank round with m=0 ----*/ +#pragma unroll 8 + for(i=0;i<8;i++) state->buffer[i] =0; + rnd512(state); + +#pragma unroll 8 + for(i=0;i<8;i++) { + b[i] = 0; +#pragma unroll 5 + for(j=0;j<5;j++) { + b[i] ^= state->chainv[i+8*j]; + } + b[i] = BYTES_SWAP32((b[i])); + } + +#pragma unroll 8 + for(i=0;i<8;i++) state->buffer[i]=0; + rnd512(state); + +#pragma unroll 8 + for(i=0;i<8;i++) { + b[8+i] = 0; +#pragma unroll 5 + for(j=0;j<5;j++) { + b[8+i] ^= state->chainv[i+8*j]; + } + 
b[8+i] = BYTES_SWAP32((b[8+i])); + } +} + + +/***************************************************/ +// Die Hash-Funktion +__global__ void qubit_luffa512_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash) +{ + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + uint32_t nounce = startNounce + thread; + +union { +uint64_t buf64[16]; +uint32_t buf32[32]; +} buff; +#pragma unroll 16 + for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i]; + + // die Nounce durch die thread-spezifische ersetzen + buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce)); + + + hashState state; +#pragma unroll 40 + for(int i=0;i<40;i++) state.chainv[i] = c_IV[i]; +#pragma unroll 8 + for(int i=0;i<8;i++) state.buffer[i] = 0; + Update512(&state, (BitSequence*)buff.buf32); + uint32_t *outHash = (uint32_t *)outputHash + 16 * thread; + finalization512(&state, (uint32_t*)outHash); + } +} + +__global__ void qubit_luffa512_gpu_finalhash_80(int threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce) +{ + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + uint32_t nounce = startNounce + thread; + +union { +uint64_t buf64[16]; +uint32_t buf32[32]; +} buff; + +uint32_t Hash[16]; +#pragma unroll 16 + for (int i=0; i < 16; ++i) buff.buf64[i] = c_PaddedMessage80[i]; + + // die Nounce durch die thread-spezifische ersetzen + buff.buf64[9] = REPLACE_HIWORD(buff.buf64[9], cuda_swab32(nounce)); + + + hashState state; +#pragma unroll 40 + for(int i=0;i<40;i++) state.chainv[i] = c_IV[i]; +#pragma unroll 8 + for(int i=0;i<8;i++) state.buffer[i] = 0; + Update512(&state, (BitSequence*)buff.buf32); + finalization512(&state, Hash); + + bool rc = true; + int position = -1; +#pragma unroll 8 + for (int i = 7; i >= 0; i--) { + if (Hash[i] > pTarget[i]) { + if(position < i) { + position = i; + rc = false; + } + + } + if (Hash[i] < pTarget[i]) { + if(position < i) { + position = i; + rc = true; + } + } + } + 
+ if(rc == true) + { + if(resNounce[0] > nounce) + resNounce[0] = nounce; + } + + } +} +// Setup-Funktionen +__host__ void qubit_luffa512_cpu_init(int thr_id, int threads) +{ + cudaMemcpyToSymbol( c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice ); + cudaMemcpyToSymbol( c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice ); + cudaMalloc(&d_LNonce[thr_id], sizeof(uint32_t)); + cudaMallocHost(&d_lnounce[thr_id], 1*sizeof(uint32_t)); +} + +__host__ uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order) +{ + uint32_t result = 0xffffffff; + cudaMemset(d_LNonce[thr_id], 0xffffffff, sizeof(uint32_t)); + const int threadsperblock = 256; + + // berechne wie viele Thread Blocks wir brauchen + dim3 grid((threads + threadsperblock-1)/threadsperblock); + dim3 block(threadsperblock); + + // Größe des dynamischen Shared Memory Bereichs + size_t shared_size = 0; + + qubit_luffa512_gpu_finalhash_80<<>>(threads, startNounce, d_outputHash, d_LNonce[thr_id]); + MyStreamSynchronize(NULL, order, thr_id); + cudaMemcpy(d_lnounce[thr_id], d_LNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost); + //cudaThreadSynchronize(); + result = *d_lnounce[thr_id]; + return result; +} + +__host__ void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order) +{ + const int threadsperblock = 256; + + // berechne wie viele Thread Blocks wir brauchen + dim3 grid((threads + threadsperblock-1)/threadsperblock); + dim3 block(threadsperblock); + + // Größe des dynamischen Shared Memory Bereichs + size_t shared_size = 0; + + qubit_luffa512_gpu_hash_80<<>>(threads, startNounce, d_outputHash); + MyStreamSynchronize(NULL, order, thr_id); +} + +__host__ void qubit_luffa512_cpu_setBlock_80(void *pdata) +{ + // Message mit Padding bereitstellen + // lediglich die korrekte Nonce ist noch ab Byte 76 einzusetzen. 
+ unsigned char PaddedMessage[128]; + memcpy(PaddedMessage, pdata, 80); + memset(PaddedMessage+80, 0, 48); + PaddedMessage[80] = 0x80; + PaddedMessage[111] = 1; + PaddedMessage[126] = 0x02; + PaddedMessage[127] = 0x80; + + // die Message zur Berechnung auf der GPU + cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); +} + +__host__ void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget) +{ + // Message mit Padding bereitstellen + // lediglich die korrekte Nonce ist noch ab Byte 76 einzusetzen. + unsigned char PaddedMessage[128]; + memcpy(PaddedMessage, pdata, 80); + memset(PaddedMessage+80, 0, 48); + PaddedMessage[80] = 0x80; + PaddedMessage[111] = 1; + PaddedMessage[126] = 0x02; + PaddedMessage[127] = 0x80; + cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice); + + // die Message zur Berechnung auf der GPU + cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); +} \ No newline at end of file diff --git a/sph/Lyra2.c b/sph/Lyra2.c new file mode 100644 index 0000000000..309dc6171d --- /dev/null +++ b/sph/Lyra2.c @@ -0,0 +1,215 @@ +/** + * Implementation of the Lyra2 Password Hashing Scheme (PHS). + * + * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. + * + * This software is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include "Lyra2.h" +#include "Sponge.h" + +/** + * Executes Lyra2 based on the G function from Blake2b. This version supports salts and passwords + * whose combined length is smaller than the size of the memory matrix, (i.e., (nRows x nCols x b) bits, + * where "b" is the underlying sponge's bitrate). In this implementation, the "basil" is composed by all + * integer parameters (treated as type "unsigned int") in the order they are provided, plus the value + * of nCols, (i.e., basil = kLen || pwdlen || saltlen || timeCost || nRows || nCols). 
+ * + * @param K The derived key to be output by the algorithm + * @param kLen Desired key length + * @param pwd User password + * @param pwdlen Password length + * @param salt Salt + * @param saltlen Salt length + * @param timeCost Parameter to determine the processing time (T) + * @param nRows Number or rows of the memory matrix (R) + * @param nCols Number of columns of the memory matrix (C) + * + * @return 0 if the key is generated correctly; -1 if there is an error (usually due to lack of memory for allocation) + */ +int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols) { + + //============================= Basic variables ============================// + int64_t row = 2; //index of row to be processed + int64_t prev = 1; //index of prev (last row ever computed/modified) + int64_t rowa = 0; //index of row* (a previous row, deterministically picked during Setup and randomly picked while Wandering) + int64_t tau; //Time Loop iterator + int64_t step = 1; //Visitation step (used during Setup and Wandering phases) + int64_t window = 2; //Visitation window (used to define which rows can be revisited during Setup) + int64_t gap = 1; //Modifier to the step, assuming the values 1 or -1 + int64_t i; //auxiliary iteration counter + //==========================================================================/ + + //========== Initializing the Memory Matrix and pointers to it =============// + //Tries to allocate enough space for the whole memory matrix + i = (int64_t) ((int64_t) nRows * (int64_t) ROW_LEN_BYTES); + uint64_t *wholeMatrix = malloc(i); + if (wholeMatrix == NULL) { + return -1; + } + memset(wholeMatrix, 0, i); + + //Allocates pointers to each row of the matrix + uint64_t **memMatrix = malloc(nRows * sizeof (uint64_t*)); + if (memMatrix == NULL) { + return -1; + } + //Places the pointers in the correct positions + uint64_t *ptrWord = wholeMatrix; + for (i = 0; i 
< nRows; i++) { + memMatrix[i] = ptrWord; + ptrWord += ROW_LEN_INT64; + } + //==========================================================================/ + + //============= Getting the password + salt + basil padded with 10*1 ===============// + //OBS.:The memory matrix will temporarily hold the password: not for saving memory, + //but this ensures that the password copied locally will be overwritten as soon as possible + + //First, we clean enough blocks for the password, salt, basil and padding + uint64_t nBlocksInput = ((saltlen + pwdlen + 6 * sizeof (uint64_t)) / BLOCK_LEN_BLAKE2_SAFE_BYTES) + 1; + + byte *ptrByte = (byte*) wholeMatrix; + memset(ptrByte, 0, nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES); + + //Prepends the password + memcpy(ptrByte, pwd, pwdlen); + ptrByte += pwdlen; + + //Concatenates the salt + memcpy(ptrByte, salt, saltlen); + ptrByte += saltlen; + + //Concatenates the basil: every integer passed as parameter, in the order they are provided by the interface + memcpy(ptrByte, &kLen, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &pwdlen, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &saltlen, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &timeCost, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &nRows, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + memcpy(ptrByte, &nCols, sizeof (uint64_t)); + ptrByte += sizeof (uint64_t); + + //Now comes the padding + *ptrByte = 0x80; //first byte of padding: right after the password + ptrByte = (byte*) wholeMatrix; //resets the pointer to the start of the memory matrix + ptrByte += nBlocksInput * BLOCK_LEN_BLAKE2_SAFE_BYTES - 1; //sets the pointer to the correct position: end of incomplete block + *ptrByte ^= 0x01; //last byte of padding: at the end of the last incomplete block + //==========================================================================/ + + //======================= Initializing the Sponge 
State ====================// + //Sponge state: 16 uint64_t, BLOCK_LEN_INT64 words of them for the bitrate (b) and the remainder for the capacity (c) + uint64_t *state = malloc(16 * sizeof (uint64_t)); + if (state == NULL) { + return -1; + } + initState(state); + //==========================================================================/ + + //================================ Setup Phase =============================// + //Absorbing salt, password and basil: this is the only place in which the block length is hard-coded to 512 bits + ptrWord = wholeMatrix; + for (i = 0; i < nBlocksInput; i++) { + absorbBlockBlake2Safe(state, ptrWord); //absorbs each block of pad(pwd || salt || basil) + ptrWord += BLOCK_LEN_BLAKE2_SAFE_BYTES; //goes to next block of pad(pwd || salt || basil) + } + + //Initializes M[0] and M[1] + reducedSqueezeRow0(state, memMatrix[0]); //The locally copied password is most likely overwritten here + + + + reducedDuplexRow1(state, memMatrix[0], memMatrix[1]); + + + do { + //M[row] = rand; //M[row*] = M[row*] XOR rotW(rand) + + reducedDuplexRowSetup(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]); + + + + //updates the value of row* (deterministically picked during Setup)) + rowa = (rowa + step) & (window - 1); + //update prev: it now points to the last row ever computed + prev = row; + //updates row: goes to the next row to be computed + row++; + + //Checks if all rows in the window where visited. 
+ if (rowa == 0) { + step = window + gap; //changes the step: approximately doubles its value + window *= 2; //doubles the size of the re-visitation window + gap = -gap; //inverts the modifier to the step + } + + } while (row < nRows); + //==========================================================================/ + + //============================ Wandering Phase =============================// + row = 0; //Resets the visitation to the first row of the memory matrix + for (tau = 1; tau <= timeCost; tau++) { + //Step is approximately half the number of all rows of the memory matrix for an odd tau; otherwise, it is -1 + step = (tau % 2 == 0) ? -1 : nRows / 2 - 1; + do { + //Selects a pseudorandom index row* + //------------------------------------------------------------------------------------------ + //rowa = ((unsigned int)state[0]) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2) + rowa = ((uint64_t) (state[0])) % nRows; //(USE THIS FOR THE "GENERIC" CASE) + //------------------------------------------------------------------------------------------ + + //Performs a reduced-round duplexing operation over M[row*] XOR M[prev], updating both M[row*] and M[row] + reducedDuplexRow(state, memMatrix[prev], memMatrix[rowa], memMatrix[row]); + + //update prev: it now points to the last row ever computed + prev = row; + + //updates row: goes to the next row to be computed + //------------------------------------------------------------------------------------------ + //row = (row + step) & (nRows-1); //(USE THIS IF nRows IS A POWER OF 2) + row = (row + step) % nRows; //(USE THIS FOR THE "GENERIC" CASE) + //------------------------------------------------------------------------------------------ + + } while (row != 0); + } + //==========================================================================/ + + //============================ Wrap-up Phase ===============================// + //Absorbs the last block of the memory matrix + absorbBlock(state, memMatrix[rowa]); + 
+ //Squeezes the key + squeeze(state, K, kLen); + //==========================================================================/ + + //========================= Freeing the memory =============================// + free(memMatrix); + free(wholeMatrix); + + //Wiping out the sponge's internal state before freeing it + memset(state, 0, 16 * sizeof (uint64_t)); + free(state); + //==========================================================================/ + + return 0; +} diff --git a/sph/Lyra2.h b/sph/Lyra2.h new file mode 100644 index 0000000000..13c7dbd3b3 --- /dev/null +++ b/sph/Lyra2.h @@ -0,0 +1,50 @@ +/** + * Header file for the Lyra2 Password Hashing Scheme (PHS). + * + * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. + * + * This software is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef LYRA2_H_ +#define LYRA2_H_ + +#include + +typedef unsigned char byte; + +//Block length required so Blake2's Initialization Vector (IV) is not overwritten (THIS SHOULD NOT BE MODIFIED) +#define BLOCK_LEN_BLAKE2_SAFE_INT64 8 //512 bits (=64 bytes, =8 uint64_t) +#define BLOCK_LEN_BLAKE2_SAFE_BYTES (BLOCK_LEN_BLAKE2_SAFE_INT64 * 8) //same as above, in bytes + + +#ifdef BLOCK_LEN_BITS + #define BLOCK_LEN_INT64 (BLOCK_LEN_BITS/64) //Block length in uint64_t words, derived from the caller-supplied BLOCK_LEN_BITS + #define BLOCK_LEN_BYTES (BLOCK_LEN_BITS/8) //Block length, in bytes +#else //default block length: 768 bits + #define BLOCK_LEN_INT64 12 //Block length: 768 bits (=96 bytes, =12 uint64_t) + #define BLOCK_LEN_BYTES (BLOCK_LEN_INT64 * 8) //Block length, in bytes +#endif + +#ifndef N_COLS + #define N_COLS 8 //Number of columns in the memory matrix: 8 by default, override by defining N_COLS before including this header +#endif + +#define ROW_LEN_INT64 (BLOCK_LEN_INT64 * N_COLS) //Total length of a row, in uint64_t words: N_COLS blocks +#define ROW_LEN_BYTES (ROW_LEN_INT64 * 8) //Number of bytes per row + + +int LYRA2(void *K, uint64_t kLen, const void *pwd, uint64_t pwdlen, const void *salt, uint64_t saltlen, uint64_t timeCost, uint64_t nRows, uint64_t nCols); + +#endif /* LYRA2_H_ */ diff --git a/sph/Sponge.c b/sph/Sponge.c new file mode 100644 index 0000000000..efe4d19960 --- /dev/null +++ b/sph/Sponge.c @@ -0,0 +1,755 @@ +/** + * A simple implementation of Blake2b's internal permutation + * in the form of a sponge. + * + * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. + * + * This software is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include "Sponge.h" +#include "Lyra2.h" + + + +/** + * Initializes the Sponge State. The first 512 bits are set to zeros and the remainder + * receives Blake2b's IV as per Blake2b's specification. Note: Even though sponges + * typically have their internal state initialized with zeros, Blake2b's G function + * has a fixed point: if the internal state and message are both filled with zeros, the + * resulting permutation will always be a block filled with zeros; this happens because + * Blake2b does not use the constants originally employed in Blake2 inside its G function, + * relying on the IV for avoiding possible fixed points. + * + * @param state The 1024-bit array to be initialized + */ + void initState(uint64_t state[/*16*/]) { + //First 512 bits (64 bytes) are zeros + memset(state, 0, 64); + //The remaining BLOCK_LEN_BLAKE2_SAFE_BYTES are reserved for the IV + + state[8] = blake2b_IV[0]; + state[9] = blake2b_IV[1]; + state[10] = blake2b_IV[2]; + state[11] = blake2b_IV[3]; + state[12] = blake2b_IV[4]; + state[13] = blake2b_IV[5]; + state[14] = blake2b_IV[6]; + state[15] = blake2b_IV[7]; + +} + +/** + * Execute Blake2b's G function, with all 12 rounds. 
+ * + * @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function + */ +__inline static void blake2bLyra(uint64_t *v) { + ROUND_LYRA(0); + ROUND_LYRA(1); + ROUND_LYRA(2); + ROUND_LYRA(3); + ROUND_LYRA(4); + ROUND_LYRA(5); + ROUND_LYRA(6); + ROUND_LYRA(7); + ROUND_LYRA(8); + ROUND_LYRA(9); + ROUND_LYRA(10); + ROUND_LYRA(11); +} + +/** + * Executes a reduced version of Blake2b's G function with only one round + * @param v A 1024-bit (16 uint64_t) array to be processed by Blake2b's G function + */ +__inline static void reducedBlake2bLyra(uint64_t *v) { + ROUND_LYRA(0); +} + +/** + * Performs a squeeze operation, using Blake2b's G function as the + * internal permutation + * + * @param state The current state of the sponge + * @param out Array that will receive the data squeezed + * @param len The number of bytes to be squeezed into the "out" array + */ + void squeeze(uint64_t *state, byte *out, unsigned int len) { + int fullBlocks = len / BLOCK_LEN_BYTES; + byte *ptr = out; + int i; + //Squeezes full blocks: copy BLOCK_LEN_BYTES out, then permute the state + for (i = 0; i < fullBlocks; i++) { + memcpy(ptr, state, BLOCK_LEN_BYTES); + blake2bLyra(state); + ptr += BLOCK_LEN_BYTES; + } + + //Squeezes remaining bytes (the state is not permuted again after this final partial copy) + memcpy(ptr, state, (len % BLOCK_LEN_BYTES)); +} + +/** + * Performs an absorb operation for a single block (BLOCK_LEN_INT64 words + * of type uint64_t), using Blake2b's G function as the internal permutation + * + * @param state The current state of the sponge + * @param in The block to be absorbed (BLOCK_LEN_INT64 words) + */ +void absorbBlock(uint64_t *state, const uint64_t *in) { + //XORs the first BLOCK_LEN_INT64 words of "in" with the current state (unrolled for exactly 12 words, i.e. the default BLOCK_LEN_INT64) + state[0] ^= in[0]; + state[1] ^= in[1]; + state[2] ^= in[2]; + state[3] ^= in[3]; + state[4] ^= in[4]; + state[5] ^= in[5]; + state[6] ^= in[6]; + state[7] ^= in[7]; + state[8] ^= in[8]; + state[9] ^= in[9]; + state[10] ^= in[10]; + state[11] ^= in[11]; + + //Applies the full 12-round transformation f to the sponge's state + blake2bLyra(state); +} 
+ +/** + * Performs an absorb operation for a single block (BLOCK_LEN_BLAKE2_SAFE_INT64 + * words of type uint64_t), using Blake2b's G function as the internal permutation + * + * @param state The current state of the sponge + * @param in The block to be absorbed (BLOCK_LEN_BLAKE2_SAFE_INT64 words) + */ +void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in) { + //XORs the first BLOCK_LEN_BLAKE2_SAFE_INT64 words of "in" with the current state + state[0] ^= in[0]; + state[1] ^= in[1]; + state[2] ^= in[2]; + state[3] ^= in[3]; + state[4] ^= in[4]; + state[5] ^= in[5]; + state[6] ^= in[6]; + state[7] ^= in[7]; + + //Applies the transformation f to the sponge's state + blake2bLyra(state); + /* + for (int i = 0; i<16; i++) { + printf(" final state %d %08x %08x in %08x %08x\n", i, (uint32_t)(state[i] & 0xFFFFFFFFULL), (uint32_t)(state[i] >> 32), + (uint32_t)(in[i] & 0xFFFFFFFFULL), (uint32_t)(in[i] >> 32)); + } +*/ +} + +/** + * Performs a reduced squeeze operation for a single row, from the highest to + * the lowest index, using the reduced-round Blake2b's G function as the + * internal permutation + * + * @param state The current state of the sponge + * @param rowOut Row to receive the data squeezed + */ +void reducedSqueezeRow0(uint64_t* state, uint64_t* rowOut) { + uint64_t* ptrWord = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to M[0][C-1] + int i; + //M[row][C-1-col] = H.reduced_squeeze() + for (i = 0; i < N_COLS; i++) { + + ptrWord[0] = state[0]; + ptrWord[1] = state[1]; + ptrWord[2] = state[2]; + ptrWord[3] = state[3]; + ptrWord[4] = state[4]; + ptrWord[5] = state[5]; + ptrWord[6] = state[6]; + ptrWord[7] = state[7]; + ptrWord[8] = state[8]; + ptrWord[9] = state[9]; + ptrWord[10] = state[10]; + ptrWord[11] = state[11]; + /* +for (int i = 0; i<12; i++) { + printf(" after reducedSqueezeRow0 %d %08x %08x in %08x %08x\n", i, (uint32_t)(ptrWord[i] & 0xFFFFFFFFULL), (uint32_t)(ptrWord[i] >> 32), + (uint32_t)(state[i] & 0xFFFFFFFFULL), 
(uint32_t)(state[i] >> 32)); + } +*/ + //Goes to next block (column) that will receive the squeezed data + ptrWord -= BLOCK_LEN_INT64; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + } +} + +/** + * Performs a reduced duplex operation for a single row, from the highest to + * the lowest index, using the reduced-round Blake2b's G function as the + * internal permutation + * + * @param state The current state of the sponge + * @param rowIn Row to feed the sponge + * @param rowOut Row to receive the sponge's output + */ + void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut) { + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row + int i; + + for (i = 0; i < N_COLS; i++) { + + //Absorbing "M[prev][col]" + state[0] ^= (ptrWordIn[0]); + state[1] ^= (ptrWordIn[1]); + state[2] ^= (ptrWordIn[2]); + state[3] ^= (ptrWordIn[3]); + state[4] ^= (ptrWordIn[4]); + state[5] ^= (ptrWordIn[5]); + state[6] ^= (ptrWordIn[6]); + state[7] ^= (ptrWordIn[7]); + state[8] ^= (ptrWordIn[8]); + state[9] ^= (ptrWordIn[9]); + state[10] ^= (ptrWordIn[10]); + state[11] ^= (ptrWordIn[11]); + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[row][C-1-col] = M[prev][col] XOR rand + ptrWordOut[0] = ptrWordIn[0] ^ state[0]; + ptrWordOut[1] = ptrWordIn[1] ^ state[1]; + ptrWordOut[2] = ptrWordIn[2] ^ state[2]; + ptrWordOut[3] = ptrWordIn[3] ^ state[3]; + ptrWordOut[4] = ptrWordIn[4] ^ state[4]; + ptrWordOut[5] = ptrWordIn[5] ^ state[5]; + ptrWordOut[6] = ptrWordIn[6] ^ state[6]; + ptrWordOut[7] = ptrWordIn[7] ^ state[7]; + ptrWordOut[8] = ptrWordIn[8] ^ state[8]; + ptrWordOut[9] = ptrWordIn[9] ^ state[9]; + ptrWordOut[10] = ptrWordIn[10] ^ state[10]; + ptrWordOut[11] = ptrWordIn[11] ^ state[11]; + + + //Input: next column (i.e., next block in sequence) + ptrWordIn += 
BLOCK_LEN_INT64; + //Output: goes to previous column + ptrWordOut -= BLOCK_LEN_INT64; + } +} + +/** + * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e., + * the wordwise addition of two columns, ignoring carries between words). The + * output of this operation, "rand", is then used to make + * "M[rowOut][(N_COLS-1)-col] = M[rowIn][col] XOR rand" and + * "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit + * rotation to the left (as implemented: rand rotated by one uint64_t word) and N_COLS is a system parameter. + * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ + void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordOut = rowOut + (N_COLS-1)*BLOCK_LEN_INT64; //In Lyra2: pointer to row (starts at the last column and moves backwards) + int i; + for (i = 0; i < N_COLS; i++) { + //Absorbing "M[prev] [+] M[row*]" + state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]); + state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]); + state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]); + state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]); + state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]); + state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]); + state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]); + state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]); + state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]); + state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]); + state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]); + state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]); + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[row][C-1-col] = M[prev][col] XOR rand + ptrWordOut[0] = ptrWordIn[0] ^ state[0]; + ptrWordOut[1] = ptrWordIn[1] ^ state[1]; + ptrWordOut[2] = ptrWordIn[2] ^ state[2]; + 
ptrWordOut[3] = ptrWordIn[3] ^ state[3]; + ptrWordOut[4] = ptrWordIn[4] ^ state[4]; + ptrWordOut[5] = ptrWordIn[5] ^ state[5]; + ptrWordOut[6] = ptrWordIn[6] ^ state[6]; + ptrWordOut[7] = ptrWordIn[7] ^ state[7]; + ptrWordOut[8] = ptrWordIn[8] ^ state[8]; + ptrWordOut[9] = ptrWordIn[9] ^ state[9]; + ptrWordOut[10] = ptrWordIn[10] ^ state[10]; + ptrWordOut[11] = ptrWordIn[11] ^ state[11]; + + //M[row*][col] = M[row*][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[11]; + ptrWordInOut[1] ^= state[0]; + ptrWordInOut[2] ^= state[1]; + ptrWordInOut[3] ^= state[2]; + ptrWordInOut[4] ^= state[3]; + ptrWordInOut[5] ^= state[4]; + ptrWordInOut[6] ^= state[5]; + ptrWordInOut[7] ^= state[6]; + ptrWordInOut[8] ^= state[7]; + ptrWordInOut[9] ^= state[8]; + ptrWordInOut[10] ^= state[9]; + ptrWordInOut[11] ^= state[10]; + + //Inputs: next column (i.e., next block in sequence) + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + //Output: goes to previous column + ptrWordOut -= BLOCK_LEN_INT64; + } +} + +/** + * Performs a duplexing operation over "M[rowInOut][col] [+] M[rowIn][col]" (i.e., + * the wordwise addition of two columns, ignoring carries between words). The + * output of this operation, "rand", is then used to make + * "M[rowOut][col] = M[rowOut][col] XOR rand" and + * "M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand)", where rotW is a 64-bit + * rotation to the left. 
+ * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row + int i; + + for (i = 0; i < N_COLS; i++) { + + //Absorbing "M[prev] [+] M[row*]" + state[0] ^= (ptrWordIn[0] + ptrWordInOut[0]); + state[1] ^= (ptrWordIn[1] + ptrWordInOut[1]); + state[2] ^= (ptrWordIn[2] + ptrWordInOut[2]); + state[3] ^= (ptrWordIn[3] + ptrWordInOut[3]); + state[4] ^= (ptrWordIn[4] + ptrWordInOut[4]); + state[5] ^= (ptrWordIn[5] + ptrWordInOut[5]); + state[6] ^= (ptrWordIn[6] + ptrWordInOut[6]); + state[7] ^= (ptrWordIn[7] + ptrWordInOut[7]); + state[8] ^= (ptrWordIn[8] + ptrWordInOut[8]); + state[9] ^= (ptrWordIn[9] + ptrWordInOut[9]); + state[10] ^= (ptrWordIn[10] + ptrWordInOut[10]); + state[11] ^= (ptrWordIn[11] + ptrWordInOut[11]); + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[rowOut][col] = M[rowOut][col] XOR rand + ptrWordOut[0] ^= state[0]; + ptrWordOut[1] ^= state[1]; + ptrWordOut[2] ^= state[2]; + ptrWordOut[3] ^= state[3]; + ptrWordOut[4] ^= state[4]; + ptrWordOut[5] ^= state[5]; + ptrWordOut[6] ^= state[6]; + ptrWordOut[7] ^= state[7]; + ptrWordOut[8] ^= state[8]; + ptrWordOut[9] ^= state[9]; + ptrWordOut[10] ^= state[10]; + ptrWordOut[11] ^= state[11]; + + //M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[11]; + ptrWordInOut[1] ^= state[0]; + ptrWordInOut[2] ^= state[1]; + ptrWordInOut[3] ^= state[2]; + ptrWordInOut[4] ^= state[3]; + ptrWordInOut[5] ^= state[4]; + ptrWordInOut[6] ^= state[5]; + ptrWordInOut[7] ^= state[6]; + ptrWordInOut[8] ^= 
state[7]; + ptrWordInOut[9] ^= state[8]; + ptrWordInOut[10] ^= state[9]; + ptrWordInOut[11] ^= state[10]; + + //Goes to next block + ptrWordOut += BLOCK_LEN_INT64; + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + } +} + + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * Performs a duplex operation over "M[rowInOut] [+] M[rowIn]", writing the output "rand" + * on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit + * rotation to the left. + * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +/* +inline void reducedDuplexRowSetupOLD(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row + int i; + for (i = 0; i < N_COLS; i++) { + + //Absorbing "M[rowInOut] XOR M[rowIn]" + state[0] ^= ptrWordInOut[0] ^ ptrWordIn[0]; + state[1] ^= ptrWordInOut[1] ^ ptrWordIn[1]; + state[2] ^= ptrWordInOut[2] ^ ptrWordIn[2]; + state[3] ^= ptrWordInOut[3] ^ ptrWordIn[3]; + state[4] ^= ptrWordInOut[4] ^ ptrWordIn[4]; + state[5] ^= ptrWordInOut[5] ^ ptrWordIn[5]; + state[6] ^= ptrWordInOut[6] ^ ptrWordIn[6]; + state[7] ^= ptrWordInOut[7] ^ ptrWordIn[7]; + state[8] ^= ptrWordInOut[8] ^ ptrWordIn[8]; + state[9] ^= ptrWordInOut[9] ^ ptrWordIn[9]; + state[10] ^= ptrWordInOut[10] ^ ptrWordIn[10]; + state[11] ^= ptrWordInOut[11] ^ ptrWordIn[11]; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[row][col] = rand + ptrWordOut[0] = state[0]; + ptrWordOut[1] = state[1]; + ptrWordOut[2] = state[2]; + ptrWordOut[3] 
= state[3]; + ptrWordOut[4] = state[4]; + ptrWordOut[5] = state[5]; + ptrWordOut[6] = state[6]; + ptrWordOut[7] = state[7]; + ptrWordOut[8] = state[8]; + ptrWordOut[9] = state[9]; + ptrWordOut[10] = state[10]; + ptrWordOut[11] = state[11]; + + + //M[row*][col] = M[row*][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[10]; + ptrWordInOut[1] ^= state[11]; + ptrWordInOut[2] ^= state[0]; + ptrWordInOut[3] ^= state[1]; + ptrWordInOut[4] ^= state[2]; + ptrWordInOut[5] ^= state[3]; + ptrWordInOut[6] ^= state[4]; + ptrWordInOut[7] ^= state[5]; + ptrWordInOut[8] ^= state[6]; + ptrWordInOut[9] ^= state[7]; + ptrWordInOut[10] ^= state[8]; + ptrWordInOut[11] ^= state[9]; + + //Goes to next column (i.e., next block in sequence) + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + ptrWordOut += BLOCK_LEN_INT64; + } +} +*/ + +/** + * Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand" + * on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit + * rotation to the left. 
+ * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +/* +inline void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row + int i; + for (i = 0; i < N_COLS; i++) { + + //Absorbing "M[rowInOut] XOR M[rowIn]" + state[0] ^= ptrWordInOut[0] + ptrWordIn[0]; + state[1] ^= ptrWordInOut[1] + ptrWordIn[1]; + state[2] ^= ptrWordInOut[2] + ptrWordIn[2]; + state[3] ^= ptrWordInOut[3] + ptrWordIn[3]; + state[4] ^= ptrWordInOut[4] + ptrWordIn[4]; + state[5] ^= ptrWordInOut[5] + ptrWordIn[5]; + state[6] ^= ptrWordInOut[6] + ptrWordIn[6]; + state[7] ^= ptrWordInOut[7] + ptrWordIn[7]; + state[8] ^= ptrWordInOut[8] + ptrWordIn[8]; + state[9] ^= ptrWordInOut[9] + ptrWordIn[9]; + state[10] ^= ptrWordInOut[10] + ptrWordIn[10]; + state[11] ^= ptrWordInOut[11] + ptrWordIn[11]; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + + //M[row*][col] = M[row*][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[10]; + ptrWordInOut[1] ^= state[11]; + ptrWordInOut[2] ^= state[0]; + ptrWordInOut[3] ^= state[1]; + ptrWordInOut[4] ^= state[2]; + ptrWordInOut[5] ^= state[3]; + ptrWordInOut[6] ^= state[4]; + ptrWordInOut[7] ^= state[5]; + ptrWordInOut[8] ^= state[6]; + ptrWordInOut[9] ^= state[7]; + ptrWordInOut[10] ^= state[8]; + ptrWordInOut[11] ^= state[9]; + + + //M[row][col] = rand + ptrWordOut[0] = state[0] ^ ptrWordIn[0]; + ptrWordOut[1] = state[1] ^ ptrWordIn[1]; + ptrWordOut[2] = state[2] ^ ptrWordIn[2]; + ptrWordOut[3] = state[3] ^ ptrWordIn[3]; + ptrWordOut[4] = state[4] ^ ptrWordIn[4]; + ptrWordOut[5] = state[5] ^ ptrWordIn[5]; + ptrWordOut[6] = 
state[6] ^ ptrWordIn[6]; + ptrWordOut[7] = state[7] ^ ptrWordIn[7]; + ptrWordOut[8] = state[8] ^ ptrWordIn[8]; + ptrWordOut[9] = state[9] ^ ptrWordIn[9]; + ptrWordOut[10] = state[10] ^ ptrWordIn[10]; + ptrWordOut[11] = state[11] ^ ptrWordIn[11]; + + //Goes to next column (i.e., next block in sequence) + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + ptrWordOut += BLOCK_LEN_INT64; + } +} +*/ + +/** + * Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", writing the output "rand" + * on M[rowOut] and making "M[rowInOut] = M[rowInOut] XOR rotW(rand)", where rotW is a 64-bit + * rotation to the left. + * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +/* +inline void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordOut = rowOut; + int i; + + for (i = 0; i < N_COLS / 2; i++) { + //Absorbing "M[rowInOut] XOR M[rowIn]" + state[0] ^= ptrWordInOut[0] + ptrWordIn[0]; + state[1] ^= ptrWordInOut[1] + ptrWordIn[1]; + state[2] ^= ptrWordInOut[2] + ptrWordIn[2]; + state[3] ^= ptrWordInOut[3] + ptrWordIn[3]; + state[4] ^= ptrWordInOut[4] + ptrWordIn[4]; + state[5] ^= ptrWordInOut[5] + ptrWordIn[5]; + state[6] ^= ptrWordInOut[6] + ptrWordIn[6]; + state[7] ^= ptrWordInOut[7] + ptrWordIn[7]; + state[8] ^= ptrWordInOut[8] + ptrWordIn[8]; + state[9] ^= ptrWordInOut[9] + ptrWordIn[9]; + state[10] ^= ptrWordInOut[10] + ptrWordIn[10]; + state[11] ^= ptrWordInOut[11] + ptrWordIn[11]; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + + //M[row*][col] = M[row*][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[10]; + ptrWordInOut[1] ^= state[11]; + 
ptrWordInOut[2] ^= state[0]; + ptrWordInOut[3] ^= state[1]; + ptrWordInOut[4] ^= state[2]; + ptrWordInOut[5] ^= state[3]; + ptrWordInOut[6] ^= state[4]; + ptrWordInOut[7] ^= state[5]; + ptrWordInOut[8] ^= state[6]; + ptrWordInOut[9] ^= state[7]; + ptrWordInOut[10] ^= state[8]; + ptrWordInOut[11] ^= state[9]; + + + //M[row][col] = rand + ptrWordOut[0] = state[0] ^ ptrWordIn[0]; + ptrWordOut[1] = state[1] ^ ptrWordIn[1]; + ptrWordOut[2] = state[2] ^ ptrWordIn[2]; + ptrWordOut[3] = state[3] ^ ptrWordIn[3]; + ptrWordOut[4] = state[4] ^ ptrWordIn[4]; + ptrWordOut[5] = state[5] ^ ptrWordIn[5]; + ptrWordOut[6] = state[6] ^ ptrWordIn[6]; + ptrWordOut[7] = state[7] ^ ptrWordIn[7]; + ptrWordOut[8] = state[8] ^ ptrWordIn[8]; + ptrWordOut[9] = state[9] ^ ptrWordIn[9]; + ptrWordOut[10] = state[10] ^ ptrWordIn[10]; + ptrWordOut[11] = state[11] ^ ptrWordIn[11]; + + //Goes to next column (i.e., next block in sequence) + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + ptrWordOut += 2 * BLOCK_LEN_INT64; + } + + ptrWordOut = rowOut + BLOCK_LEN_INT64; + for (i = 0; i < N_COLS / 2; i++) { + //Absorbing "M[rowInOut] XOR M[rowIn]" + state[0] ^= ptrWordInOut[0] + ptrWordIn[0]; + state[1] ^= ptrWordInOut[1] + ptrWordIn[1]; + state[2] ^= ptrWordInOut[2] + ptrWordIn[2]; + state[3] ^= ptrWordInOut[3] + ptrWordIn[3]; + state[4] ^= ptrWordInOut[4] + ptrWordIn[4]; + state[5] ^= ptrWordInOut[5] + ptrWordIn[5]; + state[6] ^= ptrWordInOut[6] + ptrWordIn[6]; + state[7] ^= ptrWordInOut[7] + ptrWordIn[7]; + state[8] ^= ptrWordInOut[8] + ptrWordIn[8]; + state[9] ^= ptrWordInOut[9] + ptrWordIn[9]; + state[10] ^= ptrWordInOut[10] + ptrWordIn[10]; + state[11] ^= ptrWordInOut[11] + ptrWordIn[11]; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + + //M[row*][col] = M[row*][col] XOR rotW(rand) + ptrWordInOut[0] ^= state[10]; + ptrWordInOut[1] ^= state[11]; + ptrWordInOut[2] ^= state[0]; + ptrWordInOut[3] ^= state[1]; + ptrWordInOut[4] 
^= state[2]; + ptrWordInOut[5] ^= state[3]; + ptrWordInOut[6] ^= state[4]; + ptrWordInOut[7] ^= state[5]; + ptrWordInOut[8] ^= state[6]; + ptrWordInOut[9] ^= state[7]; + ptrWordInOut[10] ^= state[8]; + ptrWordInOut[11] ^= state[9]; + + + //M[row][col] = rand + ptrWordOut[0] = state[0] ^ ptrWordIn[0]; + ptrWordOut[1] = state[1] ^ ptrWordIn[1]; + ptrWordOut[2] = state[2] ^ ptrWordIn[2]; + ptrWordOut[3] = state[3] ^ ptrWordIn[3]; + ptrWordOut[4] = state[4] ^ ptrWordIn[4]; + ptrWordOut[5] = state[5] ^ ptrWordIn[5]; + ptrWordOut[6] = state[6] ^ ptrWordIn[6]; + ptrWordOut[7] = state[7] ^ ptrWordIn[7]; + ptrWordOut[8] = state[8] ^ ptrWordIn[8]; + ptrWordOut[9] = state[9] ^ ptrWordIn[9]; + ptrWordOut[10] = state[10] ^ ptrWordIn[10]; + ptrWordOut[11] = state[11] ^ ptrWordIn[11]; + + //Goes to next column (i.e., next block in sequence) + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + ptrWordOut += 2 * BLOCK_LEN_INT64; + } +} +*/ + +/** + * Performs a duplex operation over "M[rowInOut] XOR M[rowIn]", using the output "rand" + * to make "M[rowOut][col] = M[rowOut][col] XOR rand" and "M[rowInOut] = M[rowInOut] XOR rotW(rand)", + * where rotW is a 64-bit rotation to the left. 
+ * + * @param state The current state of the sponge + * @param rowIn Row used only as input + * @param rowInOut Row used as input and to receive output after rotation + * @param rowOut Row receiving the output + * + */ +/* +inline void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut) { + uint64_t* ptrWordInOut = rowInOut; //In Lyra2: pointer to row* + uint64_t* ptrWordIn = rowIn; //In Lyra2: pointer to prev + uint64_t* ptrWordOut = rowOut; //In Lyra2: pointer to row + int i; + for (i = 0; i < N_COLS; i++) { + + //Absorbing "M[rowInOut] XOR M[rowIn]" + state[0] ^= ptrWordInOut[0] + ptrWordIn[0]; + state[1] ^= ptrWordInOut[1] + ptrWordIn[1]; + state[2] ^= ptrWordInOut[2] + ptrWordIn[2]; + state[3] ^= ptrWordInOut[3] + ptrWordIn[3]; + state[4] ^= ptrWordInOut[4] + ptrWordIn[4]; + state[5] ^= ptrWordInOut[5] + ptrWordIn[5]; + state[6] ^= ptrWordInOut[6] + ptrWordIn[6]; + state[7] ^= ptrWordInOut[7] + ptrWordIn[7]; + state[8] ^= ptrWordInOut[8] + ptrWordIn[8]; + state[9] ^= ptrWordInOut[9] + ptrWordIn[9]; + state[10] ^= ptrWordInOut[10] + ptrWordIn[10]; + state[11] ^= ptrWordInOut[11] + ptrWordIn[11]; + + //Applies the reduced-round transformation f to the sponge's state + reducedBlake2bLyra(state); + + //M[rowOut][col] = M[rowOut][col] XOR rand + ptrWordOut[0] ^= state[0]; + ptrWordOut[1] ^= state[1]; + ptrWordOut[2] ^= state[2]; + ptrWordOut[3] ^= state[3]; + ptrWordOut[4] ^= state[4]; + ptrWordOut[5] ^= state[5]; + ptrWordOut[6] ^= state[6]; + ptrWordOut[7] ^= state[7]; + ptrWordOut[8] ^= state[8]; + ptrWordOut[9] ^= state[9]; + ptrWordOut[10] ^= state[10]; + ptrWordOut[11] ^= state[11]; + + //M[rowInOut][col] = M[rowInOut][col] XOR rotW(rand) + + + //Goes to next block + ptrWordOut += BLOCK_LEN_INT64; + ptrWordInOut += BLOCK_LEN_INT64; + ptrWordIn += BLOCK_LEN_INT64; + } +} +*/ + +/** + Prints an array of unsigned chars + */ +void printArray(unsigned char *array, unsigned int size, char *name) { + int i; + printf("%s: ", 
name); + for (i = 0; i < size; i++) { + printf("%2x|", array[i]); + } + printf("\n"); +} + +//////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/sph/Sponge.h b/sph/Sponge.h new file mode 100644 index 0000000000..0e5745dadc --- /dev/null +++ b/sph/Sponge.h @@ -0,0 +1,108 @@ +/** + * Header file for Blake2b's internal permutation in the form of a sponge. + * This code is based on the original Blake2b's implementation provided by + * Samuel Neves (https://blake2.net/) + * + * Author: The Lyra PHC team (http://www.lyra-kdf.net/) -- 2014. + * + * This software is hereby placed in the public domain. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS + * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef SPONGE_H_ +#define SPONGE_H_ + +#include + +#if defined(__GNUC__) +#define ALIGN __attribute__ ((aligned(32))) +#elif defined(_MSC_VER) +#define ALIGN __declspec(align(32)) +#else +#define ALIGN +#endif + + +/*Blake2b IV Array*/ +static const uint64_t blake2b_IV[8] = +{ + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL +}; + +/*Blake2b's rotation: rotates w right by c bits. c must be in 1..63; c == 0 would shift left by 64, which is undefined behaviour in C. The G macro below only passes 32, 24, 16 and 63.*/ +static __inline uint64_t rotr64( const uint64_t w, const unsigned c ){ + return ( w >> c ) | ( w << ( 64 - c ) ); +} + +/*Blake2b's G function as used by this sponge: only a, b, c, d are mixed (no message words); the r and i arguments are accepted but unused.*/ +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while(0) + + +/*One round of Blake2b's compression function. Expects a 16-word uint64_t state named v in the calling scope; the expansion is eight statements and already ends with a semicolon, so it is not safe as the body of an unbraced if/else.*/ +#define ROUND_LYRA(r) \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); + + +//---- Housekeeping +void initState(uint64_t state[/*16*/]); + +//---- Squeezes +void squeeze(uint64_t *state, unsigned char *out, unsigned int len); +void reducedSqueezeRow0(uint64_t* state, uint64_t* row); + +//---- Absorbs +void absorbBlock(uint64_t *state, const uint64_t *in); +void absorbBlockBlake2Safe(uint64_t *state, const uint64_t *in); + +//---- Duplexes +void reducedDuplexRow1(uint64_t *state, uint64_t *rowIn, uint64_t *rowOut); +void reducedDuplexRowSetup(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +void reducedDuplexRow(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); + +//---- Misc +void printArray(unsigned char *array, unsigned int size, char *name); + 
+//////////////////////////////////////////////////////////////////////////////////////////////// + + +////TESTS//// +//void reducedDuplexRowc(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +//void reducedDuplexRowd(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +//void reducedDuplexRowSetupv4(uint64_t *state, uint64_t *rowIn1, uint64_t *rowIn2, uint64_t *rowOut1, uint64_t *rowOut2); +//void reducedDuplexRowSetupv5(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +//void reducedDuplexRowSetupv5c(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +//void reducedDuplexRowSetupv5d(uint64_t *state, uint64_t *rowIn, uint64_t *rowInOut, uint64_t *rowOut); +///////////// + + +#endif /* SPONGE_H_ */ diff --git a/sph/haval.c b/sph/haval.c new file mode 100644 index 0000000000..f9a8918760 --- /dev/null +++ b/sph/haval.c @@ -0,0 +1,983 @@ +/* $Id: haval.c 227 2010-06-16 17:28:38Z tp $ */ +/* + * HAVAL implementation. + * + * The HAVAL reference paper is of questionable clarity with regards to + * some details such as endianness of bits within a byte, bytes within + * a 32-bit word, or the actual ordering of words within a stream of + * words. 
This implementation has been made compatible with the reference + * implementation available on: http://labs.calyptix.com/haval.php + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include + +#include "sph_haval.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_HAVAL +#define SPH_SMALL_FOOTPRINT_HAVAL 1 +#endif + +/* + * Basic definition from the reference paper. 
+ * +#define F1(x6, x5, x4, x3, x2, x1, x0) \ + (((x1) & (x4)) ^ ((x2) & (x5)) ^ ((x3) & (x6)) ^ ((x0) & (x1)) ^ (x0)) + * + */ + +#define F1(x6, x5, x4, x3, x2, x1, x0) \ + (((x1) & ((x0) ^ (x4))) ^ ((x2) & (x5)) ^ ((x3) & (x6)) ^ (x0)) + +/* + * Basic definition from the reference paper. NOTE(review): the optimized F2 below uses x3 without its own parentheses in (x3 & (x5)); this is only safe while callers pass a plain identifier for x3. + * +#define F2(x6, x5, x4, x3, x2, x1, x0) \ + (((x1) & (x2) & (x3)) ^ ((x2) & (x4) & (x5)) ^ ((x1) & (x2)) \ + ^ ((x1) & (x4)) ^ ((x2) & (x6)) ^ ((x3) & (x5)) \ + ^ ((x4) & (x5)) ^ ((x0) & (x2)) ^ (x0)) + * + */ + +#define F2(x6, x5, x4, x3, x2, x1, x0) \ + (((x2) & (((x1) & ~(x3)) ^ ((x4) & (x5)) ^ (x6) ^ (x0))) \ + ^ ((x4) & ((x1) ^ (x5))) ^ ((x3 & (x5)) ^ (x0))) + +/* + * Basic definition from the reference paper. + * +#define F3(x6, x5, x4, x3, x2, x1, x0) \ + (((x1) & (x2) & (x3)) ^ ((x1) & (x4)) ^ ((x2) & (x5)) \ + ^ ((x3) & (x6)) ^ ((x0) & (x3)) ^ (x0)) + * + */ + +#define F3(x6, x5, x4, x3, x2, x1, x0) \ + (((x3) & (((x1) & (x2)) ^ (x6) ^ (x0))) \ + ^ ((x1) & (x4)) ^ ((x2) & (x5)) ^ (x0)) + +/* + * Basic definition from the reference paper. + * +#define F4(x6, x5, x4, x3, x2, x1, x0) \ + (((x1) & (x2) & (x3)) ^ ((x2) & (x4) & (x5)) ^ ((x3) & (x4) & (x6)) \ + ^ ((x1) & (x4)) ^ ((x2) & (x6)) ^ ((x3) & (x4)) ^ ((x3) & (x5)) \ + ^ ((x3) & (x6)) ^ ((x4) & (x5)) ^ ((x4) & (x6)) ^ ((x0) & (x4)) ^ (x0)) + * + */ + +#define F4(x6, x5, x4, x3, x2, x1, x0) \ + (((x3) & (((x1) & (x2)) ^ ((x4) | (x6)) ^ (x5))) \ + ^ ((x4) & ((~(x2) & (x5)) ^ (x1) ^ (x6) ^ (x0))) \ + ^ ((x2) & (x6)) ^ (x0)) + +/* + * Basic definition from the reference paper. + * +#define F5(x6, x5, x4, x3, x2, x1, x0) \ + (((x1) & (x4)) ^ ((x2) & (x5)) ^ ((x3) & (x6)) \ + ^ ((x0) & (x1) & (x2) & (x3)) ^ ((x0) & (x5)) ^ (x0)) + * + */ + +#define F5(x6, x5, x4, x3, x2, x1, x0) \ + (((x0) & ~(((x1) & (x2) & (x3)) ^ (x5))) \ + ^ ((x1) & (x4)) ^ ((x2) & (x5)) ^ ((x3) & (x6))) + +/* + * The macros below integrate the phi() permutations, depending on the + * pass and the total number of passes. 
+ */
+
+#define FP3_1(x6, x5, x4, x3, x2, x1, x0) \
+    F1(x1, x0, x3, x5, x6, x2, x4)
+#define FP3_2(x6, x5, x4, x3, x2, x1, x0) \
+    F2(x4, x2, x1, x0, x5, x3, x6)
+#define FP3_3(x6, x5, x4, x3, x2, x1, x0) \
+    F3(x6, x1, x2, x3, x4, x5, x0)
+
+#define FP4_1(x6, x5, x4, x3, x2, x1, x0) \
+    F1(x2, x6, x1, x4, x5, x3, x0)
+#define FP4_2(x6, x5, x4, x3, x2, x1, x0) \
+    F2(x3, x5, x2, x0, x1, x6, x4)
+#define FP4_3(x6, x5, x4, x3, x2, x1, x0) \
+    F3(x1, x4, x3, x6, x0, x2, x5)
+#define FP4_4(x6, x5, x4, x3, x2, x1, x0) \
+    F4(x6, x4, x0, x5, x2, x1, x3)
+
+#define FP5_1(x6, x5, x4, x3, x2, x1, x0) \
+    F1(x3, x4, x1, x0, x5, x2, x6)
+#define FP5_2(x6, x5, x4, x3, x2, x1, x0) \
+    F2(x6, x2, x1, x0, x3, x4, x5)
+#define FP5_3(x6, x5, x4, x3, x2, x1, x0) \
+    F3(x2, x6, x0, x4, x3, x1, x5)
+#define FP5_4(x6, x5, x4, x3, x2, x1, x0) \
+    F4(x1, x5, x3, x2, x0, x4, x6)
+#define FP5_5(x6, x5, x4, x3, x2, x1, x0) \
+    F5(x2, x5, x0, x6, x4, x3, x1)
+
+/*
+ * One step, for "n" passes, pass number "p" (1 <= p <= n), using
+ * input word number "w" and step constant "c".
+ *
+ * The boolean function is selected by token pasting: FP ## n ## _ ## p
+ * names one of the FPn_p macros above, each of which forwards its
+ * seven arguments to Fp in a permuted order. "n" and "p" must
+ * therefore be literal digits for which such a macro exists.
+ *
+ * The step reads x6..x0 and overwrites x7 with
+ *   ROTR32(FPn_p(x6..x0), 7) + ROTR32(x7, 11) + w + c
+ * passed through SPH_T32 (defined elsewhere; presumably a reduction
+ * to 32 bits). "t" is a block-local temporary, so the macro is a
+ * single statement thanks to the do { } while (0) wrapper.
+ */
+#define STEP(n, p, x7, x6, x5, x4, x3, x2, x1, x0, w, c) do { \
+        sph_u32 t = FP ## n ## _ ## p(x6, x5, x4, x3, x2, x1, x0); \
+        (x7) = SPH_T32(SPH_ROTR32(t, 7) + SPH_ROTR32((x7), 11) \
+            + (w) + (c)); \
+    } while (0)
+
+/*
+ * PASSy(n, in) computes pass number "y", for a total of "n", using the
+ * one-argument macro "in" to access input words. Current state is assumed
+ * to be held in variables "s0" to "s7".
+ */ + +#if SPH_SMALL_FOOTPRINT_HAVAL + +#define PASS1(n, in) do { \ + unsigned pass_count; \ + for (pass_count = 0; pass_count < 32; pass_count += 8) { \ + STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, \ + in(pass_count + 0), SPH_C32(0x00000000)); \ + STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, \ + in(pass_count + 1), SPH_C32(0x00000000)); \ + STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, \ + in(pass_count + 2), SPH_C32(0x00000000)); \ + STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, \ + in(pass_count + 3), SPH_C32(0x00000000)); \ + STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, \ + in(pass_count + 4), SPH_C32(0x00000000)); \ + STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, \ + in(pass_count + 5), SPH_C32(0x00000000)); \ + STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, \ + in(pass_count + 6), SPH_C32(0x00000000)); \ + STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, \ + in(pass_count + 7), SPH_C32(0x00000000)); \ + } \ + } while (0) + +#define PASSG(p, n, in) do { \ + unsigned pass_count; \ + for (pass_count = 0; pass_count < 32; pass_count += 8) { \ + STEP(n, p, s7, s6, s5, s4, s3, s2, s1, s0, \ + in(MP ## p[pass_count + 0]), \ + RK ## p[pass_count + 0]); \ + STEP(n, p, s6, s5, s4, s3, s2, s1, s0, s7, \ + in(MP ## p[pass_count + 1]), \ + RK ## p[pass_count + 1]); \ + STEP(n, p, s5, s4, s3, s2, s1, s0, s7, s6, \ + in(MP ## p[pass_count + 2]), \ + RK ## p[pass_count + 2]); \ + STEP(n, p, s4, s3, s2, s1, s0, s7, s6, s5, \ + in(MP ## p[pass_count + 3]), \ + RK ## p[pass_count + 3]); \ + STEP(n, p, s3, s2, s1, s0, s7, s6, s5, s4, \ + in(MP ## p[pass_count + 4]), \ + RK ## p[pass_count + 4]); \ + STEP(n, p, s2, s1, s0, s7, s6, s5, s4, s3, \ + in(MP ## p[pass_count + 5]), \ + RK ## p[pass_count + 5]); \ + STEP(n, p, s1, s0, s7, s6, s5, s4, s3, s2, \ + in(MP ## p[pass_count + 6]), \ + RK ## p[pass_count + 6]); \ + STEP(n, p, s0, s7, s6, s5, s4, s3, s2, s1, \ + in(MP ## p[pass_count + 7]), \ + RK ## p[pass_count + 7]); \ + } \ + } while (0) + +#define PASS2(n, in) PASSG(2, n, in) +#define PASS3(n, 
in) PASSG(3, n, in) +#define PASS4(n, in) PASSG(4, n, in) +#define PASS5(n, in) PASSG(5, n, in) + +static const unsigned MP2[32] = { + 5, 14, 26, 18, 11, 28, 7, 16, + 0, 23, 20, 22, 1, 10, 4, 8, + 30, 3, 21, 9, 17, 24, 29, 6, + 19, 12, 15, 13, 2, 25, 31, 27 +}; + +static const unsigned MP3[32] = { + 19, 9, 4, 20, 28, 17, 8, 22, + 29, 14, 25, 12, 24, 30, 16, 26, + 31, 15, 7, 3, 1, 0, 18, 27, + 13, 6, 21, 10, 23, 11, 5, 2 +}; + +static const unsigned MP4[32] = { + 24, 4, 0, 14, 2, 7, 28, 23, + 26, 6, 30, 20, 18, 25, 19, 3, + 22, 11, 31, 21, 8, 27, 12, 9, + 1, 29, 5, 15, 17, 10, 16, 13 +}; + +static const unsigned MP5[32] = { + 27, 3, 21, 26, 17, 11, 20, 29, + 19, 0, 12, 7, 13, 8, 31, 10, + 5, 9, 14, 30, 18, 6, 28, 24, + 2, 23, 16, 22, 4, 1, 25, 15 +}; + +static const sph_u32 RK2[32] = { + SPH_C32(0x452821E6), SPH_C32(0x38D01377), + SPH_C32(0xBE5466CF), SPH_C32(0x34E90C6C), + SPH_C32(0xC0AC29B7), SPH_C32(0xC97C50DD), + SPH_C32(0x3F84D5B5), SPH_C32(0xB5470917), + SPH_C32(0x9216D5D9), SPH_C32(0x8979FB1B), + SPH_C32(0xD1310BA6), SPH_C32(0x98DFB5AC), + SPH_C32(0x2FFD72DB), SPH_C32(0xD01ADFB7), + SPH_C32(0xB8E1AFED), SPH_C32(0x6A267E96), + SPH_C32(0xBA7C9045), SPH_C32(0xF12C7F99), + SPH_C32(0x24A19947), SPH_C32(0xB3916CF7), + SPH_C32(0x0801F2E2), SPH_C32(0x858EFC16), + SPH_C32(0x636920D8), SPH_C32(0x71574E69), + SPH_C32(0xA458FEA3), SPH_C32(0xF4933D7E), + SPH_C32(0x0D95748F), SPH_C32(0x728EB658), + SPH_C32(0x718BCD58), SPH_C32(0x82154AEE), + SPH_C32(0x7B54A41D), SPH_C32(0xC25A59B5) +}; + +static const sph_u32 RK3[32] = { + SPH_C32(0x9C30D539), SPH_C32(0x2AF26013), + SPH_C32(0xC5D1B023), SPH_C32(0x286085F0), + SPH_C32(0xCA417918), SPH_C32(0xB8DB38EF), + SPH_C32(0x8E79DCB0), SPH_C32(0x603A180E), + SPH_C32(0x6C9E0E8B), SPH_C32(0xB01E8A3E), + SPH_C32(0xD71577C1), SPH_C32(0xBD314B27), + SPH_C32(0x78AF2FDA), SPH_C32(0x55605C60), + SPH_C32(0xE65525F3), SPH_C32(0xAA55AB94), + SPH_C32(0x57489862), SPH_C32(0x63E81440), + SPH_C32(0x55CA396A), SPH_C32(0x2AAB10B6), + 
SPH_C32(0xB4CC5C34), SPH_C32(0x1141E8CE), + SPH_C32(0xA15486AF), SPH_C32(0x7C72E993), + SPH_C32(0xB3EE1411), SPH_C32(0x636FBC2A), + SPH_C32(0x2BA9C55D), SPH_C32(0x741831F6), + SPH_C32(0xCE5C3E16), SPH_C32(0x9B87931E), + SPH_C32(0xAFD6BA33), SPH_C32(0x6C24CF5C) +}; + +static const sph_u32 RK4[32] = { + SPH_C32(0x7A325381), SPH_C32(0x28958677), + SPH_C32(0x3B8F4898), SPH_C32(0x6B4BB9AF), + SPH_C32(0xC4BFE81B), SPH_C32(0x66282193), + SPH_C32(0x61D809CC), SPH_C32(0xFB21A991), + SPH_C32(0x487CAC60), SPH_C32(0x5DEC8032), + SPH_C32(0xEF845D5D), SPH_C32(0xE98575B1), + SPH_C32(0xDC262302), SPH_C32(0xEB651B88), + SPH_C32(0x23893E81), SPH_C32(0xD396ACC5), + SPH_C32(0x0F6D6FF3), SPH_C32(0x83F44239), + SPH_C32(0x2E0B4482), SPH_C32(0xA4842004), + SPH_C32(0x69C8F04A), SPH_C32(0x9E1F9B5E), + SPH_C32(0x21C66842), SPH_C32(0xF6E96C9A), + SPH_C32(0x670C9C61), SPH_C32(0xABD388F0), + SPH_C32(0x6A51A0D2), SPH_C32(0xD8542F68), + SPH_C32(0x960FA728), SPH_C32(0xAB5133A3), + SPH_C32(0x6EEF0B6C), SPH_C32(0x137A3BE4) +}; + +static const sph_u32 RK5[32] = { + SPH_C32(0xBA3BF050), SPH_C32(0x7EFB2A98), + SPH_C32(0xA1F1651D), SPH_C32(0x39AF0176), + SPH_C32(0x66CA593E), SPH_C32(0x82430E88), + SPH_C32(0x8CEE8619), SPH_C32(0x456F9FB4), + SPH_C32(0x7D84A5C3), SPH_C32(0x3B8B5EBE), + SPH_C32(0xE06F75D8), SPH_C32(0x85C12073), + SPH_C32(0x401A449F), SPH_C32(0x56C16AA6), + SPH_C32(0x4ED3AA62), SPH_C32(0x363F7706), + SPH_C32(0x1BFEDF72), SPH_C32(0x429B023D), + SPH_C32(0x37D0D724), SPH_C32(0xD00A1248), + SPH_C32(0xDB0FEAD3), SPH_C32(0x49F1C09B), + SPH_C32(0x075372C9), SPH_C32(0x80991B7B), + SPH_C32(0x25D479D8), SPH_C32(0xF6E8DEF7), + SPH_C32(0xE3FE501A), SPH_C32(0xB6794C3B), + SPH_C32(0x976CE0BD), SPH_C32(0x04C006BA), + SPH_C32(0xC1A94FB6), SPH_C32(0x409F60C4) +}; + +#else + +#define PASS1(n, in) do { \ + STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in( 0), SPH_C32(0x00000000)); \ + STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in( 1), SPH_C32(0x00000000)); \ + STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in( 2), 
SPH_C32(0x00000000)); \ + STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in( 3), SPH_C32(0x00000000)); \ + STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in( 4), SPH_C32(0x00000000)); \ + STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in( 5), SPH_C32(0x00000000)); \ + STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in( 6), SPH_C32(0x00000000)); \ + STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in( 7), SPH_C32(0x00000000)); \ + \ + STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in( 8), SPH_C32(0x00000000)); \ + STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in( 9), SPH_C32(0x00000000)); \ + STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in(10), SPH_C32(0x00000000)); \ + STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in(11), SPH_C32(0x00000000)); \ + STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in(12), SPH_C32(0x00000000)); \ + STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in(13), SPH_C32(0x00000000)); \ + STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in(14), SPH_C32(0x00000000)); \ + STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in(15), SPH_C32(0x00000000)); \ + \ + STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in(16), SPH_C32(0x00000000)); \ + STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in(17), SPH_C32(0x00000000)); \ + STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in(18), SPH_C32(0x00000000)); \ + STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in(19), SPH_C32(0x00000000)); \ + STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in(20), SPH_C32(0x00000000)); \ + STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in(21), SPH_C32(0x00000000)); \ + STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in(22), SPH_C32(0x00000000)); \ + STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in(23), SPH_C32(0x00000000)); \ + \ + STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in(24), SPH_C32(0x00000000)); \ + STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in(25), SPH_C32(0x00000000)); \ + STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in(26), SPH_C32(0x00000000)); \ + STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in(27), SPH_C32(0x00000000)); \ + STEP(n, 1, s3, s2, s1, s0, s7, s6, 
s5, s4, in(28), SPH_C32(0x00000000)); \ + STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in(29), SPH_C32(0x00000000)); \ + STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in(30), SPH_C32(0x00000000)); \ + STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in(31), SPH_C32(0x00000000)); \ + } while (0) + +#define PASS2(n, in) do { \ + STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in( 5), SPH_C32(0x452821E6)); \ + STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in(14), SPH_C32(0x38D01377)); \ + STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in(26), SPH_C32(0xBE5466CF)); \ + STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in(18), SPH_C32(0x34E90C6C)); \ + STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in(11), SPH_C32(0xC0AC29B7)); \ + STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in(28), SPH_C32(0xC97C50DD)); \ + STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in( 7), SPH_C32(0x3F84D5B5)); \ + STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in(16), SPH_C32(0xB5470917)); \ + \ + STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in( 0), SPH_C32(0x9216D5D9)); \ + STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in(23), SPH_C32(0x8979FB1B)); \ + STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in(20), SPH_C32(0xD1310BA6)); \ + STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in(22), SPH_C32(0x98DFB5AC)); \ + STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in( 1), SPH_C32(0x2FFD72DB)); \ + STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in(10), SPH_C32(0xD01ADFB7)); \ + STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in( 4), SPH_C32(0xB8E1AFED)); \ + STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in( 8), SPH_C32(0x6A267E96)); \ + \ + STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in(30), SPH_C32(0xBA7C9045)); \ + STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in( 3), SPH_C32(0xF12C7F99)); \ + STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in(21), SPH_C32(0x24A19947)); \ + STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in( 9), SPH_C32(0xB3916CF7)); \ + STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in(17), SPH_C32(0x0801F2E2)); \ + STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in(24), 
SPH_C32(0x858EFC16)); \ + STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in(29), SPH_C32(0x636920D8)); \ + STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in( 6), SPH_C32(0x71574E69)); \ + \ + STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in(19), SPH_C32(0xA458FEA3)); \ + STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in(12), SPH_C32(0xF4933D7E)); \ + STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in(15), SPH_C32(0x0D95748F)); \ + STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in(13), SPH_C32(0x728EB658)); \ + STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in( 2), SPH_C32(0x718BCD58)); \ + STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in(25), SPH_C32(0x82154AEE)); \ + STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in(31), SPH_C32(0x7B54A41D)); \ + STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in(27), SPH_C32(0xC25A59B5)); \ + } while (0) + +#define PASS3(n, in) do { \ + STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in(19), SPH_C32(0x9C30D539)); \ + STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in( 9), SPH_C32(0x2AF26013)); \ + STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in( 4), SPH_C32(0xC5D1B023)); \ + STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in(20), SPH_C32(0x286085F0)); \ + STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in(28), SPH_C32(0xCA417918)); \ + STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in(17), SPH_C32(0xB8DB38EF)); \ + STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in( 8), SPH_C32(0x8E79DCB0)); \ + STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in(22), SPH_C32(0x603A180E)); \ + \ + STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in(29), SPH_C32(0x6C9E0E8B)); \ + STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in(14), SPH_C32(0xB01E8A3E)); \ + STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in(25), SPH_C32(0xD71577C1)); \ + STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in(12), SPH_C32(0xBD314B27)); \ + STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in(24), SPH_C32(0x78AF2FDA)); \ + STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in(30), SPH_C32(0x55605C60)); \ + STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in(16), SPH_C32(0xE65525F3)); 
\ + STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in(26), SPH_C32(0xAA55AB94)); \ + \ + STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in(31), SPH_C32(0x57489862)); \ + STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in(15), SPH_C32(0x63E81440)); \ + STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in( 7), SPH_C32(0x55CA396A)); \ + STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in( 3), SPH_C32(0x2AAB10B6)); \ + STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in( 1), SPH_C32(0xB4CC5C34)); \ + STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in( 0), SPH_C32(0x1141E8CE)); \ + STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in(18), SPH_C32(0xA15486AF)); \ + STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in(27), SPH_C32(0x7C72E993)); \ + \ + STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in(13), SPH_C32(0xB3EE1411)); \ + STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in( 6), SPH_C32(0x636FBC2A)); \ + STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in(21), SPH_C32(0x2BA9C55D)); \ + STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in(10), SPH_C32(0x741831F6)); \ + STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in(23), SPH_C32(0xCE5C3E16)); \ + STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in(11), SPH_C32(0x9B87931E)); \ + STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in( 5), SPH_C32(0xAFD6BA33)); \ + STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in( 2), SPH_C32(0x6C24CF5C)); \ + } while (0) + +#define PASS4(n, in) do { \ + STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in(24), SPH_C32(0x7A325381)); \ + STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in( 4), SPH_C32(0x28958677)); \ + STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in( 0), SPH_C32(0x3B8F4898)); \ + STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in(14), SPH_C32(0x6B4BB9AF)); \ + STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in( 2), SPH_C32(0xC4BFE81B)); \ + STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in( 7), SPH_C32(0x66282193)); \ + STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in(28), SPH_C32(0x61D809CC)); \ + STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in(23), SPH_C32(0xFB21A991)); \ + \ + STEP(n, 4, 
s7, s6, s5, s4, s3, s2, s1, s0, in(26), SPH_C32(0x487CAC60)); \ + STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in( 6), SPH_C32(0x5DEC8032)); \ + STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in(30), SPH_C32(0xEF845D5D)); \ + STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in(20), SPH_C32(0xE98575B1)); \ + STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in(18), SPH_C32(0xDC262302)); \ + STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in(25), SPH_C32(0xEB651B88)); \ + STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in(19), SPH_C32(0x23893E81)); \ + STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in( 3), SPH_C32(0xD396ACC5)); \ + \ + STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in(22), SPH_C32(0x0F6D6FF3)); \ + STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in(11), SPH_C32(0x83F44239)); \ + STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in(31), SPH_C32(0x2E0B4482)); \ + STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in(21), SPH_C32(0xA4842004)); \ + STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in( 8), SPH_C32(0x69C8F04A)); \ + STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in(27), SPH_C32(0x9E1F9B5E)); \ + STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in(12), SPH_C32(0x21C66842)); \ + STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in( 9), SPH_C32(0xF6E96C9A)); \ + \ + STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in( 1), SPH_C32(0x670C9C61)); \ + STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in(29), SPH_C32(0xABD388F0)); \ + STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in( 5), SPH_C32(0x6A51A0D2)); \ + STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in(15), SPH_C32(0xD8542F68)); \ + STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in(17), SPH_C32(0x960FA728)); \ + STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in(10), SPH_C32(0xAB5133A3)); \ + STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in(16), SPH_C32(0x6EEF0B6C)); \ + STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in(13), SPH_C32(0x137A3BE4)); \ + } while (0) + +#define PASS5(n, in) do { \ + STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in(27), SPH_C32(0xBA3BF050)); \ + STEP(n, 5, s6, s5, s4, s3, s2, 
s1, s0, s7, in( 3), SPH_C32(0x7EFB2A98)); \ + STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in(21), SPH_C32(0xA1F1651D)); \ + STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in(26), SPH_C32(0x39AF0176)); \ + STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in(17), SPH_C32(0x66CA593E)); \ + STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in(11), SPH_C32(0x82430E88)); \ + STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in(20), SPH_C32(0x8CEE8619)); \ + STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in(29), SPH_C32(0x456F9FB4)); \ + \ + STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in(19), SPH_C32(0x7D84A5C3)); \ + STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in( 0), SPH_C32(0x3B8B5EBE)); \ + STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in(12), SPH_C32(0xE06F75D8)); \ + STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in( 7), SPH_C32(0x85C12073)); \ + STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in(13), SPH_C32(0x401A449F)); \ + STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in( 8), SPH_C32(0x56C16AA6)); \ + STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in(31), SPH_C32(0x4ED3AA62)); \ + STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in(10), SPH_C32(0x363F7706)); \ + \ + STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in( 5), SPH_C32(0x1BFEDF72)); \ + STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in( 9), SPH_C32(0x429B023D)); \ + STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in(14), SPH_C32(0x37D0D724)); \ + STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in(30), SPH_C32(0xD00A1248)); \ + STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in(18), SPH_C32(0xDB0FEAD3)); \ + STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in( 6), SPH_C32(0x49F1C09B)); \ + STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in(28), SPH_C32(0x075372C9)); \ + STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in(24), SPH_C32(0x80991B7B)); \ + \ + STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in( 2), SPH_C32(0x25D479D8)); \ + STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in(23), SPH_C32(0xF6E8DEF7)); \ + STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in(16), SPH_C32(0xE3FE501A)); \ + STEP(n, 5, s4, 
s3, s2, s1, s0, s7, s6, s5, in(22), SPH_C32(0xB6794C3B)); \ + STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in( 4), SPH_C32(0x976CE0BD)); \ + STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in( 1), SPH_C32(0x04C006BA)); \ + STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in(25), SPH_C32(0xC1A94FB6)); \ + STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in(15), SPH_C32(0x409F60C4)); \ + } while (0) + +#endif + +#define SAVE_STATE \ + sph_u32 u0, u1, u2, u3, u4, u5, u6, u7; \ + do { \ + u0 = s0; \ + u1 = s1; \ + u2 = s2; \ + u3 = s3; \ + u4 = s4; \ + u5 = s5; \ + u6 = s6; \ + u7 = s7; \ + } while (0) + +#define UPDATE_STATE do { \ + s0 = SPH_T32(s0 + u0); \ + s1 = SPH_T32(s1 + u1); \ + s2 = SPH_T32(s2 + u2); \ + s3 = SPH_T32(s3 + u3); \ + s4 = SPH_T32(s4 + u4); \ + s5 = SPH_T32(s5 + u5); \ + s6 = SPH_T32(s6 + u6); \ + s7 = SPH_T32(s7 + u7); \ + } while (0) + +/* + * COREn(in) performs the core HAVAL computation for "n" passes, using + * the one-argument macro "in" to access the input words. Running state + * is held in variable "s0" to "s7". + */ + +#define CORE3(in) do { \ + SAVE_STATE; \ + PASS1(3, in); \ + PASS2(3, in); \ + PASS3(3, in); \ + UPDATE_STATE; \ + } while (0) + +#define CORE4(in) do { \ + SAVE_STATE; \ + PASS1(4, in); \ + PASS2(4, in); \ + PASS3(4, in); \ + PASS4(4, in); \ + UPDATE_STATE; \ + } while (0) + +#define CORE5(in) do { \ + SAVE_STATE; \ + PASS1(5, in); \ + PASS2(5, in); \ + PASS3(5, in); \ + PASS4(5, in); \ + PASS5(5, in); \ + UPDATE_STATE; \ + } while (0) + +/* + * DSTATE declares the state variables "s0" to "s7". + */ +#define DSTATE sph_u32 s0, s1, s2, s3, s4, s5, s6, s7 + +/* + * RSTATE fills the state variables from the context "sc". + */ +#define RSTATE do { \ + s0 = sc->s0; \ + s1 = sc->s1; \ + s2 = sc->s2; \ + s3 = sc->s3; \ + s4 = sc->s4; \ + s5 = sc->s5; \ + s6 = sc->s6; \ + s7 = sc->s7; \ + } while (0) + +/* + * WSTATE updates the context "sc" from the state variables. 
+ */
+#define WSTATE do { \
+        sc->s0 = s0; \
+        sc->s1 = s1; \
+        sc->s2 = s2; \
+        sc->s3 = s3; \
+        sc->s4 = s4; \
+        sc->s5 = s5; \
+        sc->s6 = s6; \
+        sc->s7 = s7; \
+    } while (0)
+
+/*
+ * Initialize a context. "olen" is the output length, in 32-bit words
+ * (between 4 and 8, inclusive). "passes" is the number of passes
+ * (3, 4 or 5).
+ *
+ * The eight state words are set to fixed constants, "olen" and
+ * "passes" are stored in the context without being checked, and the
+ * input counter is cleared: the single "count" field when SPH_64 is
+ * set, the "count_high"/"count_low" pair otherwise.
+ */
+static void
+haval_init(sph_haval_context *sc, unsigned olen, unsigned passes)
+{
+    sc->s0 = SPH_C32(0x243F6A88);
+    sc->s1 = SPH_C32(0x85A308D3);
+    sc->s2 = SPH_C32(0x13198A2E);
+    sc->s3 = SPH_C32(0x03707344);
+    sc->s4 = SPH_C32(0xA4093822);
+    sc->s5 = SPH_C32(0x299F31D0);
+    sc->s6 = SPH_C32(0x082EFA98);
+    sc->s7 = SPH_C32(0xEC4E6C89);
+    sc->olen = olen;
+    sc->passes = passes;
+#if SPH_64
+    sc->count = 0;
+#else
+    sc->count_high = 0;
+    sc->count_low = 0;
+#endif
+
+}
+
+/*
+ * IN_PREPARE(data) contains declarations and code to prepare for
+ * reading input words pointed to by "data".
+ * INW(i) reads the word number "i" (from 0 to 31).
+ *
+ * When SPH_LITTLE_FAST is set, IN_PREPARE only records a byte pointer
+ * (load_ptr) and INW(i) decodes word "i" from it on every use.
+ * Otherwise IN_PREPARE decodes all 32 words up front into the local
+ * array X_var, and INW(i) indexes that array. Both variants declare
+ * local variables, and the second one also runs a loop, so IN_PREPARE
+ * must come after every other declaration of the enclosing block.
+ * Words are decoded with sph_dec32le_aligned(), which is defined
+ * elsewhere (presumably a little-endian load that expects aligned
+ * input; confirm against its definition).
+ */
+#if SPH_LITTLE_FAST
+#define IN_PREPARE(indata) const unsigned char *const load_ptr = \
+    (const unsigned char *)(indata)
+#define INW(i) sph_dec32le_aligned(load_ptr + 4 * (i))
+#else
+#define IN_PREPARE(indata) \
+    sph_u32 X_var[32]; \
+    int load_index; \
+ \
+    for (load_index = 0; load_index < 32; load_index ++) \
+        X_var[load_index] = sph_dec32le_aligned( \
+            (const unsigned char *)(indata) + 4 * load_index)
+#define INW(i) X_var[i]
+#endif
+
+/*
+ * Mixing operation used for 128-bit output tailoring. This function
+ * takes the byte 0 from a0, byte 1 from a1, byte 2 from a2 and byte 3
+ * from a3, and combines them into a 32-bit word, which is then rotated
+ * to the left by n bits.
+ */
+static SPH_INLINE sph_u32
+mix128(sph_u32 a0, sph_u32 a1, sph_u32 a2, sph_u32 a3, int n)
+{
+    sph_u32 word = a0 & SPH_C32(0x000000FF);
+
+    word |= a1 & SPH_C32(0x0000FF00);
+    word |= a2 & SPH_C32(0x00FF0000);
+    word |= a3 & SPH_C32(0xFF000000);
+    /* Only strictly positive counts are handed to the rotation macro. */
+    if (n <= 0)
+        return word;
+    return SPH_ROTL32(word, n);
+}
+
+/*
+ * 160-bit tailoring, output word 0: collects the bit fields selected
+ * by the three masks from x5, x6 and x7, then rotates left by 13.
+ */
+static SPH_INLINE sph_u32
+mix160_0(sph_u32 x5, sph_u32 x6, sph_u32 x7)
+{
+    sph_u32 picked = x5 & SPH_C32(0x01F80000);
+
+    picked |= x6 & SPH_C32(0xFE000000);
+    picked |= x7 & SPH_C32(0x0000003F);
+    return SPH_ROTL32(picked, 13);
+}
+
+/*
+ * 160-bit tailoring, output word 1: collects the bit fields selected
+ * by the three masks from x5, x6 and x7, then rotates left by 7.
+ */
+static SPH_INLINE sph_u32
+mix160_1(sph_u32 x5, sph_u32 x6, sph_u32 x7)
+{
+    sph_u32 picked = x5 & SPH_C32(0xFE000000);
+
+    picked |= x6 & SPH_C32(0x0000003F);
+    picked |= x7 & SPH_C32(0x00000FC0);
+    return SPH_ROTL32(picked, 7);
+}
+
+/*
+ * 160-bit tailoring, output word 2: the selected fields already sit
+ * at the bottom of the word, so no shift or rotation is applied.
+ */
+static SPH_INLINE sph_u32
+mix160_2(sph_u32 x5, sph_u32 x6, sph_u32 x7)
+{
+    return (x5 & SPH_C32(0x0000003F))
+        | (x6 & SPH_C32(0x00000FC0))
+        | (x7 & SPH_C32(0x0007F000));
+}
+
+/*
+ * 160-bit tailoring, output word 3: the selected fields are moved
+ * down by 6 bits.
+ */
+static SPH_INLINE sph_u32
+mix160_3(sph_u32 x5, sph_u32 x6, sph_u32 x7)
+{
+    return ((x5 & SPH_C32(0x00000FC0))
+        | (x6 & SPH_C32(0x0007F000))
+        | (x7 & SPH_C32(0x01F80000))) >> 6;
+}
+
+/*
+ * 160-bit tailoring, output word 4: the selected fields are moved
+ * down by 12 bits.
+ */
+static SPH_INLINE sph_u32
+mix160_4(sph_u32 x5, sph_u32 x6, sph_u32 x7)
+{
+    return ((x5 & SPH_C32(0x0007F000))
+        | (x6 & SPH_C32(0x01F80000))
+        | (x7 & SPH_C32(0xFE000000))) >> 12;
+}
+
+/*
+ * Mixing operation used to compute output word 0 for 192-bit output.
+ */
+static SPH_INLINE sph_u32
+mix192_0(sph_u32 x6, sph_u32 x7)
+{
+    sph_u32 from_x6 = x6 & SPH_C32(0xFC000000);
+    sph_u32 from_x7 = x7 & SPH_C32(0x0000001F);
+    sph_u32 joined = from_x6 | from_x7;
+
+    return SPH_ROTL32(joined, 6);
+}
+
+/*
+ * 192-bit tailoring, output word 1: the two selected fields are
+ * already in the low bits, so they are simply merged.
+ */
+static SPH_INLINE sph_u32
+mix192_1(sph_u32 x6, sph_u32 x7)
+{
+    sph_u32 from_x6 = x6 & SPH_C32(0x0000001F);
+    sph_u32 from_x7 = x7 & SPH_C32(0x000003E0);
+
+    return from_x6 | from_x7;
+}
+
+/*
+ * 192-bit tailoring, output word 2: merged fields moved down 5 bits.
+ */
+static SPH_INLINE sph_u32
+mix192_2(sph_u32 x6, sph_u32 x7)
+{
+    sph_u32 from_x6 = x6 & SPH_C32(0x000003E0);
+    sph_u32 from_x7 = x7 & SPH_C32(0x0000FC00);
+
+    return (from_x6 | from_x7) >> 5;
+}
+
+/*
+ * 192-bit tailoring, output word 3: merged fields moved down 10 bits.
+ */
+static SPH_INLINE sph_u32
+mix192_3(sph_u32 x6, sph_u32 x7)
+{
+    sph_u32 from_x6 = x6 & SPH_C32(0x0000FC00);
+    sph_u32 from_x7 = x7 & SPH_C32(0x001F0000);
+
+    return (from_x6 | from_x7) >> 10;
+}
+
+/*
+ * 192-bit tailoring, output word 4: merged fields moved down 16 bits.
+ */
+static SPH_INLINE sph_u32
+mix192_4(sph_u32 x6, sph_u32 x7)
+{
+    sph_u32 from_x6 = x6 & SPH_C32(0x001F0000);
+    sph_u32 from_x7 = x7 & SPH_C32(0x03E00000);
+
+    return (from_x6 | from_x7) >> 16;
+}
+
+/*
+ * 192-bit tailoring, output word 5: merged fields moved down 21 bits.
+ */
+static SPH_INLINE sph_u32
+mix192_5(sph_u32 x6, sph_u32 x7)
+{
+    sph_u32 from_x6 = x6 & SPH_C32(0x03E00000);
+    sph_u32 from_x7 = x7 & SPH_C32(0xFC000000);
+
+    return (from_x6 | from_x7) >> 21;
+}
+
+/*
+ * Write out HAVAL output. The output length is tailored to the requested
+ * length.
+ */
+static void
+haval_out(sph_haval_context *sc, void *dst)
+{
+    DSTATE;
+    sph_u32 word[8];
+    unsigned char *out;
+    unsigned count, k;
+
+    RSTATE;
+    /*
+     * First compute the tailored output words for the requested
+     * length (sc->olen, in 32-bit words), then encode them in order.
+     */
+    switch (sc->olen) {
+    case 4:
+        word[0] = SPH_T32(s0 + mix128(s7, s4, s5, s6, 24));
+        word[1] = SPH_T32(s1 + mix128(s6, s7, s4, s5, 16));
+        word[2] = SPH_T32(s2 + mix128(s5, s6, s7, s4, 8));
+        word[3] = SPH_T32(s3 + mix128(s4, s5, s6, s7, 0));
+        count = 4;
+        break;
+    case 5:
+        word[0] = SPH_T32(s0 + mix160_0(s5, s6, s7));
+        word[1] = SPH_T32(s1 + mix160_1(s5, s6, s7));
+        word[2] = SPH_T32(s2 + mix160_2(s5, s6, s7));
+        word[3] = SPH_T32(s3 + mix160_3(s5, s6, s7));
+        word[4] = SPH_T32(s4 + mix160_4(s5, s6, s7));
+        count = 5;
+        break;
+    case 6:
+        word[0] = SPH_T32(s0 + mix192_0(s6, s7));
+        word[1] = SPH_T32(s1 + mix192_1(s6, s7));
+        word[2] = SPH_T32(s2 + mix192_2(s6, s7));
+        word[3] = SPH_T32(s3 + mix192_3(s6, s7));
+        word[4] = SPH_T32(s4 + mix192_4(s6, s7));
+        word[5] = SPH_T32(s5 + mix192_5(s6, s7));
+        count = 6;
+        break;
+    case 7:
+        /* s7 is split into seven small fields, one per output word. */
+        word[0] = SPH_T32(s0 + ((s7 >> 27) & 0x1F));
+        word[1] = SPH_T32(s1 + ((s7 >> 22) & 0x1F));
+        word[2] = SPH_T32(s2 + ((s7 >> 18) & 0x0F));
+        word[3] = SPH_T32(s3 + ((s7 >> 13) & 0x1F));
+        word[4] = SPH_T32(s4 + ((s7 >> 9) & 0x0F));
+        word[5] = SPH_T32(s5 + ((s7 >> 4) & 0x1F));
+        word[6] = SPH_T32(s6 + ((s7 ) & 0x0F));
+        count = 7;
+        break;
+    case 8:
+        /* Full-length output: the state is written out unchanged. */
+        word[0] = s0;
+        word[1] = s1;
+        word[2] = s2;
+        word[3] = s3;
+        word[4] = s4;
+        word[5] = s5;
+        word[6] = s6;
+        word[7] = s7;
+        count = 8;
+        break;
+    default:
+        /* Any other length leaves dst untouched. */
+        return;
+    }
+    out = dst;
+    for (k = 0; k < count; k ++)
+        sph_enc32le(out + 4 * k, word[k]);
+}
+
+/*
+ * The main core functions inline the code with the COREx() macros. We
+ * use a helper file, included three times, which avoids code copying.
+ */
+
+#undef PASSES
+#define PASSES 3
+#include "haval_helper.c"
+
+#undef PASSES
+#define PASSES 4
+#include "haval_helper.c"
+
+#undef PASSES
+#define PASSES 5
+#include "haval_helper.c"
+
+/* ====================================================================== */
+
+/*
+ * API(xxx, y) defines the public entry points for an output of "xxx"
+ * bits computed with "y" passes. Each one forwards to the static
+ * helpers generated above by haval_helper.c for that pass count:
+ *   sph_havalXXX_Y_init               -> haval_init (xxx >> 5 words)
+ *   sph_havalXXX_Y                    -> havalY
+ *   sph_havalXXX_Y_close              -> havalY_close with no extra bits
+ *   sph_havalXXX_Y_addbits_and_close  -> havalY_close with (ub, n)
+ *
+ * The last name used to be pasted without the underscore after the
+ * pass count (sph_havalXXX_Yaddbits_and_close), unlike the three other
+ * entry points. The consistently named function is now defined; the
+ * old spelling is kept as a forwarding alias so that any code linking
+ * against it keeps working.
+ * NOTE(review): sph_haval.h is not visible from here; confirm that it
+ * declares the "_addbits_and_close" spelling.
+ */
+#define API(xxx, y) \
+void \
+sph_haval ## xxx ## _ ## y ## _init(void *cc) \
+{ \
+    haval_init(cc, xxx >> 5, y); \
+} \
+ \
+void \
+sph_haval ## xxx ## _ ## y (void *cc, const void *data, size_t len) \
+{ \
+    haval ## y(cc, data, len); \
+} \
+ \
+void \
+sph_haval ## xxx ## _ ## y ## _close(void *cc, void *dst) \
+{ \
+    haval ## y ## _close(cc, 0, 0, dst); \
+} \
+ \
+void \
+sph_haval ## xxx ## _ ## y ## _addbits_and_close( \
+    void *cc, unsigned ub, unsigned n, void *dst) \
+{ \
+    haval ## y ## _close(cc, ub, n, dst); \
+} \
+ \
+void \
+sph_haval ## xxx ## _ ## y ## addbits_and_close( \
+    void *cc, unsigned ub, unsigned n, void *dst) \
+{ \
+    haval ## y ## _close(cc, ub, n, dst); \
+}
+
+API(128, 3)
+API(128, 4)
+API(128, 5)
+API(160, 3)
+API(160, 4)
+API(160, 5)
+API(192, 3)
+API(192, 4)
+API(192, 5)
+API(224, 3)
+API(224, 4)
+API(224, 5)
+API(256, 3)
+API(256, 4)
+API(256, 5)
+
+/* RVAL loads the working variables s0..s7 from the array "val". */
+#define RVAL do { \
+        s0 = val[0]; \
+        s1 = val[1]; \
+        s2 = val[2]; \
+        s3 = val[3]; \
+        s4 = val[4]; \
+        s5 = val[5]; \
+        s6 = val[6]; \
+        s7 = val[7]; \
+    } while (0)
+
+/* WVAL stores the working variables s0..s7 back into the array "val". */
+#define WVAL do { \
+        val[0] = s0; \
+        val[1] = s1; \
+        val[2] = s2; \
+        val[3] = s3; \
+        val[4] = s4; \
+        val[5] = s5; \
+        val[6] = s6; \
+        val[7] = s7; \
+    } while (0)
+
+/* INMSG(i) is the input accessor handed to COREn: word i of "msg". */
+#define INMSG(i) msg[i]
+
+/*
+ * see sph_haval.h
+ * Runs the 3-pass core once over the 32 words of "msg", starting from
+ * the 8 state words in "val", and writes the new state back to "val".
+ */
+void
+sph_haval_3_comp(const sph_u32 msg[32], sph_u32 val[8])
+{
+    DSTATE;
+
+    RVAL;
+    CORE3(INMSG);
+    WVAL;
+}
+
+/*
+ * see sph_haval.h
+ * Same as sph_haval_3_comp, with the 4-pass core.
+ */
+void
+sph_haval_4_comp(const sph_u32 msg[32], sph_u32 val[8])
+{
+    DSTATE;
+
+    RVAL;
+    CORE4(INMSG);
+    WVAL;
+}
+
+/*
+ * see sph_haval.h
+ * Same as sph_haval_3_comp, with the 5-pass core.
+ */
+void
+sph_haval_5_comp(const sph_u32 msg[32], sph_u32 val[8])
+{
+    DSTATE;
+
+    RVAL;
+    CORE5(INMSG);
+    WVAL;
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/sph/haval_helper.c b/sph/haval_helper.c
new file mode 100644
index 0000000000..a8fe917eb3
--- /dev/null
+++ b/sph/haval_helper.c
@@ -0,0 +1,190 @@
+/* $Id: 
haval_helper.c 218 2010-06-08 17:06:34Z tp $ */ +/* + * Helper code, included (three times !) by HAVAL implementation. + * + * TODO: try to merge this with md_helper.c. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#undef SPH_XCAT +#define SPH_XCAT(a, b) SPH_XCAT_(a, b) +#undef SPH_XCAT_ +#define SPH_XCAT_(a, b) a ## b + +static void +#ifdef SPH_UPTR +SPH_XCAT(SPH_XCAT(haval, PASSES), _short) +#else +SPH_XCAT(haval, PASSES) +#endif +(sph_haval_context *sc, const void *data, size_t len) +{ + unsigned current; + +#if SPH_64 + current = (unsigned)sc->count & 127U; +#else + current = (unsigned)sc->count_low & 127U; +#endif + while (len > 0) { + unsigned clen; +#if !SPH_64 + sph_u32 clow, clow2; +#endif + + clen = 128U - current; + if (clen > len) + clen = len; + memcpy(sc->buf + current, data, clen); + data = (const unsigned char *)data + clen; + current += clen; + len -= clen; + if (current == 128U) { + DSTATE; + IN_PREPARE(sc->buf); + RSTATE; + SPH_XCAT(CORE, PASSES)(INW); + WSTATE; + current = 0; + } +#if SPH_64 + sc->count += clen; +#else + clow = sc->count_low; + clow2 = SPH_T32(clow + clen); + sc->count_low = clow2; + if (clow2 < clow) + sc->count_high ++; +#endif + } +} + +#ifdef SPH_UPTR +static void +SPH_XCAT(haval, PASSES)(sph_haval_context *sc, const void *data, size_t len) +{ + unsigned current; + size_t orig_len; +#if !SPH_64 + sph_u32 clow, clow2; +#endif + DSTATE; + + if (len < 256U) { + SPH_XCAT(SPH_XCAT(haval, PASSES), _short)(sc, data, len); + return; + } +#if SPH_64 + current = (unsigned)sc->count & 127U; +#else + current = (unsigned)sc->count_low & 127U; +#endif + if (current > 0) { + unsigned clen; + clen = 128U - current; + SPH_XCAT(SPH_XCAT(haval, PASSES), _short)(sc, data, clen); + data = (const unsigned char *)data + clen; + len -= clen; + } +#if !SPH_UNALIGNED + if (((SPH_UPTR)data & 3U) != 0) { + SPH_XCAT(SPH_XCAT(haval, PASSES), _short)(sc, data, len); + return; + } +#endif + orig_len = len; + RSTATE; + while (len >= 128U) { + IN_PREPARE(data); + SPH_XCAT(CORE, PASSES)(INW); + data = (const unsigned char *)data + 128U; + len -= 128U; + } 
/*
 * Finalisation for HAVAL with PASSES passes.
 *
 * Appends the padding byte built from the "n" extra bits in "ub", zero-fills
 * up to offset 118, writes the trailer (pass count, output length, message
 * bit length) at offsets 118..127, processes the last block(s), emits the
 * digest through haval_out() and finally re-initialises the context with
 * haval_init() using the same output length and pass count.
 *
 * sc    hashing context
 * ub    extra bits to append (only the low 8 bits are used)
 * n     number of extra bits
 * dst   destination buffer handed to haval_out()
 */
static void
SPH_XCAT(SPH_XCAT(haval, PASSES), _close)(sph_haval_context *sc,
	unsigned ub, unsigned n, void *dst)
{
	/* NOTE(review): "j" is not used directly here; it may be referenced
	 * by the includer's macros -- confirm before removing. */
	unsigned current,j;
	DSTATE;

#if SPH_64
	current = (unsigned)sc->count & 127U;
#else
	current = (unsigned)sc->count_low & 127U;
#endif
	/* Padding byte: a single 1 bit at position n combined with the
	 * top n bits of ub shifted down. */
	sc->buf[current ++] = (0x01 << n) | ((ub & 0xFF) >> (8 - n));
	RSTATE;
	if (current > 118U) {
		/* No room left for the 10-byte trailer: flush this block first. */
		memset(sc->buf + current, 0, 128U - current);

		do {
			IN_PREPARE(sc->buf);
			SPH_XCAT(CORE, PASSES)(INW);
		} while (0);
		current = 0;
	}
	memset(sc->buf + current, 0, 118U - current);
	/* Trailer: constant 0x01 with PASSES in bits 3 and up, then the
	 * output length shifted left by 3, then the message length in bits
	 * (little-endian, 64 bits). */
	sc->buf[118] = 0x01 | (PASSES << 3);
	sc->buf[119] = sc->olen << 3;
#if SPH_64
	sph_enc64le_aligned(sc->buf + 120, SPH_T64(sc->count << 3));
#else
	sph_enc32le_aligned(sc->buf + 120, SPH_T32(sc->count_low << 3));
	sph_enc32le_aligned(sc->buf + 124,
		SPH_T32((sc->count_high << 3) | (sc->count_low >> 29)));
#endif

	do {
		IN_PREPARE(sc->buf);
		SPH_XCAT(CORE, PASSES)(INW);
	} while (0);
	WSTATE;

	haval_out(sc, dst);
	haval_init(sc, sc->olen, sc->passes);
}
Some macros must be defined: + * RFUN name for the round function + * HASH "short name" for the hash function + * BE32 defined for big-endian, 32-bit based (e.g. SHA-1) + * LE32 defined for little-endian, 32-bit based (e.g. MD5) + * BE64 defined for big-endian, 64-bit based (e.g. SHA-512) + * LE64 defined for little-endian, 64-bit based (no example yet) + * PW01 if defined, append 0x01 instead of 0x80 (for Tiger) + * BLEN if defined, length of a message block (in bytes) + * PLW1 if defined, length is defined on one 64-bit word only (for Tiger) + * PLW4 if defined, length is defined on four 64-bit words (for WHIRLPOOL) + * SVAL if defined, reference to the context state information + * + * BLEN is used when a message block is not 16 (32-bit or 64-bit) words: + * this is used for instance for Tiger, which works on 64-bit words but + * uses 512-bit message blocks (eight 64-bit words). PLW1 and PLW4 are + * ignored if 32-bit words are used; if 64-bit words are used and PLW1 is + * set, then only one word (64 bits) will be used to encode the input + * message length (in bits), otherwise two words will be used (as in + * SHA-384 and SHA-512). If 64-bit words are used and PLW4 is defined (but + * not PLW1), four 64-bit words will be used to encode the message length + * (in bits). Note that regardless of those settings, only 64-bit message + * lengths are supported (in bits): messages longer than 2 Exabytes will be + * improperly hashed (this is unlikely to happen soon: 2 Exabytes is about + * 2 millions Terabytes, which is huge). + * + * If CLOSE_ONLY is defined, then this file defines only the sph_XXX_close() + * function. This is used for Tiger2, which is identical to Tiger except + * when it comes to the padding (Tiger2 uses the standard 0x80 byte instead + * of the 0x01 from original Tiger). 
+ * + * The RFUN function is invoked with two arguments, the first pointing to + * aligned data (as a "const void *"), the second being state information + * from the context structure. By default, this state information is the + * "val" field from the context, and this field is assumed to be an array + * of words ("sph_u32" or "sph_u64", depending on BE32/LE32/BE64/LE64). + * The "val" field can have any type, except + * for the output encoding which assumes that it is an array of "sph_u32" + * values. By defining NO_OUTPUT, this last step is deactivated; the + * includer code is then responsible for writing out the hash result. When + * NO_OUTPUT is defined, the third parameter to the "close()" function is + * ignored. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#ifdef _MSC_VER +#pragma warning (disable: 4146) +#endif + +#undef SPH_XCAT +#define SPH_XCAT(a, b) SPH_XCAT_(a, b) +#undef SPH_XCAT_ +#define SPH_XCAT_(a, b) a ## b + +#undef SPH_BLEN +#undef SPH_WLEN +#if defined BE64 || defined LE64 +#define SPH_BLEN 128U +#define SPH_WLEN 8U +#else +#define SPH_BLEN 64U +#define SPH_WLEN 4U +#endif + +#ifdef BLEN +#undef SPH_BLEN +#define SPH_BLEN BLEN +#endif + +#undef SPH_MAXPAD +#if defined PLW1 +#define SPH_MAXPAD (SPH_BLEN - SPH_WLEN) +#elif defined PLW4 +#define SPH_MAXPAD (SPH_BLEN - (SPH_WLEN << 2)) +#else +#define SPH_MAXPAD (SPH_BLEN - (SPH_WLEN << 1)) +#endif + +#undef SPH_VAL +#undef SPH_NO_OUTPUT +#ifdef SVAL +#define SPH_VAL SVAL +#define SPH_NO_OUTPUT 1 +#else +#define SPH_VAL sc->val +#endif + +#ifndef CLOSE_ONLY + +#ifdef SPH_UPTR +static void +SPH_XCAT(HASH, _short)(void *cc, const void *data, size_t len) +#else +void +SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len) +#endif +{ + SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc; + unsigned current; + + sc = cc; +#if SPH_64 + current = (unsigned)sc->count & (SPH_BLEN - 1U); +#else + current = (unsigned)sc->count_low & (SPH_BLEN - 1U); +#endif + while (len > 0) { + unsigned clen; +#if !SPH_64 + sph_u32 clow, clow2; +#endif + + clen = SPH_BLEN - current; + if (clen > len) + clen = len; + memcpy(sc->buf + current, data, clen); + data = (const unsigned char *)data + clen; + current += clen; + len -= clen; + if (current == SPH_BLEN) { + RFUN(sc->buf, SPH_VAL); + current = 0; + } +#if SPH_64 + sc->count += clen; +#else + clow = sc->count_low; + clow2 = SPH_T32(clow + clen); + sc->count_low = clow2; + if (clow2 < clow) + sc->count_high ++; +#endif + } +} + +#ifdef SPH_UPTR +void +SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len) +{ + SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc; + unsigned current; + size_t orig_len; +#if !SPH_64 + 
sph_u32 clow, clow2; +#endif + + if (len < (2 * SPH_BLEN)) { + SPH_XCAT(HASH, _short)(cc, data, len); + return; + } + sc = cc; +#if SPH_64 + current = (unsigned)sc->count & (SPH_BLEN - 1U); +#else + current = (unsigned)sc->count_low & (SPH_BLEN - 1U); +#endif + if (current > 0) { + unsigned t; + + t = SPH_BLEN - current; + SPH_XCAT(HASH, _short)(cc, data, t); + data = (const unsigned char *)data + t; + len -= t; + } +#if !SPH_UNALIGNED + if (((SPH_UPTR)data & (SPH_WLEN - 1U)) != 0) { + SPH_XCAT(HASH, _short)(cc, data, len); + return; + } +#endif + orig_len = len; + while (len >= SPH_BLEN) { + RFUN(data, SPH_VAL); + len -= SPH_BLEN; + data = (const unsigned char *)data + SPH_BLEN; + } + if (len > 0) + memcpy(sc->buf, data, len); +#if SPH_64 + sc->count += (sph_u64)orig_len; +#else + clow = sc->count_low; + clow2 = SPH_T32(clow + orig_len); + sc->count_low = clow2; + if (clow2 < clow) + sc->count_high ++; + /* + * This code handles the improbable situation where "size_t" is + * greater than 32 bits, and yet we do not have a 64-bit type. + */ + orig_len >>= 12; + orig_len >>= 10; + orig_len >>= 10; + sc->count_high += orig_len; +#endif +} +#endif + +#endif + +/* + * Perform padding and produce result. The context is NOT reinitialized + * by this function. 
+ */ +static void +SPH_XCAT(HASH, _addbits_and_close)(void *cc, + unsigned ub, unsigned n, void *dst, unsigned rnum) +{ + SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc; + unsigned current, u; +#if !SPH_64 + sph_u32 low, high; +#endif + + sc = cc; +#if SPH_64 + current = (unsigned)sc->count & (SPH_BLEN - 1U); +#else + current = (unsigned)sc->count_low & (SPH_BLEN - 1U); +#endif +#ifdef PW01 + sc->buf[current ++] = (0x100 | (ub & 0xFF)) >> (8 - n); +#else + { + unsigned z; + + z = 0x80 >> n; + sc->buf[current ++] = ((ub & -z) | z) & 0xFF; + } +#endif + if (current > SPH_MAXPAD) { + memset(sc->buf + current, 0, SPH_BLEN - current); + RFUN(sc->buf, SPH_VAL); + memset(sc->buf, 0, SPH_MAXPAD); + } else { + memset(sc->buf + current, 0, SPH_MAXPAD - current); + } +#if defined BE64 +#if defined PLW1 + sph_enc64be_aligned(sc->buf + SPH_MAXPAD, + SPH_T64(sc->count << 3) + (sph_u64)n); +#elif defined PLW4 + memset(sc->buf + SPH_MAXPAD, 0, 2 * SPH_WLEN); + sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, + sc->count >> 61); + sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 3 * SPH_WLEN, + SPH_T64(sc->count << 3) + (sph_u64)n); +#else + sph_enc64be_aligned(sc->buf + SPH_MAXPAD, sc->count >> 61); + sph_enc64be_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, + SPH_T64(sc->count << 3) + (sph_u64)n); +#endif +#elif defined LE64 +#if defined PLW1 + sph_enc64le_aligned(sc->buf + SPH_MAXPAD, + SPH_T64(sc->count << 3) + (sph_u64)n); +#elif defined PLW1 + sph_enc64le_aligned(sc->buf + SPH_MAXPAD, + SPH_T64(sc->count << 3) + (sph_u64)n); + sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61); + memset(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, 0, 2 * SPH_WLEN); +#else + sph_enc64le_aligned(sc->buf + SPH_MAXPAD, + SPH_T64(sc->count << 3) + (sph_u64)n); + sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61); +#endif +#else +#if SPH_64 +#ifdef BE32 + sph_enc64be_aligned(sc->buf + SPH_MAXPAD, + SPH_T64(sc->count << 3) + (sph_u64)n); +#else + sph_enc64le_aligned(sc->buf 
+ SPH_MAXPAD, + SPH_T64(sc->count << 3) + (sph_u64)n); +#endif +#else + low = sc->count_low; + high = SPH_T32((sc->count_high << 3) | (low >> 29)); + low = SPH_T32(low << 3) + (sph_u32)n; +#ifdef BE32 + sph_enc32be(sc->buf + SPH_MAXPAD, high); + sph_enc32be(sc->buf + SPH_MAXPAD + SPH_WLEN, low); +#else + sph_enc32le(sc->buf + SPH_MAXPAD, low); + sph_enc32le(sc->buf + SPH_MAXPAD + SPH_WLEN, high); +#endif +#endif +#endif + RFUN(sc->buf, SPH_VAL); +#ifdef SPH_NO_OUTPUT + (void)dst; + (void)rnum; + (void)u; +#else + for (u = 0; u < rnum; u ++) { +#if defined BE64 + sph_enc64be((unsigned char *)dst + 8 * u, sc->val[u]); +#elif defined LE64 + sph_enc64le((unsigned char *)dst + 8 * u, sc->val[u]); +#elif defined BE32 + sph_enc32be((unsigned char *)dst + 4 * u, sc->val[u]); +#else + sph_enc32le((unsigned char *)dst + 4 * u, sc->val[u]); +#endif + } +#endif +} + +static void +SPH_XCAT(HASH, _close)(void *cc, void *dst, unsigned rnum) +{ + SPH_XCAT(HASH, _addbits_and_close)(cc, 0, 0, dst, rnum); +} diff --git a/sph/neoscrypt.c b/sph/neoscrypt.c new file mode 100644 index 0000000000..202eee36fe --- /dev/null +++ b/sph/neoscrypt.c @@ -0,0 +1,967 @@ +/* + * Copyright (c) 2009 Colin Percival, 2011 ArtForz + * Copyright (c) 2012 Andrew Moon (floodyberry) + * Copyright (c) 2012 Samuel Neves + * Copyright (c) 2014 John Doering + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
/*
 * Smaller / larger of two values.
 * Every use of an argument is parenthesised so that arguments containing
 * low-precedence operators expand correctly. Each argument may still be
 * evaluated twice, so avoid arguments with side effects such as i++.
 */
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
for(i = 16; i < 64; i++) { + w[i] = W1(i); + } + for(i = 0; i < 64; i++) { + STEP(i); + } + for(i = 0; i < 8; i++) { + r[i] += S->H[i]; + S->H[i] = r[i]; + } + S->T += SCRYPT_HASH_BLOCK_SIZE * 8; + in += SCRYPT_HASH_BLOCK_SIZE; + } +} + +static void neoscrypt_hash_init_sha256(sha256_hash_state *S) { + S->H[0] = 0x6a09e667; + S->H[1] = 0xbb67ae85; + S->H[2] = 0x3c6ef372; + S->H[3] = 0xa54ff53a; + S->H[4] = 0x510e527f; + S->H[5] = 0x9b05688c; + S->H[6] = 0x1f83d9ab; + S->H[7] = 0x5be0cd19; + S->T = 0; + S->leftover = 0; +} + +static void neoscrypt_hash_update_sha256(sha256_hash_state *S, const uint8_t *in, size_t inlen) { + size_t blocks, want; + + /* handle the previous data */ + if(S->leftover) { + want = (SCRYPT_HASH_BLOCK_SIZE - S->leftover); + want = (want < inlen) ? want : inlen; + memcpy(S->buffer + S->leftover, in, want); + S->leftover += (uint32_t)want; + if(S->leftover < SCRYPT_HASH_BLOCK_SIZE) + return; + in += want; + inlen -= want; + sha256_blocks(S, S->buffer, 1); + } + + /* handle the current data */ + blocks = (inlen & ~(SCRYPT_HASH_BLOCK_SIZE - 1)); + S->leftover = (uint32_t)(inlen - blocks); + if(blocks) { + sha256_blocks(S, in, blocks / SCRYPT_HASH_BLOCK_SIZE); + in += blocks; + } + + /* handle leftover data */ + if(S->leftover) + memcpy(S->buffer, in, S->leftover); +} + +static void neoscrypt_hash_finish_sha256(sha256_hash_state *S, uint8_t *hash) { + uint64_t t = S->T + (S->leftover * 8); + + S->buffer[S->leftover] = 0x80; + if(S->leftover <= 55) { + memset(S->buffer + S->leftover + 1, 0, 55 - S->leftover); + } else { + memset(S->buffer + S->leftover + 1, 0, 63 - S->leftover); + sha256_blocks(S, S->buffer, 1); + memset(S->buffer, 0, 56); + } + + U64TO8_BE(S->buffer + 56, t); + sha256_blocks(S, S->buffer, 1); + + U32TO8_BE(&hash[ 0], S->H[0]); + U32TO8_BE(&hash[ 4], S->H[1]); + U32TO8_BE(&hash[ 8], S->H[2]); + U32TO8_BE(&hash[12], S->H[3]); + U32TO8_BE(&hash[16], S->H[4]); + U32TO8_BE(&hash[20], S->H[5]); + U32TO8_BE(&hash[24], S->H[6]); + 
U32TO8_BE(&hash[28], S->H[7]); +} + +static void neoscrypt_hash_sha256(hash_digest hash, const uint8_t *m, size_t mlen) { + sha256_hash_state st; + neoscrypt_hash_init_sha256(&st); + neoscrypt_hash_update_sha256(&st, m, mlen); + neoscrypt_hash_finish_sha256(&st, hash); +} + + +/* HMAC for SHA-256 */ + +typedef struct sha256_hmac_state_t { + sha256_hash_state inner, outer; +} sha256_hmac_state; + +static void neoscrypt_hmac_init_sha256(sha256_hmac_state *st, const uint8_t *key, size_t keylen) { + uint8_t pad[SCRYPT_HASH_BLOCK_SIZE] = {0}; + size_t i; + + neoscrypt_hash_init_sha256(&st->inner); + neoscrypt_hash_init_sha256(&st->outer); + + if(keylen <= SCRYPT_HASH_BLOCK_SIZE) { + /* use the key directly if it's <= blocksize bytes */ + memcpy(pad, key, keylen); + } else { + /* if it's > blocksize bytes, hash it */ + neoscrypt_hash_sha256(pad, key, keylen); + } + + /* inner = (key ^ 0x36) */ + /* h(inner || ...) */ + for(i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++) + pad[i] ^= 0x36; + neoscrypt_hash_update_sha256(&st->inner, pad, SCRYPT_HASH_BLOCK_SIZE); + + /* outer = (key ^ 0x5c) */ + /* h(outer || ...) */ + for(i = 0; i < SCRYPT_HASH_BLOCK_SIZE; i++) + pad[i] ^= (0x5c ^ 0x36); + neoscrypt_hash_update_sha256(&st->outer, pad, SCRYPT_HASH_BLOCK_SIZE); +} + +static void neoscrypt_hmac_update_sha256(sha256_hmac_state *st, const uint8_t *m, size_t mlen) { + /* h(inner || m...) 
*/ + neoscrypt_hash_update_sha256(&st->inner, m, mlen); +} + +static void neoscrypt_hmac_finish_sha256(sha256_hmac_state *st, hash_digest mac) { + /* h(inner || m) */ + hash_digest innerhash; + neoscrypt_hash_finish_sha256(&st->inner, innerhash); + + /* h(outer || h(inner || m)) */ + neoscrypt_hash_update_sha256(&st->outer, innerhash, sizeof(innerhash)); + neoscrypt_hash_finish_sha256(&st->outer, mac); +} + + +/* PBKDF2 for SHA-256 */ + +static void neoscrypt_pbkdf2_sha256(const uint8_t *password, size_t password_len, + const uint8_t *salt, size_t salt_len, uint64_t N, uint8_t *output, size_t output_len) { + sha256_hmac_state hmac_pw, hmac_pw_salt, work; + hash_digest ti, u; + uint8_t be[4]; + uint32_t i, j, k, blocks; + + /* bytes must be <= (0xffffffff - (SCRYPT_HASH_DIGEST_SIZE - 1)), which they will always be under scrypt */ + + /* hmac(password, ...) */ + neoscrypt_hmac_init_sha256(&hmac_pw, password, password_len); + + /* hmac(password, salt...) */ + hmac_pw_salt = hmac_pw; + neoscrypt_hmac_update_sha256(&hmac_pw_salt, salt, salt_len); + + blocks = ((uint32_t)output_len + (SCRYPT_HASH_DIGEST_SIZE - 1)) / SCRYPT_HASH_DIGEST_SIZE; + for(i = 1; i <= blocks; i++) { + /* U1 = hmac(password, salt || be(i)) */ + U32TO8_BE(be, i); + work = hmac_pw_salt; + neoscrypt_hmac_update_sha256(&work, be, 4); + neoscrypt_hmac_finish_sha256(&work, ti); + memcpy(u, ti, sizeof(u)); + + /* T[i] = U1 ^ U2 ^ U3... */ + for(j = 0; j < N - 1; j++) { + /* UX = hmac(password, U{X-1}) */ + work = hmac_pw; + neoscrypt_hmac_update_sha256(&work, u, SCRYPT_HASH_DIGEST_SIZE); + neoscrypt_hmac_finish_sha256(&work, u); + + /* T[i] ^= UX */ + for(k = 0; k < sizeof(u); k++) + ti[k] ^= u[k]; + } + + memcpy(output, ti, (output_len > SCRYPT_HASH_DIGEST_SIZE) ? 
SCRYPT_HASH_DIGEST_SIZE : output_len); + output += SCRYPT_HASH_DIGEST_SIZE; + output_len -= SCRYPT_HASH_DIGEST_SIZE; + } +} + + +/* NeoScrypt */ + +#if defined(ASM) + +extern void neoscrypt_salsa(uint *X, uint rounds); +extern void neoscrypt_salsa_tangle(uint *X, uint count); +extern void neoscrypt_chacha(uint *X, uint rounds); + +extern void neoscrypt_blkcpy(void *dstp, const void *srcp, uint len); +extern void neoscrypt_blkswp(void *blkAp, void *blkBp, uint len); +extern void neoscrypt_blkxor(void *dstp, const void *srcp, uint len); + +#else + +/* Salsa20, rounds must be a multiple of 2 */ +static void neoscrypt_salsa(uint *X, uint rounds) { + uint x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, t; + + x0 = X[0]; x1 = X[1]; x2 = X[2]; x3 = X[3]; + x4 = X[4]; x5 = X[5]; x6 = X[6]; x7 = X[7]; + x8 = X[8]; x9 = X[9]; x10 = X[10]; x11 = X[11]; + x12 = X[12]; x13 = X[13]; x14 = X[14]; x15 = X[15]; + +#define quarter(a, b, c, d) \ + t = a + d; t = ROTL32(t, 7); b ^= t; \ + t = b + a; t = ROTL32(t, 9); c ^= t; \ + t = c + b; t = ROTL32(t, 13); d ^= t; \ + t = d + c; t = ROTL32(t, 18); a ^= t; + + for(; rounds; rounds -= 2) { + quarter( x0, x4, x8, x12); + quarter( x5, x9, x13, x1); + quarter(x10, x14, x2, x6); + quarter(x15, x3, x7, x11); + quarter( x0, x1, x2, x3); + quarter( x5, x6, x7, x4); + quarter(x10, x11, x8, x9); + quarter(x15, x12, x13, x14); + } + + X[0] += x0; X[1] += x1; X[2] += x2; X[3] += x3; + X[4] += x4; X[5] += x5; X[6] += x6; X[7] += x7; + X[8] += x8; X[9] += x9; X[10] += x10; X[11] += x11; + X[12] += x12; X[13] += x13; X[14] += x14; X[15] += x15; + +#undef quarter +} + +/* ChaCha20, rounds must be a multiple of 2 */ +static void neoscrypt_chacha(uint *X, uint rounds) { + uint x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, t; + + x0 = X[0]; x1 = X[1]; x2 = X[2]; x3 = X[3]; + x4 = X[4]; x5 = X[5]; x6 = X[6]; x7 = X[7]; + x8 = X[8]; x9 = X[9]; x10 = X[10]; x11 = X[11]; + x12 = X[12]; x13 = X[13]; x14 = X[14]; 
x15 = X[15]; + +#define quarter(a,b,c,d) \ + a += b; t = d ^ a; d = ROTL32(t, 16); \ + c += d; t = b ^ c; b = ROTL32(t, 12); \ + a += b; t = d ^ a; d = ROTL32(t, 8); \ + c += d; t = b ^ c; b = ROTL32(t, 7); + + for(; rounds; rounds -= 2) { + quarter( x0, x4, x8, x12); + quarter( x1, x5, x9, x13); + quarter( x2, x6, x10, x14); + quarter( x3, x7, x11, x15); + quarter( x0, x5, x10, x15); + quarter( x1, x6, x11, x12); + quarter( x2, x7, x8, x13); + quarter( x3, x4, x9, x14); + } + + X[0] += x0; X[1] += x1; X[2] += x2; X[3] += x3; + X[4] += x4; X[5] += x5; X[6] += x6; X[7] += x7; + X[8] += x8; X[9] += x9; X[10] += x10; X[11] += x11; + X[12] += x12; X[13] += x13; X[14] += x14; X[15] += x15; + +#undef quarter +} + + +/* Fast 32-bit / 64-bit memcpy(); + * len must be a multiple of 32 bytes */ +static void neoscrypt_blkcpy(void *dstp, const void *srcp, uint len) { + ulong *dst = (ulong *) dstp; + ulong *src = (ulong *) srcp; + uint i; + + for(i = 0; i < (len / sizeof(ulong)); i += 4) { + dst[i] = src[i]; + dst[i + 1] = src[i + 1]; + dst[i + 2] = src[i + 2]; + dst[i + 3] = src[i + 3]; + } +} + +/* Fast 32-bit / 64-bit block swapper; + * len must be a multiple of 32 bytes */ +static void neoscrypt_blkswp(void *blkAp, void *blkBp, uint len) { + ulong *blkA = (ulong *) blkAp; + ulong *blkB = (ulong *) blkBp; + register ulong t0, t1, t2, t3; + uint i; + + for(i = 0; i < (len / sizeof(ulong)); i += 4) { + t0 = blkA[i]; + t1 = blkA[i + 1]; + t2 = blkA[i + 2]; + t3 = blkA[i + 3]; + blkA[i] = blkB[i]; + blkA[i + 1] = blkB[i + 1]; + blkA[i + 2] = blkB[i + 2]; + blkA[i + 3] = blkB[i + 3]; + blkB[i] = t0; + blkB[i + 1] = t1; + blkB[i + 2] = t2; + blkB[i + 3] = t3; + } +} + +/* Fast 32-bit / 64-bit block XOR engine; + * len must be a multiple of 32 bytes */ +static void neoscrypt_blkxor(void *dstp, const void *srcp, uint len) { + ulong *dst = (ulong *) dstp; + ulong *src = (ulong *) srcp; + uint i; + + for(i = 0; i < (len / sizeof(ulong)); i += 4) { + dst[i] ^= src[i]; + dst[i + 1] ^= 
src[i + 1]; + dst[i + 2] ^= src[i + 2]; + dst[i + 3] ^= src[i + 3]; + } +} + +#endif + +/* 32-bit / 64-bit optimised memcpy() */ +static void neoscrypt_copy(void *dstp, const void *srcp, uint len) { + ulong *dst = (ulong *) dstp; + ulong *src = (ulong *) srcp; + uint i, tail; + + for(i = 0; i < (len / sizeof(ulong)); i++) + dst[i] = src[i]; + + tail = len & (sizeof(ulong) - 1); + if(tail) { + uchar *dstb = (uchar *) dstp; + uchar *srcb = (uchar *) srcp; + + for(i = len - tail; i < len; i++) + dstb[i] = srcb[i]; + } +} + +/* 32-bit / 64-bit optimised memory erase aka memset() to zero */ +static void neoscrypt_erase(void *dstp, uint len) { + const ulong null = 0; + ulong *dst = (ulong *) dstp; + uint i, tail; + + for(i = 0; i < (len / sizeof(ulong)); i++) + dst[i] = null; + + tail = len & (sizeof(ulong) - 1); + if(tail) { + uchar *dstb = (uchar *) dstp; + + for(i = len - tail; i < len; i++) + dstb[i] = (uchar)null; + } +} + +/* 32-bit / 64-bit optimised XOR engine */ +static void neoscrypt_xor(void *dstp, const void *srcp, uint len) { + ulong *dst = (ulong *) dstp; + ulong *src = (ulong *) srcp; + uint i, tail; + + for(i = 0; i < (len / sizeof(ulong)); i++) + dst[i] ^= src[i]; + + tail = len & (sizeof(ulong) - 1); + if(tail) { + uchar *dstb = (uchar *) dstp; + uchar *srcb = (uchar *) srcp; + + for(i = len - tail; i < len; i++) + dstb[i] ^= srcb[i]; + } +} + + +/* BLAKE2s */ + +#define BLAKE2S_BLOCK_SIZE 64U +#define BLAKE2S_OUT_SIZE 32U +#define BLAKE2S_KEY_SIZE 32U + +/* Parameter block of 32 bytes */ +typedef struct blake2s_param_t { + uchar digest_length; + uchar key_length; + uchar fanout; + uchar depth; + uint leaf_length; + uchar node_offset[6]; + uchar node_depth; + uchar inner_length; + uchar salt[8]; + uchar personal[8]; +} blake2s_param; + +/* State block of 180 bytes */ +typedef struct blake2s_state_t { + uint h[8]; + uint t[2]; + uint f[2]; + uchar buf[2 * BLAKE2S_BLOCK_SIZE]; + uint buflen; +} blake2s_state; + +static const uint blake2s_IV[8] = { + 
/* Message word schedule for the ten BLAKE2s rounds.
 * Row r is used by ROUND(r) in blake2s_compress(); within a round, the
 * i-th G step mixes in message words m[sigma[r][2*i]] and
 * m[sigma[r][2*i + 1]]. */
static const uint8_t blake2s_sigma[10][16] = {
    {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 } ,
    { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 } ,
    { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 } ,
    {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 } ,
    {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 } ,
    {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 } ,
    { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 } ,
    { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 } ,
    {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 } ,
    { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0 } ,
};
8]; + +#undef G +#undef ROUND +} + +static void blake2s_update(blake2s_state *S, const uchar *input, uint input_size) { + uint left, fill; + + while(input_size > 0) { + left = S->buflen; + fill = 2 * BLAKE2S_BLOCK_SIZE - left; + if(input_size > fill) { + /* Buffer fill */ + neoscrypt_copy(S->buf + left, input, fill); + S->buflen += fill; + /* Counter increment */ + S->t[0] += BLAKE2S_BLOCK_SIZE; + /* Compress */ + blake2s_compress(S, (uint *) S->buf); + /* Shift buffer left */ + neoscrypt_copy(S->buf, S->buf + BLAKE2S_BLOCK_SIZE, BLAKE2S_BLOCK_SIZE); + S->buflen -= BLAKE2S_BLOCK_SIZE; + input += fill; + input_size -= fill; + } else { + neoscrypt_copy(S->buf + left, input, input_size); + S->buflen += input_size; + /* Do not compress */ + input += input_size; + input_size = 0; + } + } +} + +static void neoscrypt_blake2s(const void *input, const uint input_size, const void *key, const uchar key_size, + void *output, const uchar output_size) { + uchar block[BLAKE2S_BLOCK_SIZE]; + blake2s_param P[1]; + blake2s_state S[1]; + + /* Initialise */ + neoscrypt_erase(P, 32); + P->digest_length = output_size; + P->key_length = key_size; + P->fanout = 1; + P->depth = 1; + + neoscrypt_erase(S, 180); + neoscrypt_copy(S, blake2s_IV, 32); + neoscrypt_xor(S, P, 32); + + neoscrypt_erase(block, BLAKE2S_BLOCK_SIZE); + neoscrypt_copy(block, key, key_size); + blake2s_update(S, (uchar *) block, BLAKE2S_BLOCK_SIZE); + + /* Update */ + blake2s_update(S, (uchar *) input, input_size); + + /* Finish */ + if(S->buflen > BLAKE2S_BLOCK_SIZE) { + S->t[0] += BLAKE2S_BLOCK_SIZE; + blake2s_compress(S, (uint *) S->buf); + S->buflen -= BLAKE2S_BLOCK_SIZE; + neoscrypt_copy(S->buf, S->buf + BLAKE2S_BLOCK_SIZE, S->buflen); + } + S->t[0] += S->buflen; + S->f[0] = ~0U; + neoscrypt_erase(S->buf + S->buflen, 2 * BLAKE2S_BLOCK_SIZE - S->buflen); + blake2s_compress(S, (uint *) S->buf); + + /* Write back */ + neoscrypt_copy(output, S, output_size); +} + + +#define FASTKDF_BUFFER_SIZE 256U + +/* FastKDF, a fast 
buffered key derivation function: + * FASTKDF_BUFFER_SIZE must be a power of 2; + * password_len, salt_len and output_len should not exceed FASTKDF_BUFFER_SIZE; + * prf_output_size must be <= prf_key_size; */ +static void neoscrypt_fastkdf(const uchar *password, uint password_len, const uchar *salt, uint salt_len, + uint N, uchar *output, uint output_len) { + const uint stack_align = 0x40; + const uint kdf_buf_size = 256U; //FASTKDF_BUFFER_SIZE + const uint prf_input_size = 64U; //BLAKE2S_BLOCK_SIZE + const uint prf_key_size = 32U; //BLAKE2S_KEY_SIZE + const uint prf_output_size = 32U; //BLAKE2S_OUT_SIZE + uint bufptr, a, b, i, j; + uchar *A, *B, *prf_input, *prf_key, *prf_output; + uchar *stack; + stack = (uchar*)malloc(sizeof(uchar) * 2 * kdf_buf_size + prf_input_size + prf_key_size + prf_output_size + stack_align); + /* Align and set up the buffers in stack */ + //uchar stack[2 * kdf_buf_size + prf_input_size + prf_key_size + prf_output_size + stack_align]; + + A = &stack[stack_align & ~(stack_align - 1)]; + B = &A[kdf_buf_size + prf_input_size]; + prf_output = &A[2 * kdf_buf_size + prf_input_size + prf_key_size]; + + /* Initialise the password buffer */ + if(password_len > kdf_buf_size) + password_len = kdf_buf_size; + + a = kdf_buf_size / password_len; + for(i = 0; i < a; i++) + neoscrypt_copy(&A[i * password_len], &password[0], password_len); + b = kdf_buf_size - a * password_len; + if(b) + neoscrypt_copy(&A[a * password_len], &password[0], b); + neoscrypt_copy(&A[kdf_buf_size], &password[0], prf_input_size); + + /* Initialise the salt buffer */ + if(salt_len > kdf_buf_size) + salt_len = kdf_buf_size; + + a = kdf_buf_size / salt_len; + for(i = 0; i < a; i++) + neoscrypt_copy(&B[i * salt_len], &salt[0], salt_len); + b = kdf_buf_size - a * salt_len; + if(b) + neoscrypt_copy(&B[a * salt_len], &salt[0], b); + neoscrypt_copy(&B[kdf_buf_size], &salt[0], prf_key_size); + + /* The primary iteration */ + for(i = 0, bufptr = 0; i < N; i++) { + + /* Map the PRF input 
buffer */ + prf_input = &A[bufptr]; + + /* Map the PRF key buffer */ + prf_key = &B[bufptr]; + + /* PRF */ + neoscrypt_blake2s(prf_input, prf_input_size, prf_key, prf_key_size, prf_output, prf_output_size); + + /* Calculate the next buffer pointer */ + for(j = 0, bufptr = 0; j < prf_output_size; j++) + bufptr += prf_output[j]; + bufptr &= (kdf_buf_size - 1); + + /* Modify the salt buffer */ + neoscrypt_xor(&B[bufptr], &prf_output[0], prf_output_size); + + /* Head modified, tail updated */ + if(bufptr < prf_key_size) + neoscrypt_copy(&B[kdf_buf_size + bufptr], &B[bufptr], MIN(prf_output_size, prf_key_size - bufptr)); + + /* Tail modified, head updated */ + if((kdf_buf_size - bufptr) < prf_output_size) + neoscrypt_copy(&B[0], &B[kdf_buf_size], prf_output_size - (kdf_buf_size - bufptr)); + + } + + /* Modify and copy into the output buffer */ + if(output_len > kdf_buf_size) + output_len = kdf_buf_size; + + a = kdf_buf_size - bufptr; + if(a >= output_len) { + neoscrypt_xor(&B[bufptr], &A[0], output_len); + neoscrypt_copy(&output[0], &B[bufptr], output_len); + } else { + neoscrypt_xor(&B[bufptr], &A[0], a); + neoscrypt_xor(&B[0], &A[a], output_len - a); + neoscrypt_copy(&output[0], &B[bufptr], a); + neoscrypt_copy(&output[a], &B[0], output_len - a); + } + +} + + +/* Configurable optimised block mixer */ +static void neoscrypt_blkmix(uint *X, uint *Y, uint r, uint mixmode) { + uint i, mixer, rounds; + + mixer = mixmode >> 8; + rounds = mixmode & 0xFF; + + /* NeoScrypt flow: Scrypt flow: + Xa ^= Xd; M(Xa'); Ya = Xa"; Xa ^= Xb; M(Xa'); Ya = Xa"; + Xb ^= Xa"; M(Xb'); Yb = Xb"; Xb ^= Xa"; M(Xb'); Yb = Xb"; + Xc ^= Xb"; M(Xc'); Yc = Xc"; Xa" = Ya; + Xd ^= Xc"; M(Xd'); Yd = Xd"; Xb" = Yb; + Xa" = Ya; Xb" = Yc; + Xc" = Yb; Xd" = Yd; */ + + if(r == 1) { + neoscrypt_blkxor(&X[0], &X[16], SCRYPT_BLOCK_SIZE); + if(mixer) + neoscrypt_chacha(&X[0], rounds); + else + neoscrypt_salsa(&X[0], rounds); + neoscrypt_blkxor(&X[16], &X[0], SCRYPT_BLOCK_SIZE); + if(mixer) + 
neoscrypt_chacha(&X[16], rounds); + else + neoscrypt_salsa(&X[16], rounds); + return; + } + + if(r == 2) { + neoscrypt_blkxor(&X[0], &X[48], SCRYPT_BLOCK_SIZE); + if(mixer) + neoscrypt_chacha(&X[0], rounds); + else + neoscrypt_salsa(&X[0], rounds); + neoscrypt_blkxor(&X[16], &X[0], SCRYPT_BLOCK_SIZE); + if(mixer) + neoscrypt_chacha(&X[16], rounds); + else + neoscrypt_salsa(&X[16], rounds); + neoscrypt_blkxor(&X[32], &X[16], SCRYPT_BLOCK_SIZE); + if(mixer) + neoscrypt_chacha(&X[32], rounds); + else + neoscrypt_salsa(&X[32], rounds); + neoscrypt_blkxor(&X[48], &X[32], SCRYPT_BLOCK_SIZE); + if(mixer) + neoscrypt_chacha(&X[48], rounds); + else + neoscrypt_salsa(&X[48], rounds); + neoscrypt_blkswp(&X[16], &X[32], SCRYPT_BLOCK_SIZE); + return; + } + + /* Reference code for any reasonable r */ + for(i = 0; i < 2 * r; i++) { + if(i) neoscrypt_blkxor(&X[16 * i], &X[16 * (i - 1)], SCRYPT_BLOCK_SIZE); + else neoscrypt_blkxor(&X[0], &X[16 * (2 * r - 1)], SCRYPT_BLOCK_SIZE); + if(mixer) + neoscrypt_chacha(&X[16 * i], rounds); + else + neoscrypt_salsa(&X[16 * i], rounds); + neoscrypt_blkcpy(&Y[16 * i], &X[16 * i], SCRYPT_BLOCK_SIZE); + } + for(i = 0; i < r; i++) + neoscrypt_blkcpy(&X[16 * i], &Y[16 * 2 * i], SCRYPT_BLOCK_SIZE); + for(i = 0; i < r; i++) + neoscrypt_blkcpy(&X[16 * (i + r)], &Y[16 * (2 * i + 1)], SCRYPT_BLOCK_SIZE); +} + +/* NeoScrypt core engine: + * p = 1, salt = password; + * Basic customisation (required): + * profile bit 0: + * 0 = NeoScrypt(128, 2, 1) with Salsa20/20 and ChaCha20/20; + * 1 = Scrypt(1024, 1, 1) with Salsa20/8; + * profile bits 4 to 1: + * 0000 = FastKDF-BLAKE2s; + * 0001 = PBKDF2-HMAC-SHA256; + * Extended customisation (optional): + * profile bit 31: + * 0 = extended customisation absent; + * 1 = extended customisation present; + * profile bits 7 to 5 (rfactor): + * 000 = r of 1; + * 001 = r of 2; + * 010 = r of 4; + * ... 
+ * 111 = r of 128; + * profile bits 12 to 8 (Nfactor): + * 00000 = N of 2; + * 00001 = N of 4; + * 00010 = N of 8; + * ..... + * 00110 = N of 128; + * ..... + * 01001 = N of 1024; + * ..... + * 11110 = N of 2147483648; + * profile bits 30 to 13 are reserved */ +void neoscrypt(const uchar *password, uchar *output, uint profile) { + uint N = 128, r = 2, dblmix = 1, mixmode = 0x14, stack_align = 0x40; + uint kdf, i, j; + uint *X, *Y, *Z, *V; + + if(profile & 0x1) { + N = 1024; /* N = (1 << (Nfactor + 1)); */ + r = 1; /* r = (1 << rfactor); */ + dblmix = 0; /* Salsa only */ + mixmode = 0x08; /* 8 rounds */ + } + + if(profile >> 31) { + N = (1 << (((profile >> 8) & 0x1F) + 1)); + r = (1 << ((profile >> 5) & 0x7)); + } + uchar *stack; + stack = (uchar*)malloc(((N + 3) * r * 2 * SCRYPT_BLOCK_SIZE + stack_align)*sizeof(uchar)); + /* X = r * 2 * SCRYPT_BLOCK_SIZE */ + X = (uint *) &stack[stack_align & ~(stack_align - 1)]; + /* Z is a copy of X for ChaCha */ + Z = &X[32 * r]; + /* Y is an X sized temporal space */ + Y = &X[64 * r]; + /* V = N * r * 2 * SCRYPT_BLOCK_SIZE */ + V = &X[96 * r]; + + /* X = KDF(password, salt) */ + kdf = (profile >> 1) & 0xF; + + switch(kdf) { + + default: + case(0x0): + neoscrypt_fastkdf(password, 80, password, 80, 32, (uchar *) X, r * 2 * SCRYPT_BLOCK_SIZE); + break; + + case(0x1): + neoscrypt_pbkdf2_sha256(password, 80, password, 80, 1, (uchar *) X, r * 2 * SCRYPT_BLOCK_SIZE); + break; + + } + + /* Process ChaCha 1st, Salsa 2nd and XOR them into FastKDF; otherwise Salsa only */ + + if(dblmix) { + /* blkcpy(Z, X) */ + neoscrypt_blkcpy(&Z[0], &X[0], r * 2 * SCRYPT_BLOCK_SIZE); + + /* Z = SMix(Z) */ + for(i = 0; i < N; i++) { + /* blkcpy(V, Z) */ + neoscrypt_blkcpy(&V[i * (32 * r)], &Z[0], r * 2 * SCRYPT_BLOCK_SIZE); + /* blkmix(Z, Y) */ + neoscrypt_blkmix(&Z[0], &Y[0], r, (mixmode | 0x0100)); + } + for(i = 0; i < N; i++) { + /* integerify(Z) mod N */ + j = (32 * r) * (Z[16 * (2 * r - 1)] & (N - 1)); + /* blkxor(Z, V) */ + neoscrypt_blkxor(&Z[0], 
&V[j], r * 2 * SCRYPT_BLOCK_SIZE); + /* blkmix(Z, Y) */ + neoscrypt_blkmix(&Z[0], &Y[0], r, (mixmode | 0x0100)); + } + } + +#if (ASM) + /* Must be called before and after SSE2 Salsa */ + neoscrypt_salsa_tangle(&X[0], r * 2); +#endif + + /* X = SMix(X) */ + for(i = 0; i < N; i++) { + /* blkcpy(V, X) */ + neoscrypt_blkcpy(&V[i * (32 * r)], &X[0], r * 2 * SCRYPT_BLOCK_SIZE); + /* blkmix(X, Y) */ + neoscrypt_blkmix(&X[0], &Y[0], r, mixmode); + } + for(i = 0; i < N; i++) { + /* integerify(X) mod N */ + j = (32 * r) * (X[16 * (2 * r - 1)] & (N - 1)); + /* blkxor(X, V) */ + neoscrypt_blkxor(&X[0], &V[j], r * 2 * SCRYPT_BLOCK_SIZE); + /* blkmix(X, Y) */ + neoscrypt_blkmix(&X[0], &Y[0], r, mixmode); + } + +#if (ASM) + neoscrypt_salsa_tangle(&X[0], r * 2); +#endif + + if(dblmix) + /* blkxor(X, Z) */ + neoscrypt_blkxor(&X[0], &Z[0], r * 2 * SCRYPT_BLOCK_SIZE); + + /* output = KDF(password, X) */ + switch(kdf) { + + default: + case(0x0): + neoscrypt_fastkdf(password, 80, (uchar *) X, r * 2 * SCRYPT_BLOCK_SIZE, 32, output, 32); + break; + + case(0x1): + neoscrypt_pbkdf2_sha256(password, 80, (uchar *) X, r * 2 * SCRYPT_BLOCK_SIZE, 1, output, 32); + break; + + } + +} + diff --git a/sph/neoscrypt.h b/sph/neoscrypt.h new file mode 100644 index 0000000000..5c4d4e410a --- /dev/null +++ b/sph/neoscrypt.h @@ -0,0 +1,33 @@ +#if (__cplusplus) +extern "C" { +#endif + +void neoscrypt(const unsigned char *input, unsigned char *output, unsigned int profile); + +#if (__cplusplus) +} +#else + +#define SCRYPT_BLOCK_SIZE 64 +#define SCRYPT_HASH_BLOCK_SIZE 64 +#define SCRYPT_HASH_DIGEST_SIZE 32 + +typedef uint8_t hash_digest[SCRYPT_HASH_DIGEST_SIZE]; + +#define ROTL32(a,b) (((a) << (b)) | ((a) >> (32 - b))) +#define ROTR32(a,b) (((a) >> (b)) | ((a) << (32 - b))) + +#define U8TO32_BE(p) \ + (((uint32_t)((p)[0]) << 24) | ((uint32_t)((p)[1]) << 16) | \ + ((uint32_t)((p)[2]) << 8) | ((uint32_t)((p)[3]))) + +#define U32TO8_BE(p, v) \ + (p)[0] = (uint8_t)((v) >> 24); (p)[1] = (uint8_t)((v) >> 16); \ + 
(p)[2] = (uint8_t)((v) >> 8); (p)[3] = (uint8_t)((v) ); + +#define U64TO8_BE(p, v) \ + U32TO8_BE((p), (uint32_t)((v) >> 32)); \ + U32TO8_BE((p) + 4, (uint32_t)((v) )); + +#endif + diff --git a/sph/ripemd.c b/sph/ripemd.c new file mode 100644 index 0000000000..e242ac254b --- /dev/null +++ b/sph/ripemd.c @@ -0,0 +1,833 @@ +/* $Id: ripemd.c 216 2010-06-08 09:46:57Z tp $ */ +/* + * RIPEMD-160 implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include + +#include "sph_ripemd.h" + +/* + * Round functions for RIPEMD (original). 
+ */ +#define F(x, y, z) ((((y) ^ (z)) & (x)) ^ (z)) +#define G(x, y, z) (((x) & (y)) | (((x) | (y)) & (z))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) + +static const sph_u32 oIV[5] = { + SPH_C32(0x67452301), SPH_C32(0xEFCDAB89), + SPH_C32(0x98BADCFE), SPH_C32(0x10325476) +}; + +/* + * Round functions for RIPEMD-128 and RIPEMD-160. + */ +#define F1(x, y, z) ((x) ^ (y) ^ (z)) +#define F2(x, y, z) ((((y) ^ (z)) & (x)) ^ (z)) +#define F3(x, y, z) (((x) | ~(y)) ^ (z)) +#define F4(x, y, z) ((((x) ^ (y)) & (z)) ^ (y)) +#define F5(x, y, z) ((x) ^ ((y) | ~(z))) + +static const sph_u32 IV[5] = { + SPH_C32(0x67452301), SPH_C32(0xEFCDAB89), SPH_C32(0x98BADCFE), + SPH_C32(0x10325476), SPH_C32(0xC3D2E1F0) +}; + +#define ROTL SPH_ROTL32 + +/* ===================================================================== */ +/* + * RIPEMD (original hash, deprecated). + */ + +#define FF1(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + F(B, C, D) + (X)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define GG1(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + G(B, C, D) \ + + (X) + SPH_C32(0x5A827999)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define HH1(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + H(B, C, D) \ + + (X) + SPH_C32(0x6ED9EBA1)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define FF2(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + F(B, C, D) \ + + (X) + SPH_C32(0x50A28BE6)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define GG2(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + G(B, C, D) + (X)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define HH2(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + H(B, C, D) \ + + (X) + SPH_C32(0x5C4DD124)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define RIPEMD_ROUND_BODY(in, h) do { \ + sph_u32 A1, B1, C1, D1; \ + sph_u32 A2, B2, C2, D2; \ + sph_u32 tmp; \ + \ + A1 = A2 = (h)[0]; \ + B1 = B2 = (h)[1]; \ + C1 = C2 = (h)[2]; \ + D1 = D2 = (h)[3]; \ + \ + FF1(A1, B1, C1, D1, in( 0), 11); \ + 
FF1(D1, A1, B1, C1, in( 1), 14); \ + FF1(C1, D1, A1, B1, in( 2), 15); \ + FF1(B1, C1, D1, A1, in( 3), 12); \ + FF1(A1, B1, C1, D1, in( 4), 5); \ + FF1(D1, A1, B1, C1, in( 5), 8); \ + FF1(C1, D1, A1, B1, in( 6), 7); \ + FF1(B1, C1, D1, A1, in( 7), 9); \ + FF1(A1, B1, C1, D1, in( 8), 11); \ + FF1(D1, A1, B1, C1, in( 9), 13); \ + FF1(C1, D1, A1, B1, in(10), 14); \ + FF1(B1, C1, D1, A1, in(11), 15); \ + FF1(A1, B1, C1, D1, in(12), 6); \ + FF1(D1, A1, B1, C1, in(13), 7); \ + FF1(C1, D1, A1, B1, in(14), 9); \ + FF1(B1, C1, D1, A1, in(15), 8); \ + \ + GG1(A1, B1, C1, D1, in( 7), 7); \ + GG1(D1, A1, B1, C1, in( 4), 6); \ + GG1(C1, D1, A1, B1, in(13), 8); \ + GG1(B1, C1, D1, A1, in( 1), 13); \ + GG1(A1, B1, C1, D1, in(10), 11); \ + GG1(D1, A1, B1, C1, in( 6), 9); \ + GG1(C1, D1, A1, B1, in(15), 7); \ + GG1(B1, C1, D1, A1, in( 3), 15); \ + GG1(A1, B1, C1, D1, in(12), 7); \ + GG1(D1, A1, B1, C1, in( 0), 12); \ + GG1(C1, D1, A1, B1, in( 9), 15); \ + GG1(B1, C1, D1, A1, in( 5), 9); \ + GG1(A1, B1, C1, D1, in(14), 7); \ + GG1(D1, A1, B1, C1, in( 2), 11); \ + GG1(C1, D1, A1, B1, in(11), 13); \ + GG1(B1, C1, D1, A1, in( 8), 12); \ + \ + HH1(A1, B1, C1, D1, in( 3), 11); \ + HH1(D1, A1, B1, C1, in(10), 13); \ + HH1(C1, D1, A1, B1, in( 2), 14); \ + HH1(B1, C1, D1, A1, in( 4), 7); \ + HH1(A1, B1, C1, D1, in( 9), 14); \ + HH1(D1, A1, B1, C1, in(15), 9); \ + HH1(C1, D1, A1, B1, in( 8), 13); \ + HH1(B1, C1, D1, A1, in( 1), 15); \ + HH1(A1, B1, C1, D1, in(14), 6); \ + HH1(D1, A1, B1, C1, in( 7), 8); \ + HH1(C1, D1, A1, B1, in( 0), 13); \ + HH1(B1, C1, D1, A1, in( 6), 6); \ + HH1(A1, B1, C1, D1, in(11), 12); \ + HH1(D1, A1, B1, C1, in(13), 5); \ + HH1(C1, D1, A1, B1, in( 5), 7); \ + HH1(B1, C1, D1, A1, in(12), 5); \ + \ + FF2(A2, B2, C2, D2, in( 0), 11); \ + FF2(D2, A2, B2, C2, in( 1), 14); \ + FF2(C2, D2, A2, B2, in( 2), 15); \ + FF2(B2, C2, D2, A2, in( 3), 12); \ + FF2(A2, B2, C2, D2, in( 4), 5); \ + FF2(D2, A2, B2, C2, in( 5), 8); \ + FF2(C2, D2, A2, B2, in( 6), 7); \ + FF2(B2, C2, D2, 
A2, in( 7), 9); \ + FF2(A2, B2, C2, D2, in( 8), 11); \ + FF2(D2, A2, B2, C2, in( 9), 13); \ + FF2(C2, D2, A2, B2, in(10), 14); \ + FF2(B2, C2, D2, A2, in(11), 15); \ + FF2(A2, B2, C2, D2, in(12), 6); \ + FF2(D2, A2, B2, C2, in(13), 7); \ + FF2(C2, D2, A2, B2, in(14), 9); \ + FF2(B2, C2, D2, A2, in(15), 8); \ + \ + GG2(A2, B2, C2, D2, in( 7), 7); \ + GG2(D2, A2, B2, C2, in( 4), 6); \ + GG2(C2, D2, A2, B2, in(13), 8); \ + GG2(B2, C2, D2, A2, in( 1), 13); \ + GG2(A2, B2, C2, D2, in(10), 11); \ + GG2(D2, A2, B2, C2, in( 6), 9); \ + GG2(C2, D2, A2, B2, in(15), 7); \ + GG2(B2, C2, D2, A2, in( 3), 15); \ + GG2(A2, B2, C2, D2, in(12), 7); \ + GG2(D2, A2, B2, C2, in( 0), 12); \ + GG2(C2, D2, A2, B2, in( 9), 15); \ + GG2(B2, C2, D2, A2, in( 5), 9); \ + GG2(A2, B2, C2, D2, in(14), 7); \ + GG2(D2, A2, B2, C2, in( 2), 11); \ + GG2(C2, D2, A2, B2, in(11), 13); \ + GG2(B2, C2, D2, A2, in( 8), 12); \ + \ + HH2(A2, B2, C2, D2, in( 3), 11); \ + HH2(D2, A2, B2, C2, in(10), 13); \ + HH2(C2, D2, A2, B2, in( 2), 14); \ + HH2(B2, C2, D2, A2, in( 4), 7); \ + HH2(A2, B2, C2, D2, in( 9), 14); \ + HH2(D2, A2, B2, C2, in(15), 9); \ + HH2(C2, D2, A2, B2, in( 8), 13); \ + HH2(B2, C2, D2, A2, in( 1), 15); \ + HH2(A2, B2, C2, D2, in(14), 6); \ + HH2(D2, A2, B2, C2, in( 7), 8); \ + HH2(C2, D2, A2, B2, in( 0), 13); \ + HH2(B2, C2, D2, A2, in( 6), 6); \ + HH2(A2, B2, C2, D2, in(11), 12); \ + HH2(D2, A2, B2, C2, in(13), 5); \ + HH2(C2, D2, A2, B2, in( 5), 7); \ + HH2(B2, C2, D2, A2, in(12), 5); \ + \ + tmp = SPH_T32((h)[1] + C1 + D2); \ + (h)[1] = SPH_T32((h)[2] + D1 + A2); \ + (h)[2] = SPH_T32((h)[3] + A1 + B2); \ + (h)[3] = SPH_T32((h)[0] + B1 + C2); \ + (h)[0] = tmp; \ + } while (0) + +/* + * One round of RIPEMD. The data must be aligned for 32-bit access. 
 */
static void
ripemd_round(const unsigned char *data, sph_u32 r[5])
{
    /*
     * RIPEMD_IN(x) yields input word x of the block: decoded on demand
     * with sph_dec32le_aligned() when SPH_LITTLE_FAST is set, otherwise
     * taken from X_var[], which is filled with all 16 decoded words
     * first. RIPEMD_ROUND_BODY reads and writes r[0] to r[3] only.
     */
#if SPH_LITTLE_FAST

#define RIPEMD_IN(x)   sph_dec32le_aligned(data + (4 * (x)))

#else

    sph_u32 X_var[16];
    int i;

    for (i = 0; i < 16; i ++)
        X_var[i] = sph_dec32le_aligned(data + 4 * i);
#define RIPEMD_IN(x)   X_var[x]

#endif
    RIPEMD_ROUND_BODY(RIPEMD_IN, r);
#undef RIPEMD_IN
}

/* see sph_ripemd.h */
void
sph_ripemd_init(void *cc)
{
    sph_ripemd_context *sc;

    sc = cc;
    /* Load the original-RIPEMD initial value and reset the input counter */
    memcpy(sc->val, oIV, sizeof sc->val);
#if SPH_64
    sc->count = 0;
#else
    sc->count_high = sc->count_low = 0;
#endif
}

/*
 * md_helper.c is parameterised through RFUN / HASH / LE32.
 * NOTE(review): presumably it generates the update routine and the
 * ripemd_close() helper called below - confirm in md_helper.c.
 */
#define RFUN   ripemd_round
#define HASH   ripemd
#define LE32   1
#include "md_helper.c"
#undef RFUN
#undef HASH
#undef LE32

/* see sph_ripemd.h */
void
sph_ripemd_close(void *cc, void *dst)
{
    /*
     * Finalise into dst, then re-initialise the context.
     * NOTE(review): the 4 is presumably the number of output words -
     * confirm against md_helper.c.
     */
    ripemd_close(cc, dst, 4);
    sph_ripemd_init(cc);
}

/* see sph_ripemd.h */
void
sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4])
{
    /* Apply the round body directly to the words of msg, updating val in place */
#define RIPEMD_IN(x)   msg[x]
    RIPEMD_ROUND_BODY(RIPEMD_IN, val);
#undef RIPEMD_IN
}

/* ===================================================================== */
/*
 * RIPEMD-128.
 */

/*
 * Round constants for RIPEMD-128.
 */
#define sK11   SPH_C32(0x00000000)
#define sK12   SPH_C32(0x5A827999)
#define sK13   SPH_C32(0x6ED9EBA1)
#define sK14   SPH_C32(0x8F1BBCDC)

#define sK21   SPH_C32(0x50A28BE6)
#define sK22   SPH_C32(0x5C4DD124)
#define sK23   SPH_C32(0x6D703EF3)
#define sK24   SPH_C32(0x00000000)

/* One step: a = ROTL(a + f(b, c, d) + r + k, s), truncated to 32 bits */
#define sRR(a, b, c, d, f, s, r, k)   do { \
        a = ROTL(SPH_T32(a + f(b, c, d) + r + k), s); \
    } while (0)

/* sROUND1 works on the "1" variables with the sK1x constants */
#define sROUND1(a, b, c, d, f, s, r, k) \
    sRR(a ## 1, b ## 1, c ## 1, d ## 1, f, s, r, sK1 ## k)

/* sROUND2 works on the "2" variables with the sK2x constants */
#define sROUND2(a, b, c, d, f, s, r, k) \
    sRR(a ## 2, b ## 2, c ## 2, d ## 2, f, s, r, sK2 ## k)

/*
 * This macro defines the body for a RIPEMD-128 compression function
 * implementation.
The "in" parameter should evaluate, when applied to a + * numerical input parameter from 0 to 15, to an expression which yields + * the corresponding input block. The "h" parameter should evaluate to + * an array or pointer expression designating the array of 4 words which + * contains the input and output of the compression function. + */ + +#define RIPEMD128_ROUND_BODY(in, h) do { \ + sph_u32 A1, B1, C1, D1; \ + sph_u32 A2, B2, C2, D2; \ + sph_u32 tmp; \ + \ + A1 = A2 = (h)[0]; \ + B1 = B2 = (h)[1]; \ + C1 = C2 = (h)[2]; \ + D1 = D2 = (h)[3]; \ + \ + sROUND1(A, B, C, D, F1, 11, in( 0), 1); \ + sROUND1(D, A, B, C, F1, 14, in( 1), 1); \ + sROUND1(C, D, A, B, F1, 15, in( 2), 1); \ + sROUND1(B, C, D, A, F1, 12, in( 3), 1); \ + sROUND1(A, B, C, D, F1, 5, in( 4), 1); \ + sROUND1(D, A, B, C, F1, 8, in( 5), 1); \ + sROUND1(C, D, A, B, F1, 7, in( 6), 1); \ + sROUND1(B, C, D, A, F1, 9, in( 7), 1); \ + sROUND1(A, B, C, D, F1, 11, in( 8), 1); \ + sROUND1(D, A, B, C, F1, 13, in( 9), 1); \ + sROUND1(C, D, A, B, F1, 14, in(10), 1); \ + sROUND1(B, C, D, A, F1, 15, in(11), 1); \ + sROUND1(A, B, C, D, F1, 6, in(12), 1); \ + sROUND1(D, A, B, C, F1, 7, in(13), 1); \ + sROUND1(C, D, A, B, F1, 9, in(14), 1); \ + sROUND1(B, C, D, A, F1, 8, in(15), 1); \ + \ + sROUND1(A, B, C, D, F2, 7, in( 7), 2); \ + sROUND1(D, A, B, C, F2, 6, in( 4), 2); \ + sROUND1(C, D, A, B, F2, 8, in(13), 2); \ + sROUND1(B, C, D, A, F2, 13, in( 1), 2); \ + sROUND1(A, B, C, D, F2, 11, in(10), 2); \ + sROUND1(D, A, B, C, F2, 9, in( 6), 2); \ + sROUND1(C, D, A, B, F2, 7, in(15), 2); \ + sROUND1(B, C, D, A, F2, 15, in( 3), 2); \ + sROUND1(A, B, C, D, F2, 7, in(12), 2); \ + sROUND1(D, A, B, C, F2, 12, in( 0), 2); \ + sROUND1(C, D, A, B, F2, 15, in( 9), 2); \ + sROUND1(B, C, D, A, F2, 9, in( 5), 2); \ + sROUND1(A, B, C, D, F2, 11, in( 2), 2); \ + sROUND1(D, A, B, C, F2, 7, in(14), 2); \ + sROUND1(C, D, A, B, F2, 13, in(11), 2); \ + sROUND1(B, C, D, A, F2, 12, in( 8), 2); \ + \ + sROUND1(A, B, C, D, F3, 11, in( 3), 3); 
\ + sROUND1(D, A, B, C, F3, 13, in(10), 3); \ + sROUND1(C, D, A, B, F3, 6, in(14), 3); \ + sROUND1(B, C, D, A, F3, 7, in( 4), 3); \ + sROUND1(A, B, C, D, F3, 14, in( 9), 3); \ + sROUND1(D, A, B, C, F3, 9, in(15), 3); \ + sROUND1(C, D, A, B, F3, 13, in( 8), 3); \ + sROUND1(B, C, D, A, F3, 15, in( 1), 3); \ + sROUND1(A, B, C, D, F3, 14, in( 2), 3); \ + sROUND1(D, A, B, C, F3, 8, in( 7), 3); \ + sROUND1(C, D, A, B, F3, 13, in( 0), 3); \ + sROUND1(B, C, D, A, F3, 6, in( 6), 3); \ + sROUND1(A, B, C, D, F3, 5, in(13), 3); \ + sROUND1(D, A, B, C, F3, 12, in(11), 3); \ + sROUND1(C, D, A, B, F3, 7, in( 5), 3); \ + sROUND1(B, C, D, A, F3, 5, in(12), 3); \ + \ + sROUND1(A, B, C, D, F4, 11, in( 1), 4); \ + sROUND1(D, A, B, C, F4, 12, in( 9), 4); \ + sROUND1(C, D, A, B, F4, 14, in(11), 4); \ + sROUND1(B, C, D, A, F4, 15, in(10), 4); \ + sROUND1(A, B, C, D, F4, 14, in( 0), 4); \ + sROUND1(D, A, B, C, F4, 15, in( 8), 4); \ + sROUND1(C, D, A, B, F4, 9, in(12), 4); \ + sROUND1(B, C, D, A, F4, 8, in( 4), 4); \ + sROUND1(A, B, C, D, F4, 9, in(13), 4); \ + sROUND1(D, A, B, C, F4, 14, in( 3), 4); \ + sROUND1(C, D, A, B, F4, 5, in( 7), 4); \ + sROUND1(B, C, D, A, F4, 6, in(15), 4); \ + sROUND1(A, B, C, D, F4, 8, in(14), 4); \ + sROUND1(D, A, B, C, F4, 6, in( 5), 4); \ + sROUND1(C, D, A, B, F4, 5, in( 6), 4); \ + sROUND1(B, C, D, A, F4, 12, in( 2), 4); \ + \ + sROUND2(A, B, C, D, F4, 8, in( 5), 1); \ + sROUND2(D, A, B, C, F4, 9, in(14), 1); \ + sROUND2(C, D, A, B, F4, 9, in( 7), 1); \ + sROUND2(B, C, D, A, F4, 11, in( 0), 1); \ + sROUND2(A, B, C, D, F4, 13, in( 9), 1); \ + sROUND2(D, A, B, C, F4, 15, in( 2), 1); \ + sROUND2(C, D, A, B, F4, 15, in(11), 1); \ + sROUND2(B, C, D, A, F4, 5, in( 4), 1); \ + sROUND2(A, B, C, D, F4, 7, in(13), 1); \ + sROUND2(D, A, B, C, F4, 7, in( 6), 1); \ + sROUND2(C, D, A, B, F4, 8, in(15), 1); \ + sROUND2(B, C, D, A, F4, 11, in( 8), 1); \ + sROUND2(A, B, C, D, F4, 14, in( 1), 1); \ + sROUND2(D, A, B, C, F4, 14, in(10), 1); \ + sROUND2(C, D, A, B, F4, 12, 
in( 3), 1); \ + sROUND2(B, C, D, A, F4, 6, in(12), 1); \ + \ + sROUND2(A, B, C, D, F3, 9, in( 6), 2); \ + sROUND2(D, A, B, C, F3, 13, in(11), 2); \ + sROUND2(C, D, A, B, F3, 15, in( 3), 2); \ + sROUND2(B, C, D, A, F3, 7, in( 7), 2); \ + sROUND2(A, B, C, D, F3, 12, in( 0), 2); \ + sROUND2(D, A, B, C, F3, 8, in(13), 2); \ + sROUND2(C, D, A, B, F3, 9, in( 5), 2); \ + sROUND2(B, C, D, A, F3, 11, in(10), 2); \ + sROUND2(A, B, C, D, F3, 7, in(14), 2); \ + sROUND2(D, A, B, C, F3, 7, in(15), 2); \ + sROUND2(C, D, A, B, F3, 12, in( 8), 2); \ + sROUND2(B, C, D, A, F3, 7, in(12), 2); \ + sROUND2(A, B, C, D, F3, 6, in( 4), 2); \ + sROUND2(D, A, B, C, F3, 15, in( 9), 2); \ + sROUND2(C, D, A, B, F3, 13, in( 1), 2); \ + sROUND2(B, C, D, A, F3, 11, in( 2), 2); \ + \ + sROUND2(A, B, C, D, F2, 9, in(15), 3); \ + sROUND2(D, A, B, C, F2, 7, in( 5), 3); \ + sROUND2(C, D, A, B, F2, 15, in( 1), 3); \ + sROUND2(B, C, D, A, F2, 11, in( 3), 3); \ + sROUND2(A, B, C, D, F2, 8, in( 7), 3); \ + sROUND2(D, A, B, C, F2, 6, in(14), 3); \ + sROUND2(C, D, A, B, F2, 6, in( 6), 3); \ + sROUND2(B, C, D, A, F2, 14, in( 9), 3); \ + sROUND2(A, B, C, D, F2, 12, in(11), 3); \ + sROUND2(D, A, B, C, F2, 13, in( 8), 3); \ + sROUND2(C, D, A, B, F2, 5, in(12), 3); \ + sROUND2(B, C, D, A, F2, 14, in( 2), 3); \ + sROUND2(A, B, C, D, F2, 13, in(10), 3); \ + sROUND2(D, A, B, C, F2, 13, in( 0), 3); \ + sROUND2(C, D, A, B, F2, 7, in( 4), 3); \ + sROUND2(B, C, D, A, F2, 5, in(13), 3); \ + \ + sROUND2(A, B, C, D, F1, 15, in( 8), 4); \ + sROUND2(D, A, B, C, F1, 5, in( 6), 4); \ + sROUND2(C, D, A, B, F1, 8, in( 4), 4); \ + sROUND2(B, C, D, A, F1, 11, in( 1), 4); \ + sROUND2(A, B, C, D, F1, 14, in( 3), 4); \ + sROUND2(D, A, B, C, F1, 14, in(11), 4); \ + sROUND2(C, D, A, B, F1, 6, in(15), 4); \ + sROUND2(B, C, D, A, F1, 14, in( 0), 4); \ + sROUND2(A, B, C, D, F1, 6, in( 5), 4); \ + sROUND2(D, A, B, C, F1, 9, in(12), 4); \ + sROUND2(C, D, A, B, F1, 12, in( 2), 4); \ + sROUND2(B, C, D, A, F1, 9, in(13), 4); \ + sROUND2(A, B, 
C, D, F1, 12, in( 9), 4); \
        sROUND2(D, A, B, C, F1, 5, in( 7), 4); \
        sROUND2(C, D, A, B, F1, 15, in(10), 4); \
        sROUND2(B, C, D, A, F1, 8, in(14), 4); \
 \
        tmp = SPH_T32((h)[1] + C1 + D2); \
        (h)[1] = SPH_T32((h)[2] + D1 + A2); \
        (h)[2] = SPH_T32((h)[3] + A1 + B2); \
        (h)[3] = SPH_T32((h)[0] + B1 + C2); \
        (h)[0] = tmp; \
    } while (0)

/*
 * One round of RIPEMD-128. The data must be aligned for 32-bit access.
 */
static void
ripemd128_round(const unsigned char *data, sph_u32 r[5])
{
    /*
     * RIPEMD128_IN(x) yields input word x of the block: decoded on
     * demand with sph_dec32le_aligned() when SPH_LITTLE_FAST is set,
     * otherwise taken from X_var[], which is filled with all 16 decoded
     * words first.
     */
#if SPH_LITTLE_FAST

#define RIPEMD128_IN(x)   sph_dec32le_aligned(data + (4 * (x)))

#else

    sph_u32 X_var[16];
    int i;

    for (i = 0; i < 16; i ++)
        X_var[i] = sph_dec32le_aligned(data + 4 * i);
#define RIPEMD128_IN(x)   X_var[x]

#endif
    RIPEMD128_ROUND_BODY(RIPEMD128_IN, r);
#undef RIPEMD128_IN
}

/* see sph_ripemd.h */
void
sph_ripemd128_init(void *cc)
{
    sph_ripemd128_context *sc;

    sc = cc;
    /* Copy sizeof sc->val bytes of IV into the state and reset the input counter */
    memcpy(sc->val, IV, sizeof sc->val);
#if SPH_64
    sc->count = 0;
#else
    sc->count_high = sc->count_low = 0;
#endif
}

/*
 * md_helper.c is parameterised through RFUN / HASH / LE32.
 * NOTE(review): presumably it generates the update routine and the
 * ripemd128_close() helper called below - confirm in md_helper.c.
 */
#define RFUN   ripemd128_round
#define HASH   ripemd128
#define LE32   1
#include "md_helper.c"
#undef RFUN
#undef HASH
#undef LE32

/* see sph_ripemd.h */
void
sph_ripemd128_close(void *cc, void *dst)
{
    /*
     * Finalise into dst, then re-initialise the context.
     * NOTE(review): the 4 is presumably the number of output words -
     * confirm against md_helper.c.
     */
    ripemd128_close(cc, dst, 4);
    sph_ripemd128_init(cc);
}

/* see sph_ripemd.h */
void
sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4])
{
    /* Apply the round body directly to the words of msg, updating val in place */
#define RIPEMD128_IN(x)   msg[x]
    RIPEMD128_ROUND_BODY(RIPEMD128_IN, val);
#undef RIPEMD128_IN
}

/* ===================================================================== */
/*
 * RIPEMD-160.
 */

/*
 * Round constants for RIPEMD-160.
+ */ +#define K11 SPH_C32(0x00000000) +#define K12 SPH_C32(0x5A827999) +#define K13 SPH_C32(0x6ED9EBA1) +#define K14 SPH_C32(0x8F1BBCDC) +#define K15 SPH_C32(0xA953FD4E) + +#define K21 SPH_C32(0x50A28BE6) +#define K22 SPH_C32(0x5C4DD124) +#define K23 SPH_C32(0x6D703EF3) +#define K24 SPH_C32(0x7A6D76E9) +#define K25 SPH_C32(0x00000000) + +#define RR(a, b, c, d, e, f, s, r, k) do { \ + a = SPH_T32(ROTL(SPH_T32(a + f(b, c, d) + r + k), s) + e); \ + c = ROTL(c, 10); \ + } while (0) + +#define ROUND1(a, b, c, d, e, f, s, r, k) \ + RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k) + +#define ROUND2(a, b, c, d, e, f, s, r, k) \ + RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k) + +/* + * This macro defines the body for a RIPEMD-160 compression function + * implementation. The "in" parameter should evaluate, when applied to a + * numerical input parameter from 0 to 15, to an expression which yields + * the corresponding input block. The "h" parameter should evaluate to + * an array or pointer expression designating the array of 5 words which + * contains the input and output of the compression function. 
+ */ + +#define RIPEMD160_ROUND_BODY(in, h) do { \ + sph_u32 A1, B1, C1, D1, E1; \ + sph_u32 A2, B2, C2, D2, E2; \ + sph_u32 tmp; \ + \ + A1 = A2 = (h)[0]; \ + B1 = B2 = (h)[1]; \ + C1 = C2 = (h)[2]; \ + D1 = D2 = (h)[3]; \ + E1 = E2 = (h)[4]; \ + \ + ROUND1(A, B, C, D, E, F1, 11, in( 0), 1); \ + ROUND1(E, A, B, C, D, F1, 14, in( 1), 1); \ + ROUND1(D, E, A, B, C, F1, 15, in( 2), 1); \ + ROUND1(C, D, E, A, B, F1, 12, in( 3), 1); \ + ROUND1(B, C, D, E, A, F1, 5, in( 4), 1); \ + ROUND1(A, B, C, D, E, F1, 8, in( 5), 1); \ + ROUND1(E, A, B, C, D, F1, 7, in( 6), 1); \ + ROUND1(D, E, A, B, C, F1, 9, in( 7), 1); \ + ROUND1(C, D, E, A, B, F1, 11, in( 8), 1); \ + ROUND1(B, C, D, E, A, F1, 13, in( 9), 1); \ + ROUND1(A, B, C, D, E, F1, 14, in(10), 1); \ + ROUND1(E, A, B, C, D, F1, 15, in(11), 1); \ + ROUND1(D, E, A, B, C, F1, 6, in(12), 1); \ + ROUND1(C, D, E, A, B, F1, 7, in(13), 1); \ + ROUND1(B, C, D, E, A, F1, 9, in(14), 1); \ + ROUND1(A, B, C, D, E, F1, 8, in(15), 1); \ + \ + ROUND1(E, A, B, C, D, F2, 7, in( 7), 2); \ + ROUND1(D, E, A, B, C, F2, 6, in( 4), 2); \ + ROUND1(C, D, E, A, B, F2, 8, in(13), 2); \ + ROUND1(B, C, D, E, A, F2, 13, in( 1), 2); \ + ROUND1(A, B, C, D, E, F2, 11, in(10), 2); \ + ROUND1(E, A, B, C, D, F2, 9, in( 6), 2); \ + ROUND1(D, E, A, B, C, F2, 7, in(15), 2); \ + ROUND1(C, D, E, A, B, F2, 15, in( 3), 2); \ + ROUND1(B, C, D, E, A, F2, 7, in(12), 2); \ + ROUND1(A, B, C, D, E, F2, 12, in( 0), 2); \ + ROUND1(E, A, B, C, D, F2, 15, in( 9), 2); \ + ROUND1(D, E, A, B, C, F2, 9, in( 5), 2); \ + ROUND1(C, D, E, A, B, F2, 11, in( 2), 2); \ + ROUND1(B, C, D, E, A, F2, 7, in(14), 2); \ + ROUND1(A, B, C, D, E, F2, 13, in(11), 2); \ + ROUND1(E, A, B, C, D, F2, 12, in( 8), 2); \ + \ + ROUND1(D, E, A, B, C, F3, 11, in( 3), 3); \ + ROUND1(C, D, E, A, B, F3, 13, in(10), 3); \ + ROUND1(B, C, D, E, A, F3, 6, in(14), 3); \ + ROUND1(A, B, C, D, E, F3, 7, in( 4), 3); \ + ROUND1(E, A, B, C, D, F3, 14, in( 9), 3); \ + ROUND1(D, E, A, B, C, F3, 9, in(15), 3); \ + ROUND1(C, 
D, E, A, B, F3, 13, in( 8), 3); \ + ROUND1(B, C, D, E, A, F3, 15, in( 1), 3); \ + ROUND1(A, B, C, D, E, F3, 14, in( 2), 3); \ + ROUND1(E, A, B, C, D, F3, 8, in( 7), 3); \ + ROUND1(D, E, A, B, C, F3, 13, in( 0), 3); \ + ROUND1(C, D, E, A, B, F3, 6, in( 6), 3); \ + ROUND1(B, C, D, E, A, F3, 5, in(13), 3); \ + ROUND1(A, B, C, D, E, F3, 12, in(11), 3); \ + ROUND1(E, A, B, C, D, F3, 7, in( 5), 3); \ + ROUND1(D, E, A, B, C, F3, 5, in(12), 3); \ + \ + ROUND1(C, D, E, A, B, F4, 11, in( 1), 4); \ + ROUND1(B, C, D, E, A, F4, 12, in( 9), 4); \ + ROUND1(A, B, C, D, E, F4, 14, in(11), 4); \ + ROUND1(E, A, B, C, D, F4, 15, in(10), 4); \ + ROUND1(D, E, A, B, C, F4, 14, in( 0), 4); \ + ROUND1(C, D, E, A, B, F4, 15, in( 8), 4); \ + ROUND1(B, C, D, E, A, F4, 9, in(12), 4); \ + ROUND1(A, B, C, D, E, F4, 8, in( 4), 4); \ + ROUND1(E, A, B, C, D, F4, 9, in(13), 4); \ + ROUND1(D, E, A, B, C, F4, 14, in( 3), 4); \ + ROUND1(C, D, E, A, B, F4, 5, in( 7), 4); \ + ROUND1(B, C, D, E, A, F4, 6, in(15), 4); \ + ROUND1(A, B, C, D, E, F4, 8, in(14), 4); \ + ROUND1(E, A, B, C, D, F4, 6, in( 5), 4); \ + ROUND1(D, E, A, B, C, F4, 5, in( 6), 4); \ + ROUND1(C, D, E, A, B, F4, 12, in( 2), 4); \ + \ + ROUND1(B, C, D, E, A, F5, 9, in( 4), 5); \ + ROUND1(A, B, C, D, E, F5, 15, in( 0), 5); \ + ROUND1(E, A, B, C, D, F5, 5, in( 5), 5); \ + ROUND1(D, E, A, B, C, F5, 11, in( 9), 5); \ + ROUND1(C, D, E, A, B, F5, 6, in( 7), 5); \ + ROUND1(B, C, D, E, A, F5, 8, in(12), 5); \ + ROUND1(A, B, C, D, E, F5, 13, in( 2), 5); \ + ROUND1(E, A, B, C, D, F5, 12, in(10), 5); \ + ROUND1(D, E, A, B, C, F5, 5, in(14), 5); \ + ROUND1(C, D, E, A, B, F5, 12, in( 1), 5); \ + ROUND1(B, C, D, E, A, F5, 13, in( 3), 5); \ + ROUND1(A, B, C, D, E, F5, 14, in( 8), 5); \ + ROUND1(E, A, B, C, D, F5, 11, in(11), 5); \ + ROUND1(D, E, A, B, C, F5, 8, in( 6), 5); \ + ROUND1(C, D, E, A, B, F5, 5, in(15), 5); \ + ROUND1(B, C, D, E, A, F5, 6, in(13), 5); \ + \ + ROUND2(A, B, C, D, E, F5, 8, in( 5), 1); \ + ROUND2(E, A, B, C, D, F5, 9, in(14), 1); 
\ + ROUND2(D, E, A, B, C, F5, 9, in( 7), 1); \ + ROUND2(C, D, E, A, B, F5, 11, in( 0), 1); \ + ROUND2(B, C, D, E, A, F5, 13, in( 9), 1); \ + ROUND2(A, B, C, D, E, F5, 15, in( 2), 1); \ + ROUND2(E, A, B, C, D, F5, 15, in(11), 1); \ + ROUND2(D, E, A, B, C, F5, 5, in( 4), 1); \ + ROUND2(C, D, E, A, B, F5, 7, in(13), 1); \ + ROUND2(B, C, D, E, A, F5, 7, in( 6), 1); \ + ROUND2(A, B, C, D, E, F5, 8, in(15), 1); \ + ROUND2(E, A, B, C, D, F5, 11, in( 8), 1); \ + ROUND2(D, E, A, B, C, F5, 14, in( 1), 1); \ + ROUND2(C, D, E, A, B, F5, 14, in(10), 1); \ + ROUND2(B, C, D, E, A, F5, 12, in( 3), 1); \ + ROUND2(A, B, C, D, E, F5, 6, in(12), 1); \ + \ + ROUND2(E, A, B, C, D, F4, 9, in( 6), 2); \ + ROUND2(D, E, A, B, C, F4, 13, in(11), 2); \ + ROUND2(C, D, E, A, B, F4, 15, in( 3), 2); \ + ROUND2(B, C, D, E, A, F4, 7, in( 7), 2); \ + ROUND2(A, B, C, D, E, F4, 12, in( 0), 2); \ + ROUND2(E, A, B, C, D, F4, 8, in(13), 2); \ + ROUND2(D, E, A, B, C, F4, 9, in( 5), 2); \ + ROUND2(C, D, E, A, B, F4, 11, in(10), 2); \ + ROUND2(B, C, D, E, A, F4, 7, in(14), 2); \ + ROUND2(A, B, C, D, E, F4, 7, in(15), 2); \ + ROUND2(E, A, B, C, D, F4, 12, in( 8), 2); \ + ROUND2(D, E, A, B, C, F4, 7, in(12), 2); \ + ROUND2(C, D, E, A, B, F4, 6, in( 4), 2); \ + ROUND2(B, C, D, E, A, F4, 15, in( 9), 2); \ + ROUND2(A, B, C, D, E, F4, 13, in( 1), 2); \ + ROUND2(E, A, B, C, D, F4, 11, in( 2), 2); \ + \ + ROUND2(D, E, A, B, C, F3, 9, in(15), 3); \ + ROUND2(C, D, E, A, B, F3, 7, in( 5), 3); \ + ROUND2(B, C, D, E, A, F3, 15, in( 1), 3); \ + ROUND2(A, B, C, D, E, F3, 11, in( 3), 3); \ + ROUND2(E, A, B, C, D, F3, 8, in( 7), 3); \ + ROUND2(D, E, A, B, C, F3, 6, in(14), 3); \ + ROUND2(C, D, E, A, B, F3, 6, in( 6), 3); \ + ROUND2(B, C, D, E, A, F3, 14, in( 9), 3); \ + ROUND2(A, B, C, D, E, F3, 12, in(11), 3); \ + ROUND2(E, A, B, C, D, F3, 13, in( 8), 3); \ + ROUND2(D, E, A, B, C, F3, 5, in(12), 3); \ + ROUND2(C, D, E, A, B, F3, 14, in( 2), 3); \ + ROUND2(B, C, D, E, A, F3, 13, in(10), 3); \ + ROUND2(A, B, C, D, E, F3, 13, 
in( 0), 3); \ + ROUND2(E, A, B, C, D, F3, 7, in( 4), 3); \ + ROUND2(D, E, A, B, C, F3, 5, in(13), 3); \ + \ + ROUND2(C, D, E, A, B, F2, 15, in( 8), 4); \ + ROUND2(B, C, D, E, A, F2, 5, in( 6), 4); \ + ROUND2(A, B, C, D, E, F2, 8, in( 4), 4); \ + ROUND2(E, A, B, C, D, F2, 11, in( 1), 4); \ + ROUND2(D, E, A, B, C, F2, 14, in( 3), 4); \ + ROUND2(C, D, E, A, B, F2, 14, in(11), 4); \ + ROUND2(B, C, D, E, A, F2, 6, in(15), 4); \ + ROUND2(A, B, C, D, E, F2, 14, in( 0), 4); \ + ROUND2(E, A, B, C, D, F2, 6, in( 5), 4); \ + ROUND2(D, E, A, B, C, F2, 9, in(12), 4); \ + ROUND2(C, D, E, A, B, F2, 12, in( 2), 4); \ + ROUND2(B, C, D, E, A, F2, 9, in(13), 4); \ + ROUND2(A, B, C, D, E, F2, 12, in( 9), 4); \ + ROUND2(E, A, B, C, D, F2, 5, in( 7), 4); \ + ROUND2(D, E, A, B, C, F2, 15, in(10), 4); \ + ROUND2(C, D, E, A, B, F2, 8, in(14), 4); \ + \ + ROUND2(B, C, D, E, A, F1, 8, in(12), 5); \ + ROUND2(A, B, C, D, E, F1, 5, in(15), 5); \ + ROUND2(E, A, B, C, D, F1, 12, in(10), 5); \ + ROUND2(D, E, A, B, C, F1, 9, in( 4), 5); \ + ROUND2(C, D, E, A, B, F1, 12, in( 1), 5); \ + ROUND2(B, C, D, E, A, F1, 5, in( 5), 5); \ + ROUND2(A, B, C, D, E, F1, 14, in( 8), 5); \ + ROUND2(E, A, B, C, D, F1, 6, in( 7), 5); \ + ROUND2(D, E, A, B, C, F1, 8, in( 6), 5); \ + ROUND2(C, D, E, A, B, F1, 13, in( 2), 5); \ + ROUND2(B, C, D, E, A, F1, 6, in(13), 5); \ + ROUND2(A, B, C, D, E, F1, 5, in(14), 5); \ + ROUND2(E, A, B, C, D, F1, 15, in( 0), 5); \ + ROUND2(D, E, A, B, C, F1, 13, in( 3), 5); \ + ROUND2(C, D, E, A, B, F1, 11, in( 9), 5); \ + ROUND2(B, C, D, E, A, F1, 11, in(11), 5); \ + \ + tmp = SPH_T32((h)[1] + C1 + D2); \ + (h)[1] = SPH_T32((h)[2] + D1 + E2); \ + (h)[2] = SPH_T32((h)[3] + E1 + A2); \ + (h)[3] = SPH_T32((h)[4] + A1 + B2); \ + (h)[4] = SPH_T32((h)[0] + B1 + C2); \ + (h)[0] = tmp; \ + } while (0) + +/* + * One round of RIPEMD-160. The data must be aligned for 32-bit access. 
+ */ +static void +ripemd160_round(const unsigned char *data, sph_u32 r[5]) +{ +#if SPH_LITTLE_FAST + +#define RIPEMD160_IN(x) sph_dec32le_aligned(data + (4 * (x))) + +#else + + sph_u32 X_var[16]; + int i; + + for (i = 0; i < 16; i ++) + X_var[i] = sph_dec32le_aligned(data + 4 * i); +#define RIPEMD160_IN(x) X_var[x] + +#endif + RIPEMD160_ROUND_BODY(RIPEMD160_IN, r); +#undef RIPEMD160_IN +} + +/* see sph_ripemd.h */ +void +sph_ripemd160_init(void *cc) +{ + sph_ripemd160_context *sc; + + sc = cc; + memcpy(sc->val, IV, sizeof sc->val); +#if SPH_64 + sc->count = 0; +#else + sc->count_high = sc->count_low = 0; +#endif +} + +#define RFUN ripemd160_round +#define HASH ripemd160 +#define LE32 1 +#include "md_helper.c" +#undef RFUN +#undef HASH +#undef LE32 + +/* see sph_ripemd.h */ +void +sph_ripemd160_close(void *cc, void *dst) +{ + ripemd160_close(cc, dst, 5); + sph_ripemd160_init(cc); +} + +/* see sph_ripemd.h */ +void +sph_ripemd160_comp(const sph_u32 msg[16], sph_u32 val[5]) +{ +#define RIPEMD160_IN(x) msg[x] + RIPEMD160_ROUND_BODY(RIPEMD160_IN, val); +#undef RIPEMD160_IN +} diff --git a/sph/sha2.c b/sph/sha2.c new file mode 100644 index 0000000000..d13a49514b --- /dev/null +++ b/sph/sha2.c @@ -0,0 +1,630 @@ +/* + * Copyright 2011 ArtForz + * Copyright 2011-2013 pooler + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. See COPYING for more details. 
+ */ + +#include "cpuminer-config.h" +#include "miner.h" + +#include +#include + +#if defined(__arm__) && defined(__APCS_32__) +#define EXTERN_SHA256 +#endif + +static const uint32_t sha256_h[8] = { + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, + 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 +}; + +static const uint32_t sha256_k[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +void sha256_init(uint32_t *state) +{ + memcpy(state, sha256_h, 32); +} + +/* Elementary functions used by SHA256 */ +#define Ch(x, y, z) ((x & (y ^ z)) ^ z) +#define Maj(x, y, z) ((x & (y | z)) | (y & z)) +#define ROTR(x, n) ((x >> n) | (x << (32 - n))) +#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) +#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) +#define s0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ (x >> 3)) +#define s1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ (x >> 10)) + +/* SHA256 round function */ +#define RND(a, b, c, d, e, f, g, h, k) \ + do { \ + t0 = h + S1(e) + Ch(e, f, g) + k; \ + t1 = S0(a) + Maj(a, b, c); \ + d += t0; \ + h = t0 + t1; \ + } while (0) + +/* Adjusted round function for rotating state */ +#define RNDr(S, W, i) \ + RND(S[(64 - i) % 8], S[(65 - i) % 8], \ + S[(66 - i) % 8], S[(67 - i) % 8], \ + S[(68 - i) % 8], 
S[(69 - i) % 8], \ + S[(70 - i) % 8], S[(71 - i) % 8], \ + W[i] + sha256_k[i]) + +#ifndef EXTERN_SHA256 + +/* + * SHA256 block compression function. The 256-bit state is transformed via + * the 512-bit input block to produce a new state. + */ +void sha256_transform(uint32_t *state, const uint32_t *block, int swap) +{ + uint32_t W[64]; + uint32_t S[8]; + uint32_t t0, t1; + int i; + + /* 1. Prepare message schedule W. */ + if (swap) { + for (i = 0; i < 16; i++) + W[i] = swab32(block[i]); + } else + memcpy(W, block, 64); + for (i = 16; i < 64; i += 2) { + W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16]; + W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15]; + } + + /* 2. Initialize working variables. */ + memcpy(S, state, 32); + + /* 3. Mix. */ + RNDr(S, W, 0); + RNDr(S, W, 1); + RNDr(S, W, 2); + RNDr(S, W, 3); + RNDr(S, W, 4); + RNDr(S, W, 5); + RNDr(S, W, 6); + RNDr(S, W, 7); + RNDr(S, W, 8); + RNDr(S, W, 9); + RNDr(S, W, 10); + RNDr(S, W, 11); + RNDr(S, W, 12); + RNDr(S, W, 13); + RNDr(S, W, 14); + RNDr(S, W, 15); + RNDr(S, W, 16); + RNDr(S, W, 17); + RNDr(S, W, 18); + RNDr(S, W, 19); + RNDr(S, W, 20); + RNDr(S, W, 21); + RNDr(S, W, 22); + RNDr(S, W, 23); + RNDr(S, W, 24); + RNDr(S, W, 25); + RNDr(S, W, 26); + RNDr(S, W, 27); + RNDr(S, W, 28); + RNDr(S, W, 29); + RNDr(S, W, 30); + RNDr(S, W, 31); + RNDr(S, W, 32); + RNDr(S, W, 33); + RNDr(S, W, 34); + RNDr(S, W, 35); + RNDr(S, W, 36); + RNDr(S, W, 37); + RNDr(S, W, 38); + RNDr(S, W, 39); + RNDr(S, W, 40); + RNDr(S, W, 41); + RNDr(S, W, 42); + RNDr(S, W, 43); + RNDr(S, W, 44); + RNDr(S, W, 45); + RNDr(S, W, 46); + RNDr(S, W, 47); + RNDr(S, W, 48); + RNDr(S, W, 49); + RNDr(S, W, 50); + RNDr(S, W, 51); + RNDr(S, W, 52); + RNDr(S, W, 53); + RNDr(S, W, 54); + RNDr(S, W, 55); + RNDr(S, W, 56); + RNDr(S, W, 57); + RNDr(S, W, 58); + RNDr(S, W, 59); + RNDr(S, W, 60); + RNDr(S, W, 61); + RNDr(S, W, 62); + RNDr(S, W, 63); + + /* 4. 
Mix local working variables into global state */ + for (i = 0; i < 8; i++) + state[i] += S[i]; +} + +#endif /* EXTERN_SHA256 */ + + +static const uint32_t sha256d_hash1[16] = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x80000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000100 +}; + +static void sha256d_80_swap(uint32_t *hash, const uint32_t *data) +{ + uint32_t S[16]; + int i; + + sha256_init(S); + sha256_transform(S, data, 0); + sha256_transform(S, data + 16, 0); + memcpy(S + 8, sha256d_hash1 + 8, 32); + sha256_init(hash); + sha256_transform(hash, S, 0); + for (i = 0; i < 8; i++) + hash[i] = swab32(hash[i]); +} + +void sha256d(unsigned char *hash, const unsigned char *data, int len) +{ + uint32_t S[16], T[16]; + int i, r; + + sha256_init(S); + for (r = len; r > -9; r -= 64) { + if (r < 64) + memset(T, 0, 64); + memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r)); + if (r >= 0 && r < 64) + ((unsigned char *)T)[r] = 0x80; + for (i = 0; i < 16; i++) + T[i] = be32dec(T + i); + if (r < 56) + T[15] = 8 * len; + sha256_transform(S, T, 0); + } + memcpy(S + 8, sha256d_hash1 + 8, 32); + sha256_init(T); + sha256_transform(T, S, 0); + for (i = 0; i < 8; i++) + be32enc((uint32_t *)hash + i, T[i]); +} + +static inline void sha256d_preextend(uint32_t *W) +{ + W[16] = s1(W[14]) + W[ 9] + s0(W[ 1]) + W[ 0]; + W[17] = s1(W[15]) + W[10] + s0(W[ 2]) + W[ 1]; + W[18] = s1(W[16]) + W[11] + W[ 2]; + W[19] = s1(W[17]) + W[12] + s0(W[ 4]); + W[20] = W[13] + s0(W[ 5]) + W[ 4]; + W[21] = W[14] + s0(W[ 6]) + W[ 5]; + W[22] = W[15] + s0(W[ 7]) + W[ 6]; + W[23] = W[16] + s0(W[ 8]) + W[ 7]; + W[24] = W[17] + s0(W[ 9]) + W[ 8]; + W[25] = s0(W[10]) + W[ 9]; + W[26] = s0(W[11]) + W[10]; + W[27] = s0(W[12]) + W[11]; + W[28] = s0(W[13]) + W[12]; + W[29] = s0(W[14]) + W[13]; + W[30] = s0(W[15]) + W[14]; + W[31] = s0(W[16]) + W[15]; +} + +static inline void sha256d_prehash(uint32_t *S, const 
uint32_t *W) +{ + uint32_t t0, t1; + RNDr(S, W, 0); + RNDr(S, W, 1); + RNDr(S, W, 2); +} + +#ifdef EXTERN_SHA256 + +void sha256d_ms(uint32_t *hash, uint32_t *W, + const uint32_t *midstate, const uint32_t *prehash); + +#else + +static inline void sha256d_ms(uint32_t *hash, uint32_t *W, + const uint32_t *midstate, const uint32_t *prehash) +{ + uint32_t S[64]; + uint32_t t0, t1; + int i; + + S[18] = W[18]; + S[19] = W[19]; + S[20] = W[20]; + S[22] = W[22]; + S[23] = W[23]; + S[24] = W[24]; + S[30] = W[30]; + S[31] = W[31]; + + W[18] += s0(W[3]); + W[19] += W[3]; + W[20] += s1(W[18]); + W[21] = s1(W[19]); + W[22] += s1(W[20]); + W[23] += s1(W[21]); + W[24] += s1(W[22]); + W[25] = s1(W[23]) + W[18]; + W[26] = s1(W[24]) + W[19]; + W[27] = s1(W[25]) + W[20]; + W[28] = s1(W[26]) + W[21]; + W[29] = s1(W[27]) + W[22]; + W[30] += s1(W[28]) + W[23]; + W[31] += s1(W[29]) + W[24]; + for (i = 32; i < 64; i += 2) { + W[i] = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16]; + W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15]; + } + + memcpy(S, prehash, 32); + + RNDr(S, W, 3); + RNDr(S, W, 4); + RNDr(S, W, 5); + RNDr(S, W, 6); + RNDr(S, W, 7); + RNDr(S, W, 8); + RNDr(S, W, 9); + RNDr(S, W, 10); + RNDr(S, W, 11); + RNDr(S, W, 12); + RNDr(S, W, 13); + RNDr(S, W, 14); + RNDr(S, W, 15); + RNDr(S, W, 16); + RNDr(S, W, 17); + RNDr(S, W, 18); + RNDr(S, W, 19); + RNDr(S, W, 20); + RNDr(S, W, 21); + RNDr(S, W, 22); + RNDr(S, W, 23); + RNDr(S, W, 24); + RNDr(S, W, 25); + RNDr(S, W, 26); + RNDr(S, W, 27); + RNDr(S, W, 28); + RNDr(S, W, 29); + RNDr(S, W, 30); + RNDr(S, W, 31); + RNDr(S, W, 32); + RNDr(S, W, 33); + RNDr(S, W, 34); + RNDr(S, W, 35); + RNDr(S, W, 36); + RNDr(S, W, 37); + RNDr(S, W, 38); + RNDr(S, W, 39); + RNDr(S, W, 40); + RNDr(S, W, 41); + RNDr(S, W, 42); + RNDr(S, W, 43); + RNDr(S, W, 44); + RNDr(S, W, 45); + RNDr(S, W, 46); + RNDr(S, W, 47); + RNDr(S, W, 48); + RNDr(S, W, 49); + RNDr(S, W, 50); + RNDr(S, W, 51); + RNDr(S, W, 52); + RNDr(S, W, 53); + RNDr(S, W, 
54); + RNDr(S, W, 55); + RNDr(S, W, 56); + RNDr(S, W, 57); + RNDr(S, W, 58); + RNDr(S, W, 59); + RNDr(S, W, 60); + RNDr(S, W, 61); + RNDr(S, W, 62); + RNDr(S, W, 63); + + for (i = 0; i < 8; i++) + S[i] += midstate[i]; + + W[18] = S[18]; + W[19] = S[19]; + W[20] = S[20]; + W[22] = S[22]; + W[23] = S[23]; + W[24] = S[24]; + W[30] = S[30]; + W[31] = S[31]; + + memcpy(S + 8, sha256d_hash1 + 8, 32); + S[16] = s1(sha256d_hash1[14]) + sha256d_hash1[ 9] + s0(S[ 1]) + S[ 0]; + S[17] = s1(sha256d_hash1[15]) + sha256d_hash1[10] + s0(S[ 2]) + S[ 1]; + S[18] = s1(S[16]) + sha256d_hash1[11] + s0(S[ 3]) + S[ 2]; + S[19] = s1(S[17]) + sha256d_hash1[12] + s0(S[ 4]) + S[ 3]; + S[20] = s1(S[18]) + sha256d_hash1[13] + s0(S[ 5]) + S[ 4]; + S[21] = s1(S[19]) + sha256d_hash1[14] + s0(S[ 6]) + S[ 5]; + S[22] = s1(S[20]) + sha256d_hash1[15] + s0(S[ 7]) + S[ 6]; + S[23] = s1(S[21]) + S[16] + s0(sha256d_hash1[ 8]) + S[ 7]; + S[24] = s1(S[22]) + S[17] + s0(sha256d_hash1[ 9]) + sha256d_hash1[ 8]; + S[25] = s1(S[23]) + S[18] + s0(sha256d_hash1[10]) + sha256d_hash1[ 9]; + S[26] = s1(S[24]) + S[19] + s0(sha256d_hash1[11]) + sha256d_hash1[10]; + S[27] = s1(S[25]) + S[20] + s0(sha256d_hash1[12]) + sha256d_hash1[11]; + S[28] = s1(S[26]) + S[21] + s0(sha256d_hash1[13]) + sha256d_hash1[12]; + S[29] = s1(S[27]) + S[22] + s0(sha256d_hash1[14]) + sha256d_hash1[13]; + S[30] = s1(S[28]) + S[23] + s0(sha256d_hash1[15]) + sha256d_hash1[14]; + S[31] = s1(S[29]) + S[24] + s0(S[16]) + sha256d_hash1[15]; + for (i = 32; i < 60; i += 2) { + S[i] = s1(S[i - 2]) + S[i - 7] + s0(S[i - 15]) + S[i - 16]; + S[i+1] = s1(S[i - 1]) + S[i - 6] + s0(S[i - 14]) + S[i - 15]; + } + S[60] = s1(S[58]) + S[53] + s0(S[45]) + S[44]; + + sha256_init(hash); + + RNDr(hash, S, 0); + RNDr(hash, S, 1); + RNDr(hash, S, 2); + RNDr(hash, S, 3); + RNDr(hash, S, 4); + RNDr(hash, S, 5); + RNDr(hash, S, 6); + RNDr(hash, S, 7); + RNDr(hash, S, 8); + RNDr(hash, S, 9); + RNDr(hash, S, 10); + RNDr(hash, S, 11); + RNDr(hash, S, 12); + RNDr(hash, S, 
13); + RNDr(hash, S, 14); + RNDr(hash, S, 15); + RNDr(hash, S, 16); + RNDr(hash, S, 17); + RNDr(hash, S, 18); + RNDr(hash, S, 19); + RNDr(hash, S, 20); + RNDr(hash, S, 21); + RNDr(hash, S, 22); + RNDr(hash, S, 23); + RNDr(hash, S, 24); + RNDr(hash, S, 25); + RNDr(hash, S, 26); + RNDr(hash, S, 27); + RNDr(hash, S, 28); + RNDr(hash, S, 29); + RNDr(hash, S, 30); + RNDr(hash, S, 31); + RNDr(hash, S, 32); + RNDr(hash, S, 33); + RNDr(hash, S, 34); + RNDr(hash, S, 35); + RNDr(hash, S, 36); + RNDr(hash, S, 37); + RNDr(hash, S, 38); + RNDr(hash, S, 39); + RNDr(hash, S, 40); + RNDr(hash, S, 41); + RNDr(hash, S, 42); + RNDr(hash, S, 43); + RNDr(hash, S, 44); + RNDr(hash, S, 45); + RNDr(hash, S, 46); + RNDr(hash, S, 47); + RNDr(hash, S, 48); + RNDr(hash, S, 49); + RNDr(hash, S, 50); + RNDr(hash, S, 51); + RNDr(hash, S, 52); + RNDr(hash, S, 53); + RNDr(hash, S, 54); + RNDr(hash, S, 55); + RNDr(hash, S, 56); + + hash[2] += hash[6] + S1(hash[3]) + Ch(hash[3], hash[4], hash[5]) + + S[57] + sha256_k[57]; + hash[1] += hash[5] + S1(hash[2]) + Ch(hash[2], hash[3], hash[4]) + + S[58] + sha256_k[58]; + hash[0] += hash[4] + S1(hash[1]) + Ch(hash[1], hash[2], hash[3]) + + S[59] + sha256_k[59]; + hash[7] += hash[3] + S1(hash[0]) + Ch(hash[0], hash[1], hash[2]) + + S[60] + sha256_k[60] + + sha256_h[7]; +} + +#endif /* EXTERN_SHA256 */ + +#ifdef HAVE_SHA256_4WAY + +void sha256d_ms_4way(uint32_t *hash, uint32_t *data, + const uint32_t *midstate, const uint32_t *prehash); + +static inline int scanhash_sha256d_4way(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done) +{ + uint32_t data[4 * 64] __attribute__((aligned(128))); + uint32_t hash[4 * 8] __attribute__((aligned(32))); + uint32_t midstate[4 * 8] __attribute__((aligned(32))); + uint32_t prehash[4 * 8] __attribute__((aligned(32))); + uint32_t n = pdata[19] - 1; + const uint32_t first_nonce = pdata[19]; + const uint32_t Htarg = ptarget[7]; + int i, j; + + memcpy(data, pdata + 16, 64); + 
sha256d_preextend(data); + for (i = 31; i >= 0; i--) + for (j = 0; j < 4; j++) + data[i * 4 + j] = data[i]; + + sha256_init(midstate); + sha256_transform(midstate, pdata, 0); + memcpy(prehash, midstate, 32); + sha256d_prehash(prehash, pdata + 16); + for (i = 7; i >= 0; i--) { + for (j = 0; j < 4; j++) { + midstate[i * 4 + j] = midstate[i]; + prehash[i * 4 + j] = prehash[i]; + } + } + + do { + for (i = 0; i < 4; i++) + data[4 * 3 + i] = ++n; + + sha256d_ms_4way(hash, data, midstate, prehash); + + for (i = 0; i < 4; i++) { + if (swab32(hash[4 * 7 + i]) <= Htarg) { + pdata[19] = data[4 * 3 + i]; + sha256d_80_swap(hash, pdata); + if (fulltest(hash, ptarget)) { + *hashes_done = n - first_nonce + 1; + return 1; + } + } + } + } while (n < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = n - first_nonce + 1; + pdata[19] = n; + return 0; +} + +#endif /* HAVE_SHA256_4WAY */ + +#ifdef HAVE_SHA256_8WAY + +void sha256d_ms_8way(uint32_t *hash, uint32_t *data, + const uint32_t *midstate, const uint32_t *prehash); + +static inline int scanhash_sha256d_8way(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done) +{ + uint32_t data[8 * 64] __attribute__((aligned(128))); + uint32_t hash[8 * 8] __attribute__((aligned(32))); + uint32_t midstate[8 * 8] __attribute__((aligned(32))); + uint32_t prehash[8 * 8] __attribute__((aligned(32))); + uint32_t n = pdata[19] - 1; + const uint32_t first_nonce = pdata[19]; + const uint32_t Htarg = ptarget[7]; + int i, j; + + memcpy(data, pdata + 16, 64); + sha256d_preextend(data); + for (i = 31; i >= 0; i--) + for (j = 0; j < 8; j++) + data[i * 8 + j] = data[i]; + + sha256_init(midstate); + sha256_transform(midstate, pdata, 0); + memcpy(prehash, midstate, 32); + sha256d_prehash(prehash, pdata + 16); + for (i = 7; i >= 0; i--) { + for (j = 0; j < 8; j++) { + midstate[i * 8 + j] = midstate[i]; + prehash[i * 8 + j] = prehash[i]; + } + } + + do { + for (i = 0; i < 8; i++) + data[8 * 3 + i] = 
++n; + + sha256d_ms_8way(hash, data, midstate, prehash); + + for (i = 0; i < 8; i++) { + if (swab32(hash[8 * 7 + i]) <= Htarg) { + pdata[19] = data[8 * 3 + i]; + sha256d_80_swap(hash, pdata); + if (fulltest(hash, ptarget)) { + *hashes_done = n - first_nonce + 1; + return 1; + } + } + } + } while (n < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = n - first_nonce + 1; + pdata[19] = n; + return 0; +} + +#endif /* HAVE_SHA256_8WAY */ + +int scanhash_sha256d(int thr_id, uint32_t *pdata, const uint32_t *ptarget, + uint32_t max_nonce, unsigned long *hashes_done) +{ + uint32_t data[64] __attribute__((aligned(128))); + uint32_t hash[8] __attribute__((aligned(32))); + uint32_t midstate[8] __attribute__((aligned(32))); + uint32_t prehash[8] __attribute__((aligned(32))); + uint32_t n = pdata[19] - 1; + const uint32_t first_nonce = pdata[19]; + const uint32_t Htarg = ptarget[7]; + +#ifdef HAVE_SHA256_8WAY + if (sha256_use_8way()) + return scanhash_sha256d_8way(thr_id, pdata, ptarget, + max_nonce, hashes_done); +#endif +#ifdef HAVE_SHA256_4WAY + if (sha256_use_4way()) + return scanhash_sha256d_4way(thr_id, pdata, ptarget, + max_nonce, hashes_done); +#endif + + memcpy(data, pdata + 16, 64); + sha256d_preextend(data); + + sha256_init(midstate); + sha256_transform(midstate, pdata, 0); + memcpy(prehash, midstate, 32); + sha256d_prehash(prehash, pdata + 16); + + do { + data[3] = ++n; + sha256d_ms(hash, data, midstate, prehash); + if (swab32(hash[7]) <= Htarg) { + pdata[19] = data[3]; + sha256d_80_swap(hash, pdata); + if (fulltest(hash, ptarget)) { + *hashes_done = n - first_nonce + 1; + return 1; + } + } + } while (n < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = n - first_nonce + 1; + pdata[19] = n; + return 0; +} diff --git a/sph/sha2big.c b/sph/sha2big.c new file mode 100644 index 0000000000..00a7e7f70d --- /dev/null +++ b/sph/sha2big.c @@ -0,0 +1,256 @@ +/* $Id: sha2big.c 216 2010-06-08 09:46:57Z tp $ */ +/* + * SHA-384 / SHA-512 
implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include + +#include "sph_sha2.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +#if SPH_64 + +#define CH(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z)) +#define MAJ(X, Y, Z) (((X) & (Y)) | (((X) | (Y)) & (Z))) + +#define ROTR64 SPH_ROTR64 + +#define BSG5_0(x) (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39)) +#define BSG5_1(x) (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41)) +#define SSG5_0(x) (ROTR64(x, 1) ^ ROTR64(x, 8) ^ SPH_T64((x) >> 7)) +#define SSG5_1(x) (ROTR64(x, 19) ^ ROTR64(x, 61) ^ SPH_T64((x) >> 6)) + +static const sph_u64 K512[80] = { + SPH_C64(0x428A2F98D728AE22), SPH_C64(0x7137449123EF65CD), + SPH_C64(0xB5C0FBCFEC4D3B2F), SPH_C64(0xE9B5DBA58189DBBC), + SPH_C64(0x3956C25BF348B538), SPH_C64(0x59F111F1B605D019), + SPH_C64(0x923F82A4AF194F9B), SPH_C64(0xAB1C5ED5DA6D8118), + SPH_C64(0xD807AA98A3030242), SPH_C64(0x12835B0145706FBE), + SPH_C64(0x243185BE4EE4B28C), SPH_C64(0x550C7DC3D5FFB4E2), + SPH_C64(0x72BE5D74F27B896F), SPH_C64(0x80DEB1FE3B1696B1), + SPH_C64(0x9BDC06A725C71235), SPH_C64(0xC19BF174CF692694), + SPH_C64(0xE49B69C19EF14AD2), SPH_C64(0xEFBE4786384F25E3), + SPH_C64(0x0FC19DC68B8CD5B5), SPH_C64(0x240CA1CC77AC9C65), + SPH_C64(0x2DE92C6F592B0275), SPH_C64(0x4A7484AA6EA6E483), + SPH_C64(0x5CB0A9DCBD41FBD4), SPH_C64(0x76F988DA831153B5), + SPH_C64(0x983E5152EE66DFAB), SPH_C64(0xA831C66D2DB43210), + SPH_C64(0xB00327C898FB213F), SPH_C64(0xBF597FC7BEEF0EE4), + SPH_C64(0xC6E00BF33DA88FC2), SPH_C64(0xD5A79147930AA725), + SPH_C64(0x06CA6351E003826F), SPH_C64(0x142929670A0E6E70), + SPH_C64(0x27B70A8546D22FFC), SPH_C64(0x2E1B21385C26C926), + SPH_C64(0x4D2C6DFC5AC42AED), SPH_C64(0x53380D139D95B3DF), + SPH_C64(0x650A73548BAF63DE), SPH_C64(0x766A0ABB3C77B2A8), + SPH_C64(0x81C2C92E47EDAEE6), SPH_C64(0x92722C851482353B), + SPH_C64(0xA2BFE8A14CF10364), SPH_C64(0xA81A664BBC423001), + SPH_C64(0xC24B8B70D0F89791), SPH_C64(0xC76C51A30654BE30), + 
SPH_C64(0xD192E819D6EF5218), SPH_C64(0xD69906245565A910), + SPH_C64(0xF40E35855771202A), SPH_C64(0x106AA07032BBD1B8), + SPH_C64(0x19A4C116B8D2D0C8), SPH_C64(0x1E376C085141AB53), + SPH_C64(0x2748774CDF8EEB99), SPH_C64(0x34B0BCB5E19B48A8), + SPH_C64(0x391C0CB3C5C95A63), SPH_C64(0x4ED8AA4AE3418ACB), + SPH_C64(0x5B9CCA4F7763E373), SPH_C64(0x682E6FF3D6B2B8A3), + SPH_C64(0x748F82EE5DEFB2FC), SPH_C64(0x78A5636F43172F60), + SPH_C64(0x84C87814A1F0AB72), SPH_C64(0x8CC702081A6439EC), + SPH_C64(0x90BEFFFA23631E28), SPH_C64(0xA4506CEBDE82BDE9), + SPH_C64(0xBEF9A3F7B2C67915), SPH_C64(0xC67178F2E372532B), + SPH_C64(0xCA273ECEEA26619C), SPH_C64(0xD186B8C721C0C207), + SPH_C64(0xEADA7DD6CDE0EB1E), SPH_C64(0xF57D4F7FEE6ED178), + SPH_C64(0x06F067AA72176FBA), SPH_C64(0x0A637DC5A2C898A6), + SPH_C64(0x113F9804BEF90DAE), SPH_C64(0x1B710B35131C471B), + SPH_C64(0x28DB77F523047D84), SPH_C64(0x32CAAB7B40C72493), + SPH_C64(0x3C9EBE0A15C9BEBC), SPH_C64(0x431D67C49C100D4C), + SPH_C64(0x4CC5D4BECB3E42B6), SPH_C64(0x597F299CFC657E2A), + SPH_C64(0x5FCB6FAB3AD6FAEC), SPH_C64(0x6C44198C4A475817) +}; + +static const sph_u64 H384[8] = { + SPH_C64(0xCBBB9D5DC1059ED8), SPH_C64(0x629A292A367CD507), + SPH_C64(0x9159015A3070DD17), SPH_C64(0x152FECD8F70E5939), + SPH_C64(0x67332667FFC00B31), SPH_C64(0x8EB44A8768581511), + SPH_C64(0xDB0C2E0D64F98FA7), SPH_C64(0x47B5481DBEFA4FA4) +}; + +static const sph_u64 H512[8] = { + SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B), + SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1), + SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F), + SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179) +}; + +/* + * This macro defines the body for a SHA-384 / SHA-512 compression function + * implementation. The "in" parameter should evaluate, when applied to a + * numerical input parameter from 0 to 15, to an expression which yields + * the corresponding input block. 
The "r" parameter should evaluate to + * an array or pointer expression designating the array of 8 words which + * contains the input and output of the compression function. + * + * SHA-512 is hard for the compiler. If the loop is completely unrolled, + * then the code will be quite huge (possibly more than 100 kB), and the + * performance will be degraded due to cache misses on the code. We + * unroll only eight steps, which avoids all needless copies when + * 64-bit registers are swapped. + */ + +#define SHA3_STEP(A, B, C, D, E, F, G, H, i) do { \ + sph_u64 T1, T2; \ + T1 = SPH_T64(H + BSG5_1(E) + CH(E, F, G) + K512[i] + W[i]); \ + T2 = SPH_T64(BSG5_0(A) + MAJ(A, B, C)); \ + D = SPH_T64(D + T1); \ + H = SPH_T64(T1 + T2); \ + } while (0) + +#define SHA3_ROUND_BODY(in, r) do { \ + int i; \ + sph_u64 A, B, C, D, E, F, G, H; \ + sph_u64 W[80]; \ + \ + for (i = 0; i < 16; i ++) \ + W[i] = in(i); \ + \ + for (i = 16; i < 80; i ++) \ + W[i] = SPH_T64(SSG5_1(W[i - 2]) + W[i - 7] \ + + SSG5_0(W[i - 15]) + W[i - 16]); \ + A = (r)[0]; \ + B = (r)[1]; \ + C = (r)[2]; \ + D = (r)[3]; \ + E = (r)[4]; \ + F = (r)[5]; \ + G = (r)[6]; \ + H = (r)[7]; \ + for (i = 0; i < 80; i += 8) { \ + SHA3_STEP(A, B, C, D, E, F, G, H, i + 0); \ + SHA3_STEP(H, A, B, C, D, E, F, G, i + 1); \ + SHA3_STEP(G, H, A, B, C, D, E, F, i + 2); \ + SHA3_STEP(F, G, H, A, B, C, D, E, i + 3); \ + SHA3_STEP(E, F, G, H, A, B, C, D, i + 4); \ + SHA3_STEP(D, E, F, G, H, A, B, C, i + 5); \ + SHA3_STEP(C, D, E, F, G, H, A, B, i + 6); \ + SHA3_STEP(B, C, D, E, F, G, H, A, i + 7); \ + } \ + (r)[0] = SPH_T64((r)[0] + A); \ + (r)[1] = SPH_T64((r)[1] + B); \ + (r)[2] = SPH_T64((r)[2] + C); \ + (r)[3] = SPH_T64((r)[3] + D); \ + (r)[4] = SPH_T64((r)[4] + E); \ + (r)[5] = SPH_T64((r)[5] + F); \ + (r)[6] = SPH_T64((r)[6] + G); \ + (r)[7] = SPH_T64((r)[7] + H); \ + } while (0) + +/* + * One round of SHA-384 / SHA-512. The data must be aligned for 64-bit access. 
+ */ +static void +sha3_round(const unsigned char *data, sph_u64 r[8]) +{ +#define SHA3_IN(x) sph_dec64be_aligned(data + (8 * (x))) + SHA3_ROUND_BODY(SHA3_IN, r); +#undef SHA3_IN +} + +/* see sph_sha3.h */ +void +sph_sha384_init(void *cc) +{ + sph_sha384_context *sc; + + sc = cc; + memcpy(sc->val, H384, sizeof H384); + sc->count = 0; +} + +/* see sph_sha3.h */ +void +sph_sha512_init(void *cc) +{ + sph_sha512_context *sc; + + sc = cc; + memcpy(sc->val, H512, sizeof H512); + sc->count = 0; +} + +#define RFUN sha3_round +#define HASH sha384 +#define BE64 1 +#include "md_helper.c" + +/* see sph_sha3.h */ +void +sph_sha384_close(void *cc, void *dst) +{ + sha384_close(cc, dst, 6); + sph_sha384_init(cc); +} + +/* see sph_sha3.h */ +void +sph_sha384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + sha384_addbits_and_close(cc, ub, n, dst, 6); + sph_sha384_init(cc); +} + +/* see sph_sha3.h */ +void +sph_sha512_close(void *cc, void *dst) +{ + sha384_close(cc, dst, 8); + sph_sha512_init(cc); +} + +/* see sph_sha3.h */ +void +sph_sha512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + sha384_addbits_and_close(cc, ub, n, dst, 8); + sph_sha512_init(cc); +} + +/* see sph_sha3.h */ +void +sph_sha384_comp(const sph_u64 msg[16], sph_u64 val[8]) +{ +#define SHA3_IN(x) msg[x] + SHA3_ROUND_BODY(SHA3_IN, val); +#undef SHA3_IN +} + +#endif +#ifdef __cplusplus +} +#endif + diff --git a/sph/shabal.c b/sph/shabal.c new file mode 100644 index 0000000000..4f5162140f --- /dev/null +++ b/sph/shabal.c @@ -0,0 +1,799 @@ +/* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */ +/* + * Shabal implementation. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include + +#include "sph_shabal.h" + +#ifdef _MSC_VER +#pragma warning (disable: 4146) +#endif + +/* + * Part of this code was automatically generated (the part between + * the "BEGIN" and "END" markers). + */ + +#define sM 16 + +#define C32 SPH_C32 +#define T32 SPH_T32 + +#define O1 13 +#define O2 9 +#define O3 6 + +/* + * We copy the state into local variables, so that the compiler knows + * that it can optimize them at will. + */ + +/* BEGIN -- automatically generated code. 
*/ + +#define DECL_STATE \ + sph_u32 A00, A01, A02, A03, A04, A05, A06, A07, \ + A08, A09, A0A, A0B; \ + sph_u32 B0, B1, B2, B3, B4, B5, B6, B7, \ + B8, B9, BA, BB, BC, BD, BE, BF; \ + sph_u32 C0, C1, C2, C3, C4, C5, C6, C7, \ + C8, C9, CA, CB, CC, CD, CE, CF; \ + sph_u32 M0, M1, M2, M3, M4, M5, M6, M7, \ + M8, M9, MA, MB, MC, MD, ME, MF; \ + sph_u32 Wlow, Whigh; + +#define READ_STATE(state) do { \ + A00 = (state)->A[0]; \ + A01 = (state)->A[1]; \ + A02 = (state)->A[2]; \ + A03 = (state)->A[3]; \ + A04 = (state)->A[4]; \ + A05 = (state)->A[5]; \ + A06 = (state)->A[6]; \ + A07 = (state)->A[7]; \ + A08 = (state)->A[8]; \ + A09 = (state)->A[9]; \ + A0A = (state)->A[10]; \ + A0B = (state)->A[11]; \ + B0 = (state)->B[0]; \ + B1 = (state)->B[1]; \ + B2 = (state)->B[2]; \ + B3 = (state)->B[3]; \ + B4 = (state)->B[4]; \ + B5 = (state)->B[5]; \ + B6 = (state)->B[6]; \ + B7 = (state)->B[7]; \ + B8 = (state)->B[8]; \ + B9 = (state)->B[9]; \ + BA = (state)->B[10]; \ + BB = (state)->B[11]; \ + BC = (state)->B[12]; \ + BD = (state)->B[13]; \ + BE = (state)->B[14]; \ + BF = (state)->B[15]; \ + C0 = (state)->C[0]; \ + C1 = (state)->C[1]; \ + C2 = (state)->C[2]; \ + C3 = (state)->C[3]; \ + C4 = (state)->C[4]; \ + C5 = (state)->C[5]; \ + C6 = (state)->C[6]; \ + C7 = (state)->C[7]; \ + C8 = (state)->C[8]; \ + C9 = (state)->C[9]; \ + CA = (state)->C[10]; \ + CB = (state)->C[11]; \ + CC = (state)->C[12]; \ + CD = (state)->C[13]; \ + CE = (state)->C[14]; \ + CF = (state)->C[15]; \ + Wlow = (state)->Wlow; \ + Whigh = (state)->Whigh; \ + } while (0) + +#define WRITE_STATE(state) do { \ + (state)->A[0] = A00; \ + (state)->A[1] = A01; \ + (state)->A[2] = A02; \ + (state)->A[3] = A03; \ + (state)->A[4] = A04; \ + (state)->A[5] = A05; \ + (state)->A[6] = A06; \ + (state)->A[7] = A07; \ + (state)->A[8] = A08; \ + (state)->A[9] = A09; \ + (state)->A[10] = A0A; \ + (state)->A[11] = A0B; \ + (state)->B[0] = B0; \ + (state)->B[1] = B1; \ + (state)->B[2] = B2; \ + (state)->B[3] = B3; \ + 
(state)->B[4] = B4; \ + (state)->B[5] = B5; \ + (state)->B[6] = B6; \ + (state)->B[7] = B7; \ + (state)->B[8] = B8; \ + (state)->B[9] = B9; \ + (state)->B[10] = BA; \ + (state)->B[11] = BB; \ + (state)->B[12] = BC; \ + (state)->B[13] = BD; \ + (state)->B[14] = BE; \ + (state)->B[15] = BF; \ + (state)->C[0] = C0; \ + (state)->C[1] = C1; \ + (state)->C[2] = C2; \ + (state)->C[3] = C3; \ + (state)->C[4] = C4; \ + (state)->C[5] = C5; \ + (state)->C[6] = C6; \ + (state)->C[7] = C7; \ + (state)->C[8] = C8; \ + (state)->C[9] = C9; \ + (state)->C[10] = CA; \ + (state)->C[11] = CB; \ + (state)->C[12] = CC; \ + (state)->C[13] = CD; \ + (state)->C[14] = CE; \ + (state)->C[15] = CF; \ + (state)->Wlow = Wlow; \ + (state)->Whigh = Whigh; \ + } while (0) + +#define DECODE_BLOCK do { \ + M0 = sph_dec32le_aligned(buf + 0); \ + M1 = sph_dec32le_aligned(buf + 4); \ + M2 = sph_dec32le_aligned(buf + 8); \ + M3 = sph_dec32le_aligned(buf + 12); \ + M4 = sph_dec32le_aligned(buf + 16); \ + M5 = sph_dec32le_aligned(buf + 20); \ + M6 = sph_dec32le_aligned(buf + 24); \ + M7 = sph_dec32le_aligned(buf + 28); \ + M8 = sph_dec32le_aligned(buf + 32); \ + M9 = sph_dec32le_aligned(buf + 36); \ + MA = sph_dec32le_aligned(buf + 40); \ + MB = sph_dec32le_aligned(buf + 44); \ + MC = sph_dec32le_aligned(buf + 48); \ + MD = sph_dec32le_aligned(buf + 52); \ + ME = sph_dec32le_aligned(buf + 56); \ + MF = sph_dec32le_aligned(buf + 60); \ + } while (0) + +#define INPUT_BLOCK_ADD do { \ + B0 = T32(B0 + M0); \ + B1 = T32(B1 + M1); \ + B2 = T32(B2 + M2); \ + B3 = T32(B3 + M3); \ + B4 = T32(B4 + M4); \ + B5 = T32(B5 + M5); \ + B6 = T32(B6 + M6); \ + B7 = T32(B7 + M7); \ + B8 = T32(B8 + M8); \ + B9 = T32(B9 + M9); \ + BA = T32(BA + MA); \ + BB = T32(BB + MB); \ + BC = T32(BC + MC); \ + BD = T32(BD + MD); \ + BE = T32(BE + ME); \ + BF = T32(BF + MF); \ + } while (0) + +#define INPUT_BLOCK_SUB do { \ + C0 = T32(C0 - M0); \ + C1 = T32(C1 - M1); \ + C2 = T32(C2 - M2); \ + C3 = T32(C3 - M3); \ + C4 = T32(C4 - M4); \ + 
C5 = T32(C5 - M5); \ + C6 = T32(C6 - M6); \ + C7 = T32(C7 - M7); \ + C8 = T32(C8 - M8); \ + C9 = T32(C9 - M9); \ + CA = T32(CA - MA); \ + CB = T32(CB - MB); \ + CC = T32(CC - MC); \ + CD = T32(CD - MD); \ + CE = T32(CE - ME); \ + CF = T32(CF - MF); \ + } while (0) + +#define XOR_W do { \ + A00 ^= Wlow; \ + A01 ^= Whigh; \ + } while (0) + +#define SWAP(v1, v2) do { \ + sph_u32 tmp = (v1); \ + (v1) = (v2); \ + (v2) = tmp; \ + } while (0) + +#define SWAP_BC do { \ + SWAP(B0, C0); \ + SWAP(B1, C1); \ + SWAP(B2, C2); \ + SWAP(B3, C3); \ + SWAP(B4, C4); \ + SWAP(B5, C5); \ + SWAP(B6, C6); \ + SWAP(B7, C7); \ + SWAP(B8, C8); \ + SWAP(B9, C9); \ + SWAP(BA, CA); \ + SWAP(BB, CB); \ + SWAP(BC, CC); \ + SWAP(BD, CD); \ + SWAP(BE, CE); \ + SWAP(BF, CF); \ + } while (0) + +#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm) do { \ + xa0 = T32((xa0 \ + ^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \ + ^ xc) * 3U) \ + ^ xb1 ^ (xb2 & ~xb3) ^ xm; \ + xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \ + } while (0) + +#define PERM_STEP_0 do { \ + PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \ + PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \ + PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \ + PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \ + PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \ + PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \ + PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \ + PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \ + PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \ + PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \ + PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \ + PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \ + PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \ + PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \ + PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \ + PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \ + } while (0) + +#define PERM_STEP_1 do { \ + PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \ + PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \ + PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \ + 
PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \ + PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \ + PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \ + PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \ + PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \ + PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \ + PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \ + PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \ + PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \ + PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \ + PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \ + PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \ + PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \ + } while (0) + +#define PERM_STEP_2 do { \ + PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \ + PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \ + PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \ + PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \ + PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \ + PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \ + PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \ + PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \ + PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \ + PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \ + PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \ + PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \ + PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \ + PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \ + PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \ + PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \ + } while (0) + +#define APPLY_P do { \ + B0 = T32(B0 << 17) | (B0 >> 15); \ + B1 = T32(B1 << 17) | (B1 >> 15); \ + B2 = T32(B2 << 17) | (B2 >> 15); \ + B3 = T32(B3 << 17) | (B3 >> 15); \ + B4 = T32(B4 << 17) | (B4 >> 15); \ + B5 = T32(B5 << 17) | (B5 >> 15); \ + B6 = T32(B6 << 17) | (B6 >> 15); \ + B7 = T32(B7 << 17) | (B7 >> 15); \ + B8 = T32(B8 << 17) | (B8 >> 15); \ + B9 = T32(B9 << 17) | (B9 >> 15); \ + BA = T32(BA << 17) | (BA >> 15); \ + BB = T32(BB << 17) | (BB >> 15); \ + BC = T32(BC << 17) | (BC >> 15); \ + BD = T32(BD << 17) | (BD >> 15); \ + BE = 
T32(BE << 17) | (BE >> 15); \ + BF = T32(BF << 17) | (BF >> 15); \ + PERM_STEP_0; \ + PERM_STEP_1; \ + PERM_STEP_2; \ + A0B = T32(A0B + C6); \ + A0A = T32(A0A + C5); \ + A09 = T32(A09 + C4); \ + A08 = T32(A08 + C3); \ + A07 = T32(A07 + C2); \ + A06 = T32(A06 + C1); \ + A05 = T32(A05 + C0); \ + A04 = T32(A04 + CF); \ + A03 = T32(A03 + CE); \ + A02 = T32(A02 + CD); \ + A01 = T32(A01 + CC); \ + A00 = T32(A00 + CB); \ + A0B = T32(A0B + CA); \ + A0A = T32(A0A + C9); \ + A09 = T32(A09 + C8); \ + A08 = T32(A08 + C7); \ + A07 = T32(A07 + C6); \ + A06 = T32(A06 + C5); \ + A05 = T32(A05 + C4); \ + A04 = T32(A04 + C3); \ + A03 = T32(A03 + C2); \ + A02 = T32(A02 + C1); \ + A01 = T32(A01 + C0); \ + A00 = T32(A00 + CF); \ + A0B = T32(A0B + CE); \ + A0A = T32(A0A + CD); \ + A09 = T32(A09 + CC); \ + A08 = T32(A08 + CB); \ + A07 = T32(A07 + CA); \ + A06 = T32(A06 + C9); \ + A05 = T32(A05 + C8); \ + A04 = T32(A04 + C7); \ + A03 = T32(A03 + C6); \ + A02 = T32(A02 + C5); \ + A01 = T32(A01 + C4); \ + A00 = T32(A00 + C3); \ + } while (0) + +#define INCR_W do { \ + if ((Wlow = T32(Wlow + 1)) == 0) \ + Whigh = T32(Whigh + 1); \ + } while (0) + +static const sph_u32 A_init_192[] = { + C32(0xFD749ED4), C32(0xB798E530), C32(0x33904B6F), C32(0x46BDA85E), + C32(0x076934B4), C32(0x454B4058), C32(0x77F74527), C32(0xFB4CF465), + C32(0x62931DA9), C32(0xE778C8DB), C32(0x22B3998E), C32(0xAC15CFB9) +}; + +static const sph_u32 B_init_192[] = { + C32(0x58BCBAC4), C32(0xEC47A08E), C32(0xAEE933B2), C32(0xDFCBC824), + C32(0xA7944804), C32(0xBF65BDB0), C32(0x5A9D4502), C32(0x59979AF7), + C32(0xC5CEA54E), C32(0x4B6B8150), C32(0x16E71909), C32(0x7D632319), + C32(0x930573A0), C32(0xF34C63D1), C32(0xCAF914B4), C32(0xFDD6612C) +}; + +static const sph_u32 C_init_192[] = { + C32(0x61550878), C32(0x89EF2B75), C32(0xA1660C46), C32(0x7EF3855B), + C32(0x7297B58C), C32(0x1BC67793), C32(0x7FB1C723), C32(0xB66FC640), + C32(0x1A48B71C), C32(0xF0976D17), C32(0x088CE80A), C32(0xA454EDF3), + C32(0x1C096BF4), 
C32(0xAC76224B), C32(0x5215781C), C32(0xCD5D2669) +}; + +static const sph_u32 A_init_224[] = { + C32(0xA5201467), C32(0xA9B8D94A), C32(0xD4CED997), C32(0x68379D7B), + C32(0xA7FC73BA), C32(0xF1A2546B), C32(0x606782BF), C32(0xE0BCFD0F), + C32(0x2F25374E), C32(0x069A149F), C32(0x5E2DFF25), C32(0xFAECF061) +}; + +static const sph_u32 B_init_224[] = { + C32(0xEC9905D8), C32(0xF21850CF), C32(0xC0A746C8), C32(0x21DAD498), + C32(0x35156EEB), C32(0x088C97F2), C32(0x26303E40), C32(0x8A2D4FB5), + C32(0xFEEE44B6), C32(0x8A1E9573), C32(0x7B81111A), C32(0xCBC139F0), + C32(0xA3513861), C32(0x1D2C362E), C32(0x918C580E), C32(0xB58E1B9C) +}; + +static const sph_u32 C_init_224[] = { + C32(0xE4B573A1), C32(0x4C1A0880), C32(0x1E907C51), C32(0x04807EFD), + C32(0x3AD8CDE5), C32(0x16B21302), C32(0x02512C53), C32(0x2204CB18), + C32(0x99405F2D), C32(0xE5B648A1), C32(0x70AB1D43), C32(0xA10C25C2), + C32(0x16F1AC05), C32(0x38BBEB56), C32(0x9B01DC60), C32(0xB1096D83) +}; + +static const sph_u32 A_init_256[] = { + C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191), + C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C), + C32(0x14CE5A45), C32(0x22AF50DC), C32(0xEFFDBC6B), C32(0xEB21B74A) +}; + +static const sph_u32 B_init_256[] = { + C32(0xB555C6EE), C32(0x3E710596), C32(0xA72A652F), C32(0x9301515F), + C32(0xDA28C1FA), C32(0x696FD868), C32(0x9CB6BF72), C32(0x0AFE4002), + C32(0xA6E03615), C32(0x5138C1D4), C32(0xBE216306), C32(0xB38B8890), + C32(0x3EA8B96B), C32(0x3299ACE4), C32(0x30924DD4), C32(0x55CB34A5) +}; + +static const sph_u32 C_init_256[] = { + C32(0xB405F031), C32(0xC4233EBA), C32(0xB3733979), C32(0xC0DD9D55), + C32(0xC51C28AE), C32(0xA327B8E1), C32(0x56C56167), C32(0xED614433), + C32(0x88B59D60), C32(0x60E2CEBA), C32(0x758B4B8B), C32(0x83E82A7F), + C32(0xBC968828), C32(0xE6E00BF7), C32(0xBA839E55), C32(0x9B491C60) +}; + +static const sph_u32 A_init_384[] = { + C32(0xC8FCA331), C32(0xE55C504E), C32(0x003EBF26), C32(0xBB6B8D83), + C32(0x7B0448C1), 
C32(0x41B82789), C32(0x0A7C9601), C32(0x8D659CFF), + C32(0xB6E2673E), C32(0xCA54C77B), C32(0x1460FD7E), C32(0x3FCB8F2D) +}; + +static const sph_u32 B_init_384[] = { + C32(0x527291FC), C32(0x2A16455F), C32(0x78E627E5), C32(0x944F169F), + C32(0x1CA6F016), C32(0xA854EA25), C32(0x8DB98ABE), C32(0xF2C62641), + C32(0x30117DCB), C32(0xCF5C4309), C32(0x93711A25), C32(0xF9F671B8), + C32(0xB01D2116), C32(0x333F4B89), C32(0xB285D165), C32(0x86829B36) +}; + +static const sph_u32 C_init_384[] = { + C32(0xF764B11A), C32(0x76172146), C32(0xCEF6934D), C32(0xC6D28399), + C32(0xFE095F61), C32(0x5E6018B4), C32(0x5048ECF5), C32(0x51353261), + C32(0x6E6E36DC), C32(0x63130DAD), C32(0xA9C69BD6), C32(0x1E90EA0C), + C32(0x7C35073B), C32(0x28D95E6D), C32(0xAA340E0D), C32(0xCB3DEE70) +}; + +static const sph_u32 A_init_512[] = { + C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632), + C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B), + C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F) +}; + +static const sph_u32 B_init_512[] = { + C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640), + C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08), + C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E), + C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B) +}; + +static const sph_u32 C_init_512[] = { + C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359), + C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780), + C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A), + C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969) +}; + +/* END -- automatically generated code. */ + +static void +shabal_init(void *cc, unsigned size) +{ + /* + * We have precomputed initial states for all the supported + * output bit lengths. 
+ */ + const sph_u32 *A_init, *B_init, *C_init; + sph_shabal_context *sc; + + switch (size) { + case 192: + A_init = A_init_192; + B_init = B_init_192; + C_init = C_init_192; + break; + case 224: + A_init = A_init_224; + B_init = B_init_224; + C_init = C_init_224; + break; + case 256: + A_init = A_init_256; + B_init = B_init_256; + C_init = C_init_256; + break; + case 384: + A_init = A_init_384; + B_init = B_init_384; + C_init = C_init_384; + break; + case 512: + A_init = A_init_512; + B_init = B_init_512; + C_init = C_init_512; + break; + default: + return; + } + sc = cc; + memcpy(sc->A, A_init, sizeof sc->A); + memcpy(sc->B, B_init, sizeof sc->B); + memcpy(sc->C, C_init, sizeof sc->C); + sc->Wlow = 1; + sc->Whigh = 0; + sc->ptr = 0; +} + +static void +shabal_core(void *cc, const unsigned char *data, size_t len) +{ + sph_shabal_context *sc; + unsigned char *buf; + size_t ptr; + DECL_STATE + + sc = cc; + buf = sc->buf; + ptr = sc->ptr; + + /* + * We do not want to copy the state to local variables if the + * amount of data is less than what is needed to complete the + * current block. Note that it is anyway suboptimal to call + * this method many times for small chunks of data. 
+ */ + if (len < (sizeof sc->buf) - ptr) { + memcpy(buf + ptr, data, len); + ptr += len; + sc->ptr = ptr; + return; + } + + READ_STATE(sc); + while (len > 0) { + size_t clen; + + clen = (sizeof sc->buf) - ptr; + if (clen > len) + clen = len; + memcpy(buf + ptr, data, clen); + ptr += clen; + data += clen; + len -= clen; + if (ptr == sizeof sc->buf) { + DECODE_BLOCK; + INPUT_BLOCK_ADD; + XOR_W; + APPLY_P; + INPUT_BLOCK_SUB; + SWAP_BC; + INCR_W; + ptr = 0; + } + } + WRITE_STATE(sc); + sc->ptr = ptr; +} + +static void +shabal_close(void *cc, unsigned ub, unsigned n, void *dst, unsigned size_words) +{ + sph_shabal_context *sc; + unsigned char *buf; + size_t ptr; + int i; + unsigned z; + union { + unsigned char tmp_out[64]; + sph_u32 dummy; + } u; + size_t out_len; + DECL_STATE + + sc = cc; + buf = sc->buf; + ptr = sc->ptr; + z = 0x80 >> n; + buf[ptr] = ((ub & -z) | z) & 0xFF; + memset(buf + ptr + 1, 0, (sizeof sc->buf) - (ptr + 1)); + READ_STATE(sc); + DECODE_BLOCK; + INPUT_BLOCK_ADD; + XOR_W; + APPLY_P; + for (i = 0; i < 3; i ++) { + SWAP_BC; + XOR_W; + APPLY_P; + } + + /* + * We just use our local variables; no need to go through + * the state structure. In order to share some code, we + * emit the relevant words into a temporary buffer, which + * we finally copy into the destination array. 
+ */ + switch (size_words) { + case 16: + sph_enc32le_aligned(u.tmp_out + 0, B0); + sph_enc32le_aligned(u.tmp_out + 4, B1); + sph_enc32le_aligned(u.tmp_out + 8, B2); + sph_enc32le_aligned(u.tmp_out + 12, B3); + /* fall through */ + case 12: + sph_enc32le_aligned(u.tmp_out + 16, B4); + sph_enc32le_aligned(u.tmp_out + 20, B5); + sph_enc32le_aligned(u.tmp_out + 24, B6); + sph_enc32le_aligned(u.tmp_out + 28, B7); + /* fall through */ + case 8: + sph_enc32le_aligned(u.tmp_out + 32, B8); + /* fall through */ + case 7: + sph_enc32le_aligned(u.tmp_out + 36, B9); + /* fall through */ + case 6: + sph_enc32le_aligned(u.tmp_out + 40, BA); + sph_enc32le_aligned(u.tmp_out + 44, BB); + sph_enc32le_aligned(u.tmp_out + 48, BC); + sph_enc32le_aligned(u.tmp_out + 52, BD); + sph_enc32le_aligned(u.tmp_out + 56, BE); + sph_enc32le_aligned(u.tmp_out + 60, BF); + break; + default: + return; + } + out_len = size_words << 2; + memcpy(dst, u.tmp_out + (sizeof u.tmp_out) - out_len, out_len); + shabal_init(sc, size_words << 5); +} + +/* see sph_shabal.h */ +void +sph_shabal192_init(void *cc) +{ + shabal_init(cc, 192); +} + +/* see sph_shabal.h */ +void +sph_shabal192(void *cc, const void *data, size_t len) +{ + shabal_core(cc, data, len); +} + +/* see sph_shabal.h */ +void +sph_shabal192_close(void *cc, void *dst) +{ + shabal_close(cc, 0, 0, dst, 6); +} + +/* see sph_shabal.h */ +void +sph_shabal192_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + shabal_close(cc, ub, n, dst, 6); +} + +/* see sph_shabal.h */ +void +sph_shabal224_init(void *cc) +{ + shabal_init(cc, 224); +} + +/* see sph_shabal.h */ +void +sph_shabal224(void *cc, const void *data, size_t len) +{ + shabal_core(cc, data, len); +} + +/* see sph_shabal.h */ +void +sph_shabal224_close(void *cc, void *dst) +{ + shabal_close(cc, 0, 0, dst, 7); +} + +/* see sph_shabal.h */ +void +sph_shabal224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + shabal_close(cc, ub, n, dst, 7); +} + +/* see 
sph_shabal.h */ +void +sph_shabal256_init(void *cc) +{ + shabal_init(cc, 256); +} + +/* see sph_shabal.h */ +void +sph_shabal256(void *cc, const void *data, size_t len) +{ + shabal_core(cc, data, len); +} + +/* see sph_shabal.h */ +void +sph_shabal256_close(void *cc, void *dst) +{ + shabal_close(cc, 0, 0, dst, 8); +} + +/* see sph_shabal.h */ +void +sph_shabal256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + shabal_close(cc, ub, n, dst, 8); +} + +/* see sph_shabal.h */ +void +sph_shabal384_init(void *cc) +{ + shabal_init(cc, 384); +} + +/* see sph_shabal.h */ +void +sph_shabal384(void *cc, const void *data, size_t len) +{ + shabal_core(cc, data, len); +} + +/* see sph_shabal.h */ +void +sph_shabal384_close(void *cc, void *dst) +{ + shabal_close(cc, 0, 0, dst, 12); +} + +/* see sph_shabal.h */ +void +sph_shabal384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + shabal_close(cc, ub, n, dst, 12); +} + +/* see sph_shabal.h */ +void +sph_shabal512_init(void *cc) +{ + shabal_init(cc, 512); +} + +/* see sph_shabal.h */ +void +sph_shabal512(void *cc, const void *data, size_t len) +{ + shabal_core(cc, data, len); +} + +/* see sph_shabal.h */ +void +sph_shabal512_close(void *cc, void *dst) +{ + shabal_close(cc, 0, 0, dst, 16); +} + +/* see sph_shabal.h */ +void +sph_shabal512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + shabal_close(cc, ub, n, dst, 16); +} diff --git a/sph/sph_haval.h b/sph/sph_haval.h new file mode 100644 index 0000000000..409daaf417 --- /dev/null +++ b/sph/sph_haval.h @@ -0,0 +1,976 @@ +/* $Id: sph_haval.h 218 2010-06-08 17:06:34Z tp $ */ +/** + * HAVAL interface. + * + * HAVAL is actually a family of 15 hash functions, depending on whether + * the internal computation uses 3, 4 or 5 passes, and on the output + * length, which is 128, 160, 192, 224 or 256 bits. 
This implementation + * provides interface functions for all 15, which internally map to + * three cores (depending on the number of passes). Note that output + * lengths other than 256 bits are not obtained by a simple truncation + * of a longer result; the requested length is encoded within the + * padding data. + * + * HAVAL was published in: Yuliang Zheng, Josef Pieprzyk and Jennifer + * Seberry: "HAVAL -- a one-way hashing algorithm with variable length + * of output", Advances in Cryptology -- AUSCRYPT'92, Lecture Notes in + * Computer Science, Vol.718, pp.83-104, Springer-Verlag, 1993. + * + * This paper, and a reference implementation, are available on the + * Calyptix web site: http://labs.calyptix.com/haval.php + * + * The HAVAL reference paper is quite unclear on the data encoding + * details, i.e. endianness (both byte order within a 32-bit word, and + * word order within a message block). This implementation has been + * made compatible with the reference implementation referenced above. + * + * @warning A collision for HAVAL-128/3 (HAVAL with three passes and + * 128-bit output) has been published; this function is thus considered + * as cryptographically broken. The status for other variants is unclear; + * use only with care. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_haval.h + * @author Thomas Pornin + */ + +#ifndef SPH_HAVAL_H__ +#define SPH_HAVAL_H__ + +#ifdef __cplusplus +extern "C"{ +#endif + +#include +#include "sph_types.h" + +/** + * Output size (in bits) for HAVAL-128/3. + */ +#define SPH_SIZE_haval128_3 128 + +/** + * Output size (in bits) for HAVAL-128/4. + */ +#define SPH_SIZE_haval128_4 128 + +/** + * Output size (in bits) for HAVAL-128/5. + */ +#define SPH_SIZE_haval128_5 128 + +/** + * Output size (in bits) for HAVAL-160/3. + */ +#define SPH_SIZE_haval160_3 160 + +/** + * Output size (in bits) for HAVAL-160/4. + */ +#define SPH_SIZE_haval160_4 160 + +/** + * Output size (in bits) for HAVAL-160/5. + */ +#define SPH_SIZE_haval160_5 160 + +/** + * Output size (in bits) for HAVAL-192/3. + */ +#define SPH_SIZE_haval192_3 192 + +/** + * Output size (in bits) for HAVAL-192/4. + */ +#define SPH_SIZE_haval192_4 192 + +/** + * Output size (in bits) for HAVAL-192/5. + */ +#define SPH_SIZE_haval192_5 192 + +/** + * Output size (in bits) for HAVAL-224/3. + */ +#define SPH_SIZE_haval224_3 224 + +/** + * Output size (in bits) for HAVAL-224/4. + */ +#define SPH_SIZE_haval224_4 224 + +/** + * Output size (in bits) for HAVAL-224/5. + */ +#define SPH_SIZE_haval224_5 224 + +/** + * Output size (in bits) for HAVAL-256/3. + */ +#define SPH_SIZE_haval256_3 256 + +/** + * Output size (in bits) for HAVAL-256/4. 
+ */ +#define SPH_SIZE_haval256_4 256 + +/** + * Output size (in bits) for HAVAL-256/5. + */ +#define SPH_SIZE_haval256_5 256 + +/** + * This structure is a context for HAVAL computations: it contains the + * intermediate values and some data from the last entered block. Once + * a HAVAL computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running HAVAL computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[128]; /* first field, for alignment */ + sph_u32 s0, s1, s2, s3, s4, s5, s6, s7; + unsigned olen, passes; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif +#endif +} sph_haval_context; + +/** + * Type for a HAVAL-128/3 context (identical to the common context). + */ +typedef sph_haval_context sph_haval128_3_context; + +/** + * Type for a HAVAL-128/4 context (identical to the common context). + */ +typedef sph_haval_context sph_haval128_4_context; + +/** + * Type for a HAVAL-128/5 context (identical to the common context). + */ +typedef sph_haval_context sph_haval128_5_context; + +/** + * Type for a HAVAL-160/3 context (identical to the common context). + */ +typedef sph_haval_context sph_haval160_3_context; + +/** + * Type for a HAVAL-160/4 context (identical to the common context). + */ +typedef sph_haval_context sph_haval160_4_context; + +/** + * Type for a HAVAL-160/5 context (identical to the common context). + */ +typedef sph_haval_context sph_haval160_5_context; + +/** + * Type for a HAVAL-192/3 context (identical to the common context). + */ +typedef sph_haval_context sph_haval192_3_context; + +/** + * Type for a HAVAL-192/4 context (identical to the common context). + */ +typedef sph_haval_context sph_haval192_4_context; + +/** + * Type for a HAVAL-192/5 context (identical to the common context). 
+ */ +typedef sph_haval_context sph_haval192_5_context; + +/** + * Type for a HAVAL-224/3 context (identical to the common context). + */ +typedef sph_haval_context sph_haval224_3_context; + +/** + * Type for a HAVAL-224/4 context (identical to the common context). + */ +typedef sph_haval_context sph_haval224_4_context; + +/** + * Type for a HAVAL-224/5 context (identical to the common context). + */ +typedef sph_haval_context sph_haval224_5_context; + +/** + * Type for a HAVAL-256/3 context (identical to the common context). + */ +typedef sph_haval_context sph_haval256_3_context; + +/** + * Type for a HAVAL-256/4 context (identical to the common context). + */ +typedef sph_haval_context sph_haval256_4_context; + +/** + * Type for a HAVAL-256/5 context (identical to the common context). + */ +typedef sph_haval_context sph_haval256_5_context; + +/** + * Initialize the context for HAVAL-128/3. + * + * @param cc context to initialize (pointer to a + * sph_haval128_3_context structure) + */ +void sph_haval128_3_init(void *cc); + +/** + * Process some data bytes for HAVAL-128/3. If len is 0, + * then this function does nothing. + * + * @param cc the HAVAL-128/3 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_haval128_3(void *cc, const void *data, size_t len); + +/** + * Close a HAVAL-128/3 computation. The output buffer must be wide + * enough to accommodate the result (16 bytes). The context is automatically + * reinitialized. + * + * @param cc the HAVAL-128/3 context + * @param dst the output buffer + */ +void sph_haval128_3_close(void *cc, void *dst); + +/** + * Close a HAVAL-128/3 computation. Up to 7 extra input bits may be added + * to the input message; these are the n upper bits of + * the ub byte (i.e. the first extra bit has value 128 in + * ub, the second extra bit has value 64, and so on). Other + * bits in ub are ignored.
+ * + * The output buffer must be wide enough to accomodate the result (16 + * bytes). The context is automatically reinitialized. + * + * @param cc the HAVAL-128/3 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the output buffer + */ +void sph_haval128_3_addbits_and_close(void *cc, + unsigned ub, unsigned n, void *dst); + +/** + * Initialize the context for HAVAL-128/4. + * + * @param cc context to initialize (pointer to a + * sph_haval128_4_context structure) + */ +void sph_haval128_4_init(void *cc); + +/** + * Process some data bytes for HAVAL-128/4. If len is 0, + * then this function does nothing. + * + * @param cc the HAVAL-128/4 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_haval128_4(void *cc, const void *data, size_t len); + +/** + * Close a HAVAL-128/4 computation. The output buffer must be wide + * enough to accommodate the result (16 bytes). The context is automatically + * reinitialized. + * + * @param cc the HAVAL-128/4 context + * @param dst the output buffer + */ +void sph_haval128_4_close(void *cc, void *dst); + +/** + * Close a HAVAL-128/4 computation. Up to 7 extra input bits may be added + * to the input message; these are the n upper bits of + * the ub byte (i.e. the first extra bit has value 128 in + * ub, the second extra bit has value 64, and so on). Other + * bits in ub are ignored. + * + * The output buffer must be wide enough to accommodate the result (16 + * bytes). The context is automatically reinitialized. + * + * @param cc the HAVAL-128/4 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the output buffer + */ +void sph_haval128_4_addbits_and_close(void *cc, + unsigned ub, unsigned n, void *dst); + +/** + * Initialize the context for HAVAL-128/5.
+ * + * @param cc context to initialize (pointer to a + * sph_haval128_5_context structure) + */ +void sph_haval128_5_init(void *cc); + +/** + * Process some data bytes for HAVAL-128/5. If len is 0, + * then this function does nothing. + * + * @param cc the HAVAL-128/5 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_haval128_5(void *cc, const void *data, size_t len); + +/** + * Close a HAVAL-128/5 computation. The output buffer must be wide + * enough to accommodate the result (16 bytes). The context is automatically + * reinitialized. + * + * @param cc the HAVAL-128/5 context + * @param dst the output buffer + */ +void sph_haval128_5_close(void *cc, void *dst); + +/** + * Close a HAVAL-128/5 computation. Up to 7 extra input bits may be added + * to the input message; these are the n upper bits of + * the ub byte (i.e. the first extra bit has value 128 in + * ub, the second extra bit has value 64, and so on). Other + * bits in ub are ignored. + * + * The output buffer must be wide enough to accommodate the result (16 + * bytes). The context is automatically reinitialized. + * + * @param cc the HAVAL-128/5 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the output buffer + */ +void sph_haval128_5_addbits_and_close(void *cc, + unsigned ub, unsigned n, void *dst); + +/** + * Initialize the context for HAVAL-160/3. + * + * @param cc context to initialize (pointer to a + * sph_haval160_3_context structure) + */ +void sph_haval160_3_init(void *cc); + +/** + * Process some data bytes for HAVAL-160/3. If len is 0, + * then this function does nothing. + * + * @param cc the HAVAL-160/3 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_haval160_3(void *cc, const void *data, size_t len); + +/** + * Close a HAVAL-160/3 computation. The output buffer must be wide + * enough to accomodate the result (20 bytes).
The context is automatically + * reinitialized. + * + * @param cc the HAVAL-160/3 context + * @param dst the output buffer + */ +void sph_haval160_3_close(void *cc, void *dst); + +/** + * Close a HAVAL-160/3 computation. Up to 7 extra input bits may be added + * to the input message; these are the n upper bits of + * the ub byte (i.e. the first extra bit has value 128 in + * ub, the second extra bit has value 64, and so on). Other + * bits in ub are ignored. + * + * The output buffer must be wide enough to accommodate the result (20 + * bytes). The context is automatically reinitialized. + * + * @param cc the HAVAL-160/3 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the output buffer + */ +void sph_haval160_3_addbits_and_close(void *cc, + unsigned ub, unsigned n, void *dst); + +/** + * Initialize the context for HAVAL-160/4. + * + * @param cc context to initialize (pointer to a + * sph_haval160_4_context structure) + */ +void sph_haval160_4_init(void *cc); + +/** + * Process some data bytes for HAVAL-160/4. If len is 0, + * then this function does nothing. + * + * @param cc the HAVAL-160/4 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_haval160_4(void *cc, const void *data, size_t len); + +/** + * Close a HAVAL-160/4 computation. The output buffer must be wide + * enough to accommodate the result (20 bytes). The context is automatically + * reinitialized. + * + * @param cc the HAVAL-160/4 context + * @param dst the output buffer + */ +void sph_haval160_4_close(void *cc, void *dst); + +/** + * Close a HAVAL-160/4 computation. Up to 7 extra input bits may be added + * to the input message; these are the n upper bits of + * the ub byte (i.e. the first extra bit has value 128 in + * ub, the second extra bit has value 64, and so on). Other + * bits in ub are ignored. + * + * The output buffer must be wide enough to accomodate the result (20 + * bytes).
The context is automatically reinitialized. + * + * @param cc the HAVAL-160/4 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the output buffer + */ +void sph_haval160_4_addbits_and_close(void *cc, + unsigned ub, unsigned n, void *dst); + +/** + * Initialize the context for HAVAL-160/5. + * + * @param cc context to initialize (pointer to a + * sph_haval160_5_context structure) + */ +void sph_haval160_5_init(void *cc); + +/** + * Process some data bytes for HAVAL-160/5. If len is 0, + * then this function does nothing. + * + * @param cc the HAVAL-160/5 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_haval160_5(void *cc, const void *data, size_t len); + +/** + * Close a HAVAL-160/5 computation. The output buffer must be wide + * enough to accommodate the result (20 bytes). The context is automatically + * reinitialized. + * + * @param cc the HAVAL-160/5 context + * @param dst the output buffer + */ +void sph_haval160_5_close(void *cc, void *dst); + +/** + * Close a HAVAL-160/5 computation. Up to 7 extra input bits may be added + * to the input message; these are the n upper bits of + * the ub byte (i.e. the first extra bit has value 128 in + * ub, the second extra bit has value 64, and so on). Other + * bits in ub are ignored. + * + * The output buffer must be wide enough to accommodate the result (20 + * bytes). The context is automatically reinitialized. + * + * @param cc the HAVAL-160/5 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the output buffer + */ +void sph_haval160_5_addbits_and_close(void *cc, + unsigned ub, unsigned n, void *dst); + +/** + * Initialize the context for HAVAL-192/3. + * + * @param cc context to initialize (pointer to a + * sph_haval192_3_context structure) + */ +void sph_haval192_3_init(void *cc); + +/** + * Process some data bytes for HAVAL-192/3.
If len is 0, + * then this function does nothing. + * + * @param cc the HAVAL-192/3 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_haval192_3(void *cc, const void *data, size_t len); + +/** + * Close a HAVAL-192/3 computation. The output buffer must be wide + * enough to accommodate the result (24 bytes). The context is automatically + * reinitialized. + * + * @param cc the HAVAL-192/3 context + * @param dst the output buffer + */ +void sph_haval192_3_close(void *cc, void *dst); + +/** + * Close a HAVAL-192/3 computation. Up to 7 extra input bits may be added + * to the input message; these are the n upper bits of + * the ub byte (i.e. the first extra bit has value 128 in + * ub, the second extra bit has value 64, and so on). Other + * bits in ub are ignored. + * + * The output buffer must be wide enough to accommodate the result (24 + * bytes). The context is automatically reinitialized. + * + * @param cc the HAVAL-192/3 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the output buffer + */ +void sph_haval192_3_addbits_and_close(void *cc, + unsigned ub, unsigned n, void *dst); + +/** + * Initialize the context for HAVAL-192/4. + * + * @param cc context to initialize (pointer to a + * sph_haval192_4_context structure) + */ +void sph_haval192_4_init(void *cc); + +/** + * Process some data bytes for HAVAL-192/4. If len is 0, + * then this function does nothing. + * + * @param cc the HAVAL-192/4 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_haval192_4(void *cc, const void *data, size_t len); + +/** + * Close a HAVAL-192/4 computation. The output buffer must be wide + * enough to accomodate the result (24 bytes). The context is automatically + * reinitialized.
+ * + * @param cc the HAVAL-192/4 context + * @param dst the output buffer + */ +void sph_haval192_4_close(void *cc, void *dst); + +/** + * Close a HAVAL-192/4 computation. Up to 7 extra input bits may be added + * to the input message; these are the n upper bits of + * the ub byte (i.e. the first extra bit has value 128 in + * ub, the second extra bit has value 64, and so on). Other + * bits in ub are ignored. + * + * The output buffer must be wide enough to accomodate the result (24 + * bytes). The context is automatically reinitialized. + * + * @param cc the HAVAL-192/4 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the output buffer + */ +void sph_haval192_4_addbits_and_close(void *cc, + unsigned ub, unsigned n, void *dst); + +/** + * Initialize the context for HAVAL-192/5. + * + * @param cc context to initialize (pointer to a + * sph_haval192_5_context structure) + */ +void sph_haval192_5_init(void *cc); + +/** + * Process some data bytes for HAVAL-192/5. If len is 0, + * then this function does nothing. + * + * @param cc the HAVAL-192/5 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_haval192_5(void *cc, const void *data, size_t len); + +/** + * Close a HAVAL-192/5 computation. The output buffer must be wide + * enough to accomodate the result (24 bytes). The context is automatically + * reinitialized. + * + * @param cc the HAVAL-192/5 context + * @param dst the output buffer + */ +void sph_haval192_5_close(void *cc, void *dst); + +/** + * Close a HAVAL-192/5 computation. Up to 7 extra input bits may be added + * to the input message; these are the n upper bits of + * the ub byte (i.e. the first extra bit has value 128 in + * ub, the second extra bit has value 64, and so on). Other + * bits in ub are ignored. + * + * The output buffer must be wide enough to accomodate the result (24 + * bytes). The context is automatically reinitialized. 
+ * + * @param cc the HAVAL-192/5 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the output buffer + */ +void sph_haval192_5_addbits_and_close(void *cc, + unsigned ub, unsigned n, void *dst); + +/** + * Initialize the context for HAVAL-224/3. + * + * @param cc context to initialize (pointer to a + * sph_haval224_3_context structure) + */ +void sph_haval224_3_init(void *cc); + +/** + * Process some data bytes for HAVAL-224/3. If len is 0, + * then this function does nothing. + * + * @param cc the HAVAL-224/3 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_haval224_3(void *cc, const void *data, size_t len); + +/** + * Close a HAVAL-224/3 computation. The output buffer must be wide + * enough to accomodate the result (28 bytes). The context is automatically + * reinitialized. + * + * @param cc the HAVAL-224/3 context + * @param dst the output buffer + */ +void sph_haval224_3_close(void *cc, void *dst); + +/** + * Close a HAVAL-224/3 computation. Up to 7 extra input bits may be added + * to the input message; these are the n upper bits of + * the ub byte (i.e. the first extra bit has value 128 in + * ub, the second extra bit has value 64, and so on). Other + * bits in ub are ignored. + * + * The output buffer must be wide enough to accomodate the result (28 + * bytes). The context is automatically reinitialized. + * + * @param cc the HAVAL-224/3 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the output buffer + */ +void sph_haval224_3_addbits_and_close(void *cc, + unsigned ub, unsigned n, void *dst); + +/** + * Initialize the context for HAVAL-224/4. + * + * @param cc context to initialize (pointer to a + * sph_haval224_4_context structure) + */ +void sph_haval224_4_init(void *cc); + +/** + * Process some data bytes for HAVAL-224/4. If len is 0, + * then this function does nothing. 
+ * + * @param cc the HAVAL-224/4 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_haval224_4(void *cc, const void *data, size_t len); + +/** + * Close a HAVAL-224/4 computation. The output buffer must be wide + * enough to accomodate the result (28 bytes). The context is automatically + * reinitialized. + * + * @param cc the HAVAL-224/4 context + * @param dst the output buffer + */ +void sph_haval224_4_close(void *cc, void *dst); + +/** + * Close a HAVAL-224/4 computation. Up to 7 extra input bits may be added + * to the input message; these are the n upper bits of + * the ub byte (i.e. the first extra bit has value 128 in + * ub, the second extra bit has value 64, and so on). Other + * bits in ub are ignored. + * + * The output buffer must be wide enough to accomodate the result (28 + * bytes). The context is automatically reinitialized. + * + * @param cc the HAVAL-224/4 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the output buffer + */ +void sph_haval224_4_addbits_and_close(void *cc, + unsigned ub, unsigned n, void *dst); + +/** + * Initialize the context for HAVAL-224/5. + * + * @param cc context to initialize (pointer to a + * sph_haval224_5_context structure) + */ +void sph_haval224_5_init(void *cc); + +/** + * Process some data bytes for HAVAL-224/5. If len is 0, + * then this function does nothing. + * + * @param cc the HAVAL-224/5 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_haval224_5(void *cc, const void *data, size_t len); + +/** + * Close a HAVAL-224/5 computation. The output buffer must be wide + * enough to accomodate the result (28 bytes). The context is automatically + * reinitialized. + * + * @param cc the HAVAL-224/5 context + * @param dst the output buffer + */ +void sph_haval224_5_close(void *cc, void *dst); + +/** + * Close a HAVAL-224/5 computation. 
Up to 7 extra input bits may be added + * to the input message; these are the n upper bits of + * the ub byte (i.e. the first extra bit has value 128 in + * ub, the second extra bit has value 64, and so on). Other + * bits in ub are ignored. + * + * The output buffer must be wide enough to accomodate the result (28 + * bytes). The context is automatically reinitialized. + * + * @param cc the HAVAL-224/5 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the output buffer + */ +void sph_haval224_5_addbits_and_close(void *cc, + unsigned ub, unsigned n, void *dst); + +/** + * Initialize the context for HAVAL-256/3. + * + * @param cc context to initialize (pointer to a + * sph_haval256_3_context structure) + */ +void sph_haval256_3_init(void *cc); + +/** + * Process some data bytes for HAVAL-256/3. If len is 0, + * then this function does nothing. + * + * @param cc the HAVAL-256/3 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_haval256_3(void *cc, const void *data, size_t len); + +/** + * Close a HAVAL-256/3 computation. The output buffer must be wide + * enough to accomodate the result (32 bytes). The context is automatically + * reinitialized. + * + * @param cc the HAVAL-256/3 context + * @param dst the output buffer + */ +void sph_haval256_3_close(void *cc, void *dst); + +/** + * Close a HAVAL-256/3 computation. Up to 7 extra input bits may be added + * to the input message; these are the n upper bits of + * the ub byte (i.e. the first extra bit has value 128 in + * ub, the second extra bit has value 64, and so on). Other + * bits in ub are ignored. + * + * The output buffer must be wide enough to accomodate the result (32 + * bytes). The context is automatically reinitialized. 
+ * + * @param cc the HAVAL-256/3 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the output buffer + */ +void sph_haval256_3_addbits_and_close(void *cc, + unsigned ub, unsigned n, void *dst); + +/** + * Initialize the context for HAVAL-256/4. + * + * @param cc context to initialize (pointer to a + * sph_haval256_4_context structure) + */ +void sph_haval256_4_init(void *cc); + +/** + * Process some data bytes for HAVAL-256/4. If len is 0, + * then this function does nothing. + * + * @param cc the HAVAL-256/4 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_haval256_4(void *cc, const void *data, size_t len); + +/** + * Close a HAVAL-256/4 computation. The output buffer must be wide + * enough to accomodate the result (32 bytes). The context is automatically + * reinitialized. + * + * @param cc the HAVAL-256/4 context + * @param dst the output buffer + */ +void sph_haval256_4_close(void *cc, void *dst); + +/** + * Close a HAVAL-256/4 computation. Up to 7 extra input bits may be added + * to the input message; these are the n upper bits of + * the ub byte (i.e. the first extra bit has value 128 in + * ub, the second extra bit has value 64, and so on). Other + * bits in ub are ignored. + * + * The output buffer must be wide enough to accomodate the result (32 + * bytes). The context is automatically reinitialized. + * + * @param cc the HAVAL-256/4 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the output buffer + */ +void sph_haval256_4_addbits_and_close(void *cc, + unsigned ub, unsigned n, void *dst); + +/** + * Initialize the context for HAVAL-256/5. + * + * @param cc context to initialize (pointer to a + * sph_haval256_5_context structure) + */ +void sph_haval256_5_init(void *cc); + +/** + * Process some data bytes for HAVAL-256/5. If len is 0, + * then this function does nothing. 
+ * + * @param cc the HAVAL-256/5 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_haval256_5(void *cc, const void *data, size_t len); + +/** + * Close a HAVAL-256/5 computation. The output buffer must be wide + * enough to accomodate the result (32 bytes). The context is automatically + * reinitialized. + * + * @param cc the HAVAL-256/5 context + * @param dst the output buffer + */ +void sph_haval256_5_close(void *cc, void *dst); + +/** + * Close a HAVAL-256/5 computation. Up to 7 extra input bits may be added + * to the input message; these are the n upper bits of + * the ub byte (i.e. the first extra bit has value 128 in + * ub, the second extra bit has value 64, and so on). Other + * bits in ub are ignored. + * + * The output buffer must be wide enough to accomodate the result (32 + * bytes). The context is automatically reinitialized. + * + * @param cc the HAVAL-256/5 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the output buffer + */ +void sph_haval256_5_addbits_and_close(void *cc, + unsigned ub, unsigned n, void *dst); + +/** + * Apply the HAVAL compression function on the provided data. The + * msg parameter contains the 32 32-bit input blocks, + * as numerical values (hence after the little-endian decoding). The + * val parameter contains the 8 32-bit input blocks for + * the compression function; the output is written in place in this + * array. This function uses three internal passes. + * + * @param msg the message block (32 values) + * @param val the function 256-bit input and output + */ +void sph_haval_3_comp(const sph_u32 msg[32], sph_u32 val[8]); + +/** + * Apply the HAVAL compression function on the provided data. The + * msg parameter contains the 32 32-bit input blocks, + * as numerical values (hence after the little-endian decoding). 
The + * val parameter contains the 8 32-bit input blocks for + * the compression function; the output is written in place in this + * array. This function uses four internal passes. + * + * @param msg the message block (32 values) + * @param val the function 256-bit input and output + */ +void sph_haval_4_comp(const sph_u32 msg[32], sph_u32 val[8]); + +/** + * Apply the HAVAL compression function on the provided data. The + * msg parameter contains the 32 32-bit input blocks, + * as numerical values (hence after the little-endian decoding). The + * val parameter contains the 8 32-bit input blocks for + * the compression function; the output is written in place in this + * array. This function uses five internal passes. + * + * @param msg the message block (32 values) + * @param val the function 256-bit input and output + */ +void sph_haval_5_comp(const sph_u32 msg[32], sph_u32 val[8]); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/sph/sph_ripemd.h b/sph/sph_ripemd.h new file mode 100644 index 0000000000..256776830f --- /dev/null +++ b/sph/sph_ripemd.h @@ -0,0 +1,273 @@ +/* $Id: sph_ripemd.h 216 2010-06-08 09:46:57Z tp $ */ +/** + * RIPEMD, RIPEMD-128 and RIPEMD-160 interface. + * + * RIPEMD was first described in: Research and Development in Advanced + * Communication Technologies in Europe, "RIPE Integrity Primitives: + * Final Report of RACE Integrity Primitives Evaluation (R1040)", RACE, + * June 1992. + * + * A new, strengthened version, dubbed RIPEMD-160, was published in: H. + * Dobbertin, A. Bosselaers, and B. Preneel, "RIPEMD-160, a strengthened + * version of RIPEMD", Fast Software Encryption - FSE'96, LNCS 1039, + * Springer (1996), pp. 71--82. + * + * This article describes both RIPEMD-160, with a 160-bit output, and a + * reduced version called RIPEMD-128, which has a 128-bit output. RIPEMD-128 + * was meant as a "drop-in" replacement for any hash function with 128-bit + * output, especially the original RIPEMD. 
+ * + * @warning Collisions, and an efficient method to build other collisions, + * have been published for the original RIPEMD, which is thus considered as + * cryptographically broken. It is also very rarely encountered, and there + * seems to exist no free description or implementation of RIPEMD (except + * the sphlib code, of course). As of january 2007, RIPEMD-128 and RIPEMD-160 + * seem as secure as their output length allows. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_ripemd.h + * @author Thomas Pornin + */ + +#ifndef SPH_RIPEMD_H__ +#define SPH_RIPEMD_H__ + +#include +#include "sph_types.h" + +/** + * Output size (in bits) for RIPEMD. 
+ */ +#define SPH_SIZE_ripemd 128 + +/** + * Output size (in bits) for RIPEMD-128. + */ +#define SPH_SIZE_ripemd128 128 + +/** + * Output size (in bits) for RIPEMD-160. + */ +#define SPH_SIZE_ripemd160 160 + +/** + * This structure is a context for RIPEMD computations: it contains the + * intermediate values and some data from the last entered block. Once + * a RIPEMD computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running RIPEMD computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + sph_u32 val[4]; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif +#endif +} sph_ripemd_context; + +/** + * Initialize a RIPEMD context. This process performs no memory allocation. + * + * @param cc the RIPEMD context (pointer to + * a sph_ripemd_context) + */ +void sph_ripemd_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the RIPEMD context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_ripemd(void *cc, const void *data, size_t len); + +/** + * Terminate the current RIPEMD computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accomodate the result (16 bytes). The context is automatically + * reinitialized. + * + * @param cc the RIPEMD context + * @param dst the destination buffer + */ +void sph_ripemd_close(void *cc, void *dst); + +/** + * Apply the RIPEMD compression function on the provided data. The + * msg parameter contains the 16 32-bit input blocks, + * as numerical values (hence after the little-endian decoding). 
The + * val parameter contains the 4 32-bit input blocks for + * the compression function; the output is written in place in this + * array. + * + * @param msg the message block (16 values) + * @param val the function 128-bit input and output + */ +void sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4]); + +/* ===================================================================== */ + +/** + * This structure is a context for RIPEMD-128 computations: it contains the + * intermediate values and some data from the last entered block. Once + * a RIPEMD-128 computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running RIPEMD-128 computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + sph_u32 val[4]; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif +#endif +} sph_ripemd128_context; + +/** + * Initialize a RIPEMD-128 context. This process performs no memory allocation. + * + * @param cc the RIPEMD-128 context (pointer to + * a sph_ripemd128_context) + */ +void sph_ripemd128_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the RIPEMD-128 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_ripemd128(void *cc, const void *data, size_t len); + +/** + * Terminate the current RIPEMD-128 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accommodate the result (16 bytes). The context is automatically + * reinitialized.
+ * + * @param cc the RIPEMD-128 context + * @param dst the destination buffer + */ +void sph_ripemd128_close(void *cc, void *dst); + +/** + * Apply the RIPEMD-128 compression function on the provided data. The + * msg parameter contains the 16 32-bit input blocks, + * as numerical values (hence after the little-endian decoding). The + * val parameter contains the 4 32-bit input blocks for + * the compression function; the output is written in place in this + * array. + * + * @param msg the message block (16 values) + * @param val the function 128-bit input and output + */ +void sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4]); + +/* ===================================================================== */ + +/** + * This structure is a context for RIPEMD-160 computations: it contains the + * intermediate values and some data from the last entered block. Once + * a RIPEMD-160 computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running RIPEMD-160 computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + sph_u32 val[5]; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif +#endif +} sph_ripemd160_context; + +/** + * Initialize a RIPEMD-160 context. This process performs no memory allocation. + * + * @param cc the RIPEMD-160 context (pointer to + * a sph_ripemd160_context) + */ +void sph_ripemd160_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing).
+ * + * @param cc the RIPEMD-160 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_ripemd160(void *cc, const void *data, size_t len); + +/** + * Terminate the current RIPEMD-160 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accomodate the result (20 bytes). The context is automatically + * reinitialized. + * + * @param cc the RIPEMD-160 context + * @param dst the destination buffer + */ +void sph_ripemd160_close(void *cc, void *dst); + +/** + * Apply the RIPEMD-160 compression function on the provided data. The + * msg parameter contains the 16 32-bit input blocks, + * as numerical values (hence after the little-endian decoding). The + * val parameter contains the 5 32-bit input blocks for + * the compression function; the output is written in place in this + * array. + * + * @param msg the message block (16 values) + * @param val the function 160-bit input and output + */ +void sph_ripemd160_comp(const sph_u32 msg[16], sph_u32 val[5]); + +#endif diff --git a/sph/sph_sha2.c b/sph/sph_sha2.c new file mode 100644 index 0000000000..aab2c5518c --- /dev/null +++ b/sph/sph_sha2.c @@ -0,0 +1,691 @@ +/* $Id: sha2.c 227 2010-06-16 17:28:38Z tp $ */ +/* + * SHA-224 / SHA-256 implementation. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include + +#include "sph_sha2.h" + +#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SHA2 +#define SPH_SMALL_FOOTPRINT_SHA2 1 +#endif + +#define CH(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z)) +#define MAJ(X, Y, Z) (((Y) & (Z)) | (((Y) | (Z)) & (X))) + +#define ROTR SPH_ROTR32 + +#define BSG2_0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) +#define BSG2_1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) +#define SSG2_0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SPH_T32((x) >> 3)) +#define SSG2_1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SPH_T32((x) >> 10)) + +static const sph_u32 H224[8] = { + SPH_C32(0xC1059ED8), SPH_C32(0x367CD507), SPH_C32(0x3070DD17), + SPH_C32(0xF70E5939), SPH_C32(0xFFC00B31), SPH_C32(0x68581511), + SPH_C32(0x64F98FA7), SPH_C32(0xBEFA4FA4) +}; + +static const sph_u32 H256[8] = { + SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85), SPH_C32(0x3C6EF372), + SPH_C32(0xA54FF53A), SPH_C32(0x510E527F), SPH_C32(0x9B05688C), + SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19) +}; + +/* + * The SHA2_ROUND_BODY defines the body for a SHA-224 / SHA-256 + * compression function implementation. The "in" parameter should + * evaluate, when applied to a numerical input parameter from 0 to 15, + * to an expression which yields the corresponding input block. The "r" + * parameter should evaluate to an array or pointer expression + * designating the array of 8 words which contains the input and output + * of the compression function. 
+ */ + +#if SPH_SMALL_FOOTPRINT_SHA2 + +static const sph_u32 K[64] = { + SPH_C32(0x428A2F98), SPH_C32(0x71374491), + SPH_C32(0xB5C0FBCF), SPH_C32(0xE9B5DBA5), + SPH_C32(0x3956C25B), SPH_C32(0x59F111F1), + SPH_C32(0x923F82A4), SPH_C32(0xAB1C5ED5), + SPH_C32(0xD807AA98), SPH_C32(0x12835B01), + SPH_C32(0x243185BE), SPH_C32(0x550C7DC3), + SPH_C32(0x72BE5D74), SPH_C32(0x80DEB1FE), + SPH_C32(0x9BDC06A7), SPH_C32(0xC19BF174), + SPH_C32(0xE49B69C1), SPH_C32(0xEFBE4786), + SPH_C32(0x0FC19DC6), SPH_C32(0x240CA1CC), + SPH_C32(0x2DE92C6F), SPH_C32(0x4A7484AA), + SPH_C32(0x5CB0A9DC), SPH_C32(0x76F988DA), + SPH_C32(0x983E5152), SPH_C32(0xA831C66D), + SPH_C32(0xB00327C8), SPH_C32(0xBF597FC7), + SPH_C32(0xC6E00BF3), SPH_C32(0xD5A79147), + SPH_C32(0x06CA6351), SPH_C32(0x14292967), + SPH_C32(0x27B70A85), SPH_C32(0x2E1B2138), + SPH_C32(0x4D2C6DFC), SPH_C32(0x53380D13), + SPH_C32(0x650A7354), SPH_C32(0x766A0ABB), + SPH_C32(0x81C2C92E), SPH_C32(0x92722C85), + SPH_C32(0xA2BFE8A1), SPH_C32(0xA81A664B), + SPH_C32(0xC24B8B70), SPH_C32(0xC76C51A3), + SPH_C32(0xD192E819), SPH_C32(0xD6990624), + SPH_C32(0xF40E3585), SPH_C32(0x106AA070), + SPH_C32(0x19A4C116), SPH_C32(0x1E376C08), + SPH_C32(0x2748774C), SPH_C32(0x34B0BCB5), + SPH_C32(0x391C0CB3), SPH_C32(0x4ED8AA4A), + SPH_C32(0x5B9CCA4F), SPH_C32(0x682E6FF3), + SPH_C32(0x748F82EE), SPH_C32(0x78A5636F), + SPH_C32(0x84C87814), SPH_C32(0x8CC70208), + SPH_C32(0x90BEFFFA), SPH_C32(0xA4506CEB), + SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2) +}; + +#define SHA2_MEXP1(in, pc) do { \ + W[pc] = in(pc); \ + } while (0) + +#define SHA2_MEXP2(in, pc) do { \ + W[(pc) & 0x0F] = SPH_T32(SSG2_1(W[((pc) - 2) & 0x0F]) \ + + W[((pc) - 7) & 0x0F] \ + + SSG2_0(W[((pc) - 15) & 0x0F]) + W[(pc) & 0x0F]); \ + } while (0) + +#define SHA2_STEPn(n, a, b, c, d, e, f, g, h, in, pc) do { \ + sph_u32 t1, t2; \ + SHA2_MEXP ## n(in, pc); \ + t1 = SPH_T32(h + BSG2_1(e) + CH(e, f, g) \ + + K[pcount + (pc)] + W[(pc) & 0x0F]); \ + t2 = SPH_T32(BSG2_0(a) + MAJ(a, b, c)); \ + d = 
SPH_T32(d + t1); \ + h = SPH_T32(t1 + t2); \ + } while (0) + +#define SHA2_STEP1(a, b, c, d, e, f, g, h, in, pc) \ + SHA2_STEPn(1, a, b, c, d, e, f, g, h, in, pc) +#define SHA2_STEP2(a, b, c, d, e, f, g, h, in, pc) \ + SHA2_STEPn(2, a, b, c, d, e, f, g, h, in, pc) + +#define SHA2_ROUND_BODY(in, r) do { \ + sph_u32 A, B, C, D, E, F, G, H; \ + sph_u32 W[16]; \ + unsigned pcount; \ + \ + A = (r)[0]; \ + B = (r)[1]; \ + C = (r)[2]; \ + D = (r)[3]; \ + E = (r)[4]; \ + F = (r)[5]; \ + G = (r)[6]; \ + H = (r)[7]; \ + pcount = 0; \ + SHA2_STEP1(A, B, C, D, E, F, G, H, in, 0); \ + SHA2_STEP1(H, A, B, C, D, E, F, G, in, 1); \ + SHA2_STEP1(G, H, A, B, C, D, E, F, in, 2); \ + SHA2_STEP1(F, G, H, A, B, C, D, E, in, 3); \ + SHA2_STEP1(E, F, G, H, A, B, C, D, in, 4); \ + SHA2_STEP1(D, E, F, G, H, A, B, C, in, 5); \ + SHA2_STEP1(C, D, E, F, G, H, A, B, in, 6); \ + SHA2_STEP1(B, C, D, E, F, G, H, A, in, 7); \ + SHA2_STEP1(A, B, C, D, E, F, G, H, in, 8); \ + SHA2_STEP1(H, A, B, C, D, E, F, G, in, 9); \ + SHA2_STEP1(G, H, A, B, C, D, E, F, in, 10); \ + SHA2_STEP1(F, G, H, A, B, C, D, E, in, 11); \ + SHA2_STEP1(E, F, G, H, A, B, C, D, in, 12); \ + SHA2_STEP1(D, E, F, G, H, A, B, C, in, 13); \ + SHA2_STEP1(C, D, E, F, G, H, A, B, in, 14); \ + SHA2_STEP1(B, C, D, E, F, G, H, A, in, 15); \ + for (pcount = 16; pcount < 64; pcount += 16) { \ + SHA2_STEP2(A, B, C, D, E, F, G, H, in, 0); \ + SHA2_STEP2(H, A, B, C, D, E, F, G, in, 1); \ + SHA2_STEP2(G, H, A, B, C, D, E, F, in, 2); \ + SHA2_STEP2(F, G, H, A, B, C, D, E, in, 3); \ + SHA2_STEP2(E, F, G, H, A, B, C, D, in, 4); \ + SHA2_STEP2(D, E, F, G, H, A, B, C, in, 5); \ + SHA2_STEP2(C, D, E, F, G, H, A, B, in, 6); \ + SHA2_STEP2(B, C, D, E, F, G, H, A, in, 7); \ + SHA2_STEP2(A, B, C, D, E, F, G, H, in, 8); \ + SHA2_STEP2(H, A, B, C, D, E, F, G, in, 9); \ + SHA2_STEP2(G, H, A, B, C, D, E, F, in, 10); \ + SHA2_STEP2(F, G, H, A, B, C, D, E, in, 11); \ + SHA2_STEP2(E, F, G, H, A, B, C, D, in, 12); \ + SHA2_STEP2(D, E, F, G, H, A, B, C, in, 13); 
\ + SHA2_STEP2(C, D, E, F, G, H, A, B, in, 14); \ + SHA2_STEP2(B, C, D, E, F, G, H, A, in, 15); \ + } \ + (r)[0] = SPH_T32((r)[0] + A); \ + (r)[1] = SPH_T32((r)[1] + B); \ + (r)[2] = SPH_T32((r)[2] + C); \ + (r)[3] = SPH_T32((r)[3] + D); \ + (r)[4] = SPH_T32((r)[4] + E); \ + (r)[5] = SPH_T32((r)[5] + F); \ + (r)[6] = SPH_T32((r)[6] + G); \ + (r)[7] = SPH_T32((r)[7] + H); \ + } while (0) + +#else + +#define SHA2_ROUND_BODY(in, r) do { \ + sph_u32 A, B, C, D, E, F, G, H, T1, T2; \ + sph_u32 W00, W01, W02, W03, W04, W05, W06, W07; \ + sph_u32 W08, W09, W10, W11, W12, W13, W14, W15; \ + int i; \ + \ + A = (r)[0]; \ + B = (r)[1]; \ + C = (r)[2]; \ + D = (r)[3]; \ + E = (r)[4]; \ + F = (r)[5]; \ + G = (r)[6]; \ + H = (r)[7]; \ + W00 = in(0); \ + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + + SPH_C32(0x428A2F98) + W00); \ + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ + D = SPH_T32(D + T1); \ + H = SPH_T32(T1 + T2); \ + W01 = in(1); \ + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + + SPH_C32(0x71374491) + W01); \ + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ + C = SPH_T32(C + T1); \ + G = SPH_T32(T1 + T2); \ + W02 = in(2); \ + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + + SPH_C32(0xB5C0FBCF) + W02); \ + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ + B = SPH_T32(B + T1); \ + F = SPH_T32(T1 + T2); \ + W03 = in(3); \ + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + + SPH_C32(0xE9B5DBA5) + W03); \ + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ + A = SPH_T32(A + T1); \ + E = SPH_T32(T1 + T2); \ + W04 = in(4); \ + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + + SPH_C32(0x3956C25B) + W04); \ + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ + H = SPH_T32(H + T1); \ + D = SPH_T32(T1 + T2); \ + W05 = in(5); \ + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + + SPH_C32(0x59F111F1) + W05); \ + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ + G = SPH_T32(G + T1); \ + C = SPH_T32(T1 + T2); \ + W06 = in(6); \ + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + + SPH_C32(0x923F82A4) + W06); \ + T2 = SPH_T32(BSG2_0(C) + 
MAJ(C, D, E)); \ + F = SPH_T32(F + T1); \ + B = SPH_T32(T1 + T2); \ + W07 = in(7); \ + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + + SPH_C32(0xAB1C5ED5) + W07); \ + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ + E = SPH_T32(E + T1); \ + A = SPH_T32(T1 + T2); \ + W08 = in(8); \ + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + + SPH_C32(0xD807AA98) + W08); \ + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ + D = SPH_T32(D + T1); \ + H = SPH_T32(T1 + T2); \ + W09 = in(9); \ + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + + SPH_C32(0x12835B01) + W09); \ + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ + C = SPH_T32(C + T1); \ + G = SPH_T32(T1 + T2); \ + W10 = in(10); \ + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + + SPH_C32(0x243185BE) + W10); \ + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ + B = SPH_T32(B + T1); \ + F = SPH_T32(T1 + T2); \ + W11 = in(11); \ + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + + SPH_C32(0x550C7DC3) + W11); \ + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ + A = SPH_T32(A + T1); \ + E = SPH_T32(T1 + T2); \ + W12 = in(12); \ + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + + SPH_C32(0x72BE5D74) + W12); \ + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ + H = SPH_T32(H + T1); \ + D = SPH_T32(T1 + T2); \ + W13 = in(13); \ + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + + SPH_C32(0x80DEB1FE) + W13); \ + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ + G = SPH_T32(G + T1); \ + C = SPH_T32(T1 + T2); \ + W14 = in(14); \ + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + + SPH_C32(0x9BDC06A7) + W14); \ + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ + F = SPH_T32(F + T1); \ + B = SPH_T32(T1 + T2); \ + W15 = in(15); \ + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + + SPH_C32(0xC19BF174) + W15); \ + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ + E = SPH_T32(E + T1); \ + A = SPH_T32(T1 + T2); \ + W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \ + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + + SPH_C32(0xE49B69C1) + W00); \ + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ + D = SPH_T32(D + T1); \ 
+ H = SPH_T32(T1 + T2); \ + W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \ + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + + SPH_C32(0xEFBE4786) + W01); \ + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ + C = SPH_T32(C + T1); \ + G = SPH_T32(T1 + T2); \ + W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \ + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + + SPH_C32(0x0FC19DC6) + W02); \ + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ + B = SPH_T32(B + T1); \ + F = SPH_T32(T1 + T2); \ + W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \ + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + + SPH_C32(0x240CA1CC) + W03); \ + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ + A = SPH_T32(A + T1); \ + E = SPH_T32(T1 + T2); \ + W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \ + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + + SPH_C32(0x2DE92C6F) + W04); \ + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ + H = SPH_T32(H + T1); \ + D = SPH_T32(T1 + T2); \ + W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \ + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + + SPH_C32(0x4A7484AA) + W05); \ + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ + G = SPH_T32(G + T1); \ + C = SPH_T32(T1 + T2); \ + W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \ + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + + SPH_C32(0x5CB0A9DC) + W06); \ + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ + F = SPH_T32(F + T1); \ + B = SPH_T32(T1 + T2); \ + W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \ + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + + SPH_C32(0x76F988DA) + W07); \ + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ + E = SPH_T32(E + T1); \ + A = SPH_T32(T1 + T2); \ + W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \ + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + + SPH_C32(0x983E5152) + W08); \ + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ + D = SPH_T32(D + T1); \ + H = SPH_T32(T1 + T2); \ + W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \ + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + + 
SPH_C32(0xA831C66D) + W09); \ + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ + C = SPH_T32(C + T1); \ + G = SPH_T32(T1 + T2); \ + W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \ + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + + SPH_C32(0xB00327C8) + W10); \ + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ + B = SPH_T32(B + T1); \ + F = SPH_T32(T1 + T2); \ + W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \ + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + + SPH_C32(0xBF597FC7) + W11); \ + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ + A = SPH_T32(A + T1); \ + E = SPH_T32(T1 + T2); \ + W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \ + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + + SPH_C32(0xC6E00BF3) + W12); \ + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ + H = SPH_T32(H + T1); \ + D = SPH_T32(T1 + T2); \ + W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \ + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + + SPH_C32(0xD5A79147) + W13); \ + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ + G = SPH_T32(G + T1); \ + C = SPH_T32(T1 + T2); \ + W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \ + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + + SPH_C32(0x06CA6351) + W14); \ + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ + F = SPH_T32(F + T1); \ + B = SPH_T32(T1 + T2); \ + W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \ + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + + SPH_C32(0x14292967) + W15); \ + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ + E = SPH_T32(E + T1); \ + A = SPH_T32(T1 + T2); \ + W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \ + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + + SPH_C32(0x27B70A85) + W00); \ + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ + D = SPH_T32(D + T1); \ + H = SPH_T32(T1 + T2); \ + W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \ + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + + SPH_C32(0x2E1B2138) + W01); \ + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ + C = SPH_T32(C + T1); \ + G = SPH_T32(T1 + T2); \ + W02 = 
SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \ + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + + SPH_C32(0x4D2C6DFC) + W02); \ + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ + B = SPH_T32(B + T1); \ + F = SPH_T32(T1 + T2); \ + W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \ + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + + SPH_C32(0x53380D13) + W03); \ + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ + A = SPH_T32(A + T1); \ + E = SPH_T32(T1 + T2); \ + W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \ + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + + SPH_C32(0x650A7354) + W04); \ + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ + H = SPH_T32(H + T1); \ + D = SPH_T32(T1 + T2); \ + W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \ + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + + SPH_C32(0x766A0ABB) + W05); \ + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ + G = SPH_T32(G + T1); \ + C = SPH_T32(T1 + T2); \ + W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \ + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + + SPH_C32(0x81C2C92E) + W06); \ + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ + F = SPH_T32(F + T1); \ + B = SPH_T32(T1 + T2); \ + W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \ + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + + SPH_C32(0x92722C85) + W07); \ + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ + E = SPH_T32(E + T1); \ + A = SPH_T32(T1 + T2); \ + W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \ + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + + SPH_C32(0xA2BFE8A1) + W08); \ + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ + D = SPH_T32(D + T1); \ + H = SPH_T32(T1 + T2); \ + W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \ + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + + SPH_C32(0xA81A664B) + W09); \ + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ + C = SPH_T32(C + T1); \ + G = SPH_T32(T1 + T2); \ + W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \ + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + + SPH_C32(0xC24B8B70) + W10); \ + T2 = 
SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ + B = SPH_T32(B + T1); \ + F = SPH_T32(T1 + T2); \ + W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \ + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + + SPH_C32(0xC76C51A3) + W11); \ + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ + A = SPH_T32(A + T1); \ + E = SPH_T32(T1 + T2); \ + W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \ + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + + SPH_C32(0xD192E819) + W12); \ + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ + H = SPH_T32(H + T1); \ + D = SPH_T32(T1 + T2); \ + W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \ + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + + SPH_C32(0xD6990624) + W13); \ + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ + G = SPH_T32(G + T1); \ + C = SPH_T32(T1 + T2); \ + W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \ + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + + SPH_C32(0xF40E3585) + W14); \ + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ + F = SPH_T32(F + T1); \ + B = SPH_T32(T1 + T2); \ + W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \ + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + + SPH_C32(0x106AA070) + W15); \ + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ + E = SPH_T32(E + T1); \ + A = SPH_T32(T1 + T2); \ + W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \ + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + + SPH_C32(0x19A4C116) + W00); \ + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ + D = SPH_T32(D + T1); \ + H = SPH_T32(T1 + T2); \ + W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \ + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + + SPH_C32(0x1E376C08) + W01); \ + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ + C = SPH_T32(C + T1); \ + G = SPH_T32(T1 + T2); \ + W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \ + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + + SPH_C32(0x2748774C) + W02); \ + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ + B = SPH_T32(B + T1); \ + F = SPH_T32(T1 + T2); \ + W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); 
\ + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + + SPH_C32(0x34B0BCB5) + W03); \ + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ + A = SPH_T32(A + T1); \ + E = SPH_T32(T1 + T2); \ + W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \ + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + + SPH_C32(0x391C0CB3) + W04); \ + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ + H = SPH_T32(H + T1); \ + D = SPH_T32(T1 + T2); \ + W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \ + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + + SPH_C32(0x4ED8AA4A) + W05); \ + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ + G = SPH_T32(G + T1); \ + C = SPH_T32(T1 + T2); \ + W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \ + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + + SPH_C32(0x5B9CCA4F) + W06); \ + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ + F = SPH_T32(F + T1); \ + B = SPH_T32(T1 + T2); \ + W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \ + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + + SPH_C32(0x682E6FF3) + W07); \ + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ + E = SPH_T32(E + T1); \ + A = SPH_T32(T1 + T2); \ + W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \ + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + + SPH_C32(0x748F82EE) + W08); \ + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ + D = SPH_T32(D + T1); \ + H = SPH_T32(T1 + T2); \ + W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \ + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + + SPH_C32(0x78A5636F) + W09); \ + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ + C = SPH_T32(C + T1); \ + G = SPH_T32(T1 + T2); \ + W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \ + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + + SPH_C32(0x84C87814) + W10); \ + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ + B = SPH_T32(B + T1); \ + F = SPH_T32(T1 + T2); \ + W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \ + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + + SPH_C32(0x8CC70208) + W11); \ + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ + A = SPH_T32(A + 
T1); \ + E = SPH_T32(T1 + T2); \ + W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \ + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + + SPH_C32(0x90BEFFFA) + W12); \ + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ + H = SPH_T32(H + T1); \ + D = SPH_T32(T1 + T2); \ + W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \ + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + + SPH_C32(0xA4506CEB) + W13); \ + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ + G = SPH_T32(G + T1); \ + C = SPH_T32(T1 + T2); \ + W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \ + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + + SPH_C32(0xBEF9A3F7) + W14); \ + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ + F = SPH_T32(F + T1); \ + B = SPH_T32(T1 + T2); \ + W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \ + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + + SPH_C32(0xC67178F2) + W15); \ + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ + E = SPH_T32(E + T1); \ + A = SPH_T32(T1 + T2); \ + (r)[0] = SPH_T32((r)[0] + A); \ + (r)[1] = SPH_T32((r)[1] + B); \ + (r)[2] = SPH_T32((r)[2] + C); \ + (r)[3] = SPH_T32((r)[3] + D); \ + (r)[4] = SPH_T32((r)[4] + E); \ + (r)[5] = SPH_T32((r)[5] + F); \ + (r)[6] = SPH_T32((r)[6] + G); \ + (r)[7] = SPH_T32((r)[7] + H); \ + } while (0) + +#endif + +/* + * One round of SHA-224 / SHA-256. The data must be aligned for 32-bit access. 
+ */ +static void +sha2_round(const unsigned char *data, sph_u32 r[8]) +{ /* SHA2_IN(x) reads input word x from byte offset 4*x of data through sph_dec32be_aligned(); the macro only exists for the duration of this body */ +#define SHA2_IN(x) sph_dec32be_aligned(data + (4 * (x))) + SHA2_ROUND_BODY(SHA2_IN, r); +#undef SHA2_IN +} + +/* see sph_sha2.h -- copies the H224 initial values into val[] and zeroes the count field(s); buf is not touched */ +void +sph_sha224_init(void *cc) +{ + sph_sha224_context *sc; + + sc = cc; + memcpy(sc->val, H224, sizeof H224); +#if SPH_64 + sc->count = 0; +#else + sc->count_high = sc->count_low = 0; +#endif +} + +/* see sph_sha2.h -- same as sph_sha224_init() but with the H256 initial values */ +void +sph_sha256_init(void *cc) +{ + sph_sha256_context *sc; + + sc = cc; + memcpy(sc->val, H256, sizeof H256); +#if SPH_64 + sc->count = 0; +#else + sc->count_high = sc->count_low = 0; +#endif +} + +#define RFUN sha2_round +#define HASH sha224 +#define BE32 1 +#include "md_helper.c" + +/* see sph_sha2.h -- sha224_close() is not defined in the visible text; presumably it is generated by the md_helper.c include above from HASH (sha224), and the 7 is the number of 32-bit output words (28 bytes) -- TODO confirm in md_helper.c. The context is re-initialized afterwards. */ +void +sph_sha224_close(void *cc, void *dst) +{ + sha224_close(cc, dst, 7); + sph_sha224_init(cc); +} + +/* see sph_sha2.h -- as sph_sha224_close(), additionally handing the extra bits (ub, n) to the helper */ +void +sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + sha224_addbits_and_close(cc, ub, n, dst, 7); + sph_sha224_init(cc); +} + +/* see sph_sha2.h -- SHA-256 goes through the same sha224_close() helper, passing 8 instead of 7, then re-initializes the context with the SHA-256 values */ +void +sph_sha256_close(void *cc, void *dst) +{ + sha224_close(cc, dst, 8); + sph_sha256_init(cc); +} + +/* see sph_sha2.h -- as sph_sha256_close(), additionally handing the extra bits (ub, n) to the helper */ +void +sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + sha224_addbits_and_close(cc, ub, n, dst, 8); + sph_sha256_init(cc); +} + +/* see sph_sha2.h -- runs SHA2_ROUND_BODY directly on the 16 words of msg (read as msg[x], no decoding step); val is updated in place */ +void +sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8]) +{ +#define SHA2_IN(x) msg[x] + SHA2_ROUND_BODY(SHA2_IN, val); +#undef SHA2_IN +} diff --git a/sph/sph_sha2.h b/sph/sph_sha2.h new file mode 100644 index 0000000000..c47b0f3698 --- /dev/null +++ b/sph/sph_sha2.h @@ -0,0 +1,378 @@ +/* $Id: sph_sha2.h 216 2010-06-08 09:46:57Z tp $ */ +/** + * SHA-224, SHA-256, SHA-384 and SHA-512 interface. + * + * SHA-256 has been published in FIPS 180-2, now amended with a change + * notice to include SHA-224 as well (which is a simple variation on + * SHA-256). SHA-384 and SHA-512 are also defined in FIPS 180-2.
FIPS + * standards can be found at: + * http://csrc.nist.gov/publications/fips/ + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_sha2.h + * @author Thomas Pornin + */ + +#ifndef SPH_SHA2_H__ +#define SPH_SHA2_H__ + +#include <stddef.h> /* size_t, used by the data-processing prototypes of this header */ +#include "sph_types.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +/** + * Output size (in bits) for SHA-224. + */ +#define SPH_SIZE_sha224 224 + +/** + * Output size (in bits) for SHA-256. + */ +#define SPH_SIZE_sha256 256 + +/** + * This structure is a context for SHA-224 computations: it contains the + * intermediate values and some data from the last entered block. Once + * a SHA-224 computation has been performed, the context can be reused for + * another computation.
+ * + * The contents of this structure are private. A running SHA-224 computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + sph_u32 val[8]; /* chaining values; loaded with the initial values by sph_sha224_init() / sph_sha256_init() */ +#if SPH_64 + sph_u64 count; /* single 64-bit counter, zeroed by the init functions (presumably the amount of input processed so far -- confirm in md_helper.c) */ +#else + sph_u32 count_high, count_low; /* the same counter kept as two 32-bit fields when SPH_64 is 0 */ +#endif +#endif +} sph_sha224_context; + +/** + * This structure is a context for SHA-256 computations. It is identical + * to the SHA-224 context. However, a context is initialized for SHA-224 + * or SHA-256, but not both (the internal IV is not the + * same). + */ +typedef sph_sha224_context sph_sha256_context; + +/** + * Initialize a SHA-224 context. This process performs no memory allocation. + * + * @param cc the SHA-224 context (pointer to + * a sph_sha224_context) + */ +void sph_sha224_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the SHA-224 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_sha224(void *cc, const void *data, size_t len); + +/** + * Terminate the current SHA-224 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accommodate the result (28 bytes). The context is automatically + * reinitialized. + * + * @param cc the SHA-224 context + * @param dst the destination buffer + */ +void sph_sha224_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (28 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized.
+ * + * @param cc the SHA-224 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Apply the SHA-224 compression function on the provided data. The + * msg parameter contains the 16 32-bit input blocks, + * as numerical values (hence after the big-endian decoding). The + * val parameter contains the 8 32-bit input blocks for + * the compression function; the output is written in place in this + * array. + * + * @param msg the message block (16 values) + * @param val the function 256-bit input and output + */ +void sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8]); + +/** + * Initialize a SHA-256 context. This process performs no memory allocation. + * + * @param cc the SHA-256 context (pointer to + * a sph_sha256_context) + */ +void sph_sha256_init(void *cc); + +#ifdef DOXYGEN_IGNORE +/** + * Process some data bytes, for SHA-256. This function is identical to + * sph_sha224(); outside of Doxygen builds it is a macro alias for it. + * + * @param cc the SHA-256 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_sha256(void *cc, const void *data, size_t len); +#endif + +#ifndef DOXYGEN_IGNORE +#define sph_sha256 sph_sha224 +#endif + +/** + * Terminate the current SHA-256 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accommodate the result (32 bytes). The context is automatically + * reinitialized. + * + * @param cc the SHA-256 context + * @param dst the destination buffer + */ +void sph_sha256_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (32 bytes).
If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the SHA-256 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); + +#ifdef DOXYGEN_IGNORE +/** + * Apply the SHA-256 compression function on the provided data. This + * function is identical to sph_sha224_comp(); outside of Doxygen builds + * it is a macro alias for it. + * + * @param msg the message block (16 values) + * @param val the function 256-bit input and output + */ +void sph_sha256_comp(const sph_u32 msg[16], sph_u32 val[8]); +#endif + +#ifndef DOXYGEN_IGNORE +#define sph_sha256_comp sph_sha224_comp +#endif + +#if SPH_64 /* the SHA-384 / SHA-512 declarations that follow only exist when SPH_64 is nonzero */ + +/** + * Output size (in bits) for SHA-384. + */ +#define SPH_SIZE_sha384 384 + +/** + * Output size (in bits) for SHA-512. + */ +#define SPH_SIZE_sha512 512 + +/** + * This structure is a context for SHA-384 computations: it contains the + * intermediate values and some data from the last entered block. Once + * a SHA-384 computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running SHA-384 computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[128]; /* first field, for alignment */ + sph_u64 val[8]; /* intermediate values, eight 64-bit words */ + sph_u64 count; /* presumably the amount of input processed so far -- its maintenance is not visible in this header */ +#endif +} sph_sha384_context; + +/** + * Initialize a SHA-384 context. This process performs no memory allocation. + * + * @param cc the SHA-384 context (pointer to + * a sph_sha384_context) + */ +void sph_sha384_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing).
+ * + * @param cc the SHA-384 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_sha384(void *cc, const void *data, size_t len); + +/** + * Terminate the current SHA-384 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accommodate the result (48 bytes). The context is automatically + * reinitialized. + * + * @param cc the SHA-384 context + * @param dst the destination buffer + */ +void sph_sha384_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accommodate the result (48 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the SHA-384 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_sha384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Apply the SHA-384 compression function on the provided data. The + * msg parameter contains the 16 64-bit input blocks, + * as numerical values (hence after the big-endian decoding). The + * val parameter contains the 8 64-bit input blocks for + * the compression function; the output is written in place in this + * array. + * + * @param msg the message block (16 values) + * @param val the function 512-bit input and output + */ +void sph_sha384_comp(const sph_u64 msg[16], sph_u64 val[8]); + +/** + * This structure is a context for SHA-512 computations. It is identical + * to the SHA-384 context. However, a context is initialized for SHA-384 + * or SHA-512, but not both (the internal IV is not the + * same).
+ */ +typedef sph_sha384_context sph_sha512_context; + +/** + * Initialize a SHA-512 context. This process performs no memory allocation. + * + * @param cc the SHA-512 context (pointer to + * a sph_sha512_context) + */ +void sph_sha512_init(void *cc); + +#ifdef DOXYGEN_IGNORE +/** + * Process some data bytes, for SHA-512. This function is identical to + * sph_sha384(); outside of Doxygen builds it is a macro alias for it. + * + * @param cc the SHA-512 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_sha512(void *cc, const void *data, size_t len); +#endif + +#ifndef DOXYGEN_IGNORE +#define sph_sha512 sph_sha384 +#endif + +/** + * Terminate the current SHA-512 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accommodate the result (64 bytes). The context is automatically + * reinitialized. + * + * @param cc the SHA-512 context + * @param dst the destination buffer + */ +void sph_sha512_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accommodate the result (64 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the SHA-512 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_sha512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); + +#ifdef DOXYGEN_IGNORE +/** + * Apply the SHA-512 compression function. This function is identical to + * sph_sha384_comp().
+ * + * @param msg the message block (16 values) + * @param val the function 512-bit input and output + */ +void sph_sha512_comp(const sph_u64 msg[16], sph_u64 val[8]); +#endif + +#ifndef DOXYGEN_IGNORE +#define sph_sha512_comp sph_sha384_comp +#endif + +#endif /* SPH_64 */ + +#ifdef __cplusplus +} /* closes extern "C"; kept inside the include guard so a repeated #include from C++ never emits an unmatched brace */ +#endif + +#endif /* SPH_SHA2_H__ */ diff --git a/sph/sph_shabal.h b/sph/sph_shabal.h new file mode 100644 index 0000000000..4c96047742 --- /dev/null +++ b/sph/sph_shabal.h @@ -0,0 +1,336 @@ +/* $Id: sph_shabal.h 175 2010-05-07 16:03:20Z tp $ */ +/** + * Shabal interface. Shabal is a family of functions which differ by + * their output size; this implementation defines Shabal for output + * sizes 192, 224, 256, 384 and 512 bits. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * + * ===========================(LICENSE END)============================= + * + * @file sph_shabal.h + * @author Thomas Pornin + */ + +#ifndef SPH_SHABAL_H__ +#define SPH_SHABAL_H__ + +#include <stddef.h> /* size_t, used by sph_shabal_context.ptr and the len parameters */ +#include "sph_types.h" + +/** + * Output size (in bits) for Shabal-192. + */ +#define SPH_SIZE_shabal192 192 + +/** + * Output size (in bits) for Shabal-224. + */ +#define SPH_SIZE_shabal224 224 + +/** + * Output size (in bits) for Shabal-256. + */ +#define SPH_SIZE_shabal256 256 + +/** + * Output size (in bits) for Shabal-384. + */ +#define SPH_SIZE_shabal384 384 + +/** + * Output size (in bits) for Shabal-512. + */ +#define SPH_SIZE_shabal512 512 + +/** + * This structure is a context for Shabal computations: it contains the + * intermediate values and some data from the last entered block. Once + * a Shabal computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running Shabal computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + size_t ptr; + sph_u32 A[12], B[16], C[16]; + sph_u32 Whigh, Wlow; +#endif +} sph_shabal_context; + +/** + * Type for a Shabal-192 context (identical to the common context). + */ +typedef sph_shabal_context sph_shabal192_context; + +/** + * Type for a Shabal-224 context (identical to the common context). + */ +typedef sph_shabal_context sph_shabal224_context; + +/** + * Type for a Shabal-256 context (identical to the common context). + */ +typedef sph_shabal_context sph_shabal256_context; + +/** + * Type for a Shabal-384 context (identical to the common context). + */ +typedef sph_shabal_context sph_shabal384_context; + +/** + * Type for a Shabal-512 context (identical to the common context). + */ +typedef sph_shabal_context sph_shabal512_context; + +/** + * Initialize a Shabal-192 context.
This process performs no memory allocation. + * + * @param cc the Shabal-192 context (pointer to a + * sph_shabal192_context) + */ +void sph_shabal192_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Shabal-192 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_shabal192(void *cc, const void *data, size_t len); + +/** + * Terminate the current Shabal-192 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (24 bytes). The context is automatically + * reinitialized. + * + * @param cc the Shabal-192 context + * @param dst the destination buffer + */ +void sph_shabal192_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (24 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the Shabal-192 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_shabal192_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Initialize a Shabal-224 context. This process performs no memory allocation. + * + * @param cc the Shabal-224 context (pointer to a + * sph_shabal224_context) + */ +void sph_shabal224_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). 
+ * + * @param cc the Shabal-224 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_shabal224(void *cc, const void *data, size_t len); + +/** + * Terminate the current Shabal-224 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (28 bytes). The context is automatically + * reinitialized. + * + * @param cc the Shabal-224 context + * @param dst the destination buffer + */ +void sph_shabal224_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (28 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the Shabal-224 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_shabal224_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Initialize a Shabal-256 context. This process performs no memory allocation. + * + * @param cc the Shabal-256 context (pointer to a + * sph_shabal256_context) + */ +void sph_shabal256_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Shabal-256 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_shabal256(void *cc, const void *data, size_t len); + +/** + * Terminate the current Shabal-256 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (32 bytes). The context is automatically + * reinitialized. 
+ * + * @param cc the Shabal-256 context + * @param dst the destination buffer + */ +void sph_shabal256_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (32 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the Shabal-256 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_shabal256_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Initialize a Shabal-384 context. This process performs no memory allocation. + * + * @param cc the Shabal-384 context (pointer to a + * sph_shabal384_context) + */ +void sph_shabal384_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Shabal-384 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_shabal384(void *cc, const void *data, size_t len); + +/** + * Terminate the current Shabal-384 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (48 bytes). The context is automatically + * reinitialized. + * + * @param cc the Shabal-384 context + * @param dst the destination buffer + */ +void sph_shabal384_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (48 bytes). 
If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the Shabal-384 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_shabal384_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Initialize a Shabal-512 context. This process performs no memory allocation. + * + * @param cc the Shabal-512 context (pointer to a + * sph_shabal512_context) + */ +void sph_shabal512_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Shabal-512 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_shabal512(void *cc, const void *data, size_t len); + +/** + * Terminate the current Shabal-512 computation and output the result into + * the provided buffer. The destination buffer must be wide enough to + * accomodate the result (64 bytes). The context is automatically + * reinitialized. + * + * @param cc the Shabal-512 context + * @param dst the destination buffer + */ +void sph_shabal512_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (64 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. 
+ * + * @param cc the Shabal-512 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_shabal512_addbits_and_close( + void *cc, unsigned ub, unsigned n, void *dst); + +#endif diff --git a/sph/sph_tiger.h b/sph/sph_tiger.h new file mode 100644 index 0000000000..9cf9fda077 --- /dev/null +++ b/sph/sph_tiger.h @@ -0,0 +1,191 @@ +/* $Id: sph_tiger.h 216 2010-06-08 09:46:57Z tp $ */ +/** + * Tiger / Tiger-2 interface. + * + * Tiger has been published in: R. Anderson, E. Biham, "Tiger: A Fast + * New Hash Function", Fast Software Encryption - FSE'96, LNCS 1039, + * Springer (1996), pp. 89--97. + * + * Tiger2 has never been formally published, but it was described as + * identical to Tiger, except for the padding which is the same in + * Tiger2 as it is in MD4. Fortunately, an implementation of Tiger2 + * was submitted to NESSIE, which produced test vectors; the sphlib + * implementation of Tiger2 is compatible with the NESSIE test vectors. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_tiger.h + * @author Thomas Pornin + */ + +#ifndef SPH_TIGER_H__ +#define SPH_TIGER_H__ + +#include +#include "sph_types.h" + +#if SPH_64 + +/** + * Output size (in bits) for Tiger. + */ +#define SPH_SIZE_tiger 192 + +/** + * Output size (in bits) for Tiger2. + */ +#define SPH_SIZE_tiger2 192 + +/** + * This structure is a context for Tiger computations: it contains the + * intermediate values and some data from the last entered block. Once + * a Tiger computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running Tiger computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + sph_u64 val[3]; + sph_u64 count; +#endif +} sph_tiger_context; + +/** + * Initialize a Tiger context. This process performs no memory allocation. + * + * @param cc the Tiger context (pointer to + * a sph_tiger_context) + */ +void sph_tiger_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the Tiger context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_tiger(void *cc, const void *data, size_t len); + +/** + * Terminate the current Tiger computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accomodate the result (24 bytes). The context is automatically + * reinitialized. 
+ * + * @param cc the Tiger context + * @param dst the destination buffer + */ +void sph_tiger_close(void *cc, void *dst); + +/** + * Apply the Tiger compression function on the provided data. The + * msg parameter contains the 8 64-bit input blocks, + * as numerical values (hence after the little-endian decoding). The + * val parameter contains the 3 64-bit input blocks for + * the compression function; the output is written in place in this + * array. + * + * @param msg the message block (8 values) + * @param val the function 192-bit input and output + */ +void sph_tiger_comp(const sph_u64 msg[8], sph_u64 val[3]); + +/** + * This structure is a context for Tiger2 computations. It is identical + * to the Tiger context, and they may be freely exchanged, since the + * difference between Tiger and Tiger2 resides solely in the padding, which + * is computed only in the last computation step. + */ +typedef sph_tiger_context sph_tiger2_context; + +#ifdef DOXYGEN_IGNORE +/** + * Initialize a Tiger2 context. This function is identical to + * sph_tiger_init(). + * + * @param cc the Tiger2 context (pointer to + * a sph_tiger2_context) + */ +void sph_tiger2_init(void *cc); +#endif + +#ifndef DOXYGEN_IGNORE +#define sph_tiger2_init sph_tiger_init +#endif + +#ifdef DOXYGEN_IGNORE +/** + * Process some data bytes. This function is identical to + * sph_tiger(). + * + * @param cc the Tiger2 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_tiger2(void *cc, const void *data, size_t len); +#endif + +#ifndef DOXYGEN_IGNORE +#define sph_tiger2 sph_tiger +#endif + +/** + * Terminate the current Tiger2 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accommodate the result (24 bytes). The context is automatically + * reinitialized. Note that this function is NOT identical to + * sph_tiger_close(): this is the exact and unique point + * where Tiger and Tiger2 differ.
+ * + * @param cc the Tiger2 context + * @param dst the destination buffer + */ +void sph_tiger2_close(void *cc, void *dst); + +#ifdef DOXYGEN_IGNORE +/** + * Apply the Tiger2 compression function, which is identical to the Tiger + * compression function. + * + * @param msg the message block (8 values) + * @param val the function 192-bit input and output + */ +void sph_tiger2_comp(const sph_u64 msg[8], sph_u64 val[3]); +#endif + +#ifndef DOXYGEN_IGNORE +#define sph_tiger2_comp sph_tiger_comp +#endif + +#endif + +#endif diff --git a/sph/sph_whirlpool.h b/sph/sph_whirlpool.h new file mode 100644 index 0000000000..bc4c3d624b --- /dev/null +++ b/sph/sph_whirlpool.h @@ -0,0 +1,209 @@ +/* $Id: sph_whirlpool.h 216 2010-06-08 09:46:57Z tp $ */ +/** + * WHIRLPOOL interface. + * + * WHIRLPOOL knows three variants, dubbed "WHIRLPOOL-0" (original + * version, published in 2000, studied by NESSIE), "WHIRLPOOL-1" + * (first revision, 2001, with a new S-box) and "WHIRLPOOL" (current + * version, 2003, with a new diffusion matrix, also described as "plain + * WHIRLPOOL"). All three variants are implemented here. + * + * The original WHIRLPOOL (i.e. WHIRLPOOL-0) was published in: P. S. L. + * M. Barreto, V. Rijmen, "The Whirlpool Hashing Function", First open + * NESSIE Workshop, Leuven, Belgium, November 13--14, 2000.
+ * + * The current WHIRLPOOL specification and a reference implementation + * can be found on the WHIRLPOOL web page: + * http://paginas.terra.com.br/informatica/paulobarreto/WhirlpoolPage.html + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_whirlpool.h + * @author Thomas Pornin + */ + +#ifndef SPH_WHIRLPOOL_H__ +#define SPH_WHIRLPOOL_H__ + +#include +#include "sph_types.h" + +#if SPH_64 + +/** + * Output size (in bits) for WHIRLPOOL. + */ +#define SPH_SIZE_whirlpool 512 + +/** + * Output size (in bits) for WHIRLPOOL-0. + */ +#define SPH_SIZE_whirlpool0 512 + +/** + * Output size (in bits) for WHIRLPOOL-1. 
+ */ +#define SPH_SIZE_whirlpool1 512 + +/** + * This structure is a context for WHIRLPOOL computations: it contains the + * intermediate values and some data from the last entered block. Once + * a WHIRLPOOL computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running WHIRLPOOL computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + sph_u64 state[8]; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif +#endif +} sph_whirlpool_context; + +/** + * Initialize a WHIRLPOOL context. This process performs no memory allocation. + * + * @param cc the WHIRLPOOL context (pointer to a + * sph_whirlpool_context) + */ +void sph_whirlpool_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). This function applies the + * plain WHIRLPOOL algorithm. + * + * @param cc the WHIRLPOOL context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_whirlpool(void *cc, const void *data, size_t len); + +/** + * Terminate the current WHIRLPOOL computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accommodate the result (64 bytes). The context is automatically + * reinitialized. + * + * @param cc the WHIRLPOOL context + * @param dst the destination buffer + */ +void sph_whirlpool_close(void *cc, void *dst); + +/** + * WHIRLPOOL-0 uses the same structure as plain WHIRLPOOL. + */ +typedef sph_whirlpool_context sph_whirlpool0_context; + +#ifdef DOXYGEN_IGNORE +/** + * Initialize a WHIRLPOOL-0 context. This function is identical to + * sph_whirlpool_init().
+ * + * @param cc the WHIRLPOOL-0 context (pointer to a + * sph_whirlpool0_context) + */ +void sph_whirlpool0_init(void *cc); +#endif + +#ifndef DOXYGEN_IGNORE +#define sph_whirlpool0_init sph_whirlpool_init +#endif + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). This function applies the + * WHIRLPOOL-0 algorithm. + * + * @param cc the WHIRLPOOL-0 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_whirlpool0(void *cc, const void *data, size_t len); + +/** + * Terminate the current WHIRLPOOL-0 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accommodate the result (64 bytes). The context is automatically + * reinitialized. + * + * @param cc the WHIRLPOOL-0 context + * @param dst the destination buffer + */ +void sph_whirlpool0_close(void *cc, void *dst); + +/** + * WHIRLPOOL-1 uses the same structure as plain WHIRLPOOL. + */ +typedef sph_whirlpool_context sph_whirlpool1_context; + +#ifdef DOXYGEN_IGNORE +/** + * Initialize a WHIRLPOOL-1 context. This function is identical to + * sph_whirlpool_init(). + * + * @param cc the WHIRLPOOL-1 context (pointer to a + * sph_whirlpool1_context) + */ +void sph_whirlpool1_init(void *cc); +#endif + +#ifndef DOXYGEN_IGNORE +#define sph_whirlpool1_init sph_whirlpool_init +#endif + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). This function applies the + * WHIRLPOOL-1 algorithm. + * + * @param cc the WHIRLPOOL-1 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_whirlpool1(void *cc, const void *data, size_t len); + +/** + * Terminate the current WHIRLPOOL-1 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accommodate the result (64 bytes).
The context is automatically + * reinitialized. + * + * @param cc the WHIRLPOOL-1 context + * @param dst the destination buffer + */ +void sph_whirlpool1_close(void *cc, void *dst); + +#endif + +#endif diff --git a/sph/tiger.c b/sph/tiger.c new file mode 100644 index 0000000000..7ab5d178ac --- /dev/null +++ b/sph/tiger.c @@ -0,0 +1,698 @@ +/* $Id: tiger.c 216 2010-06-08 09:46:57Z tp $ */ +/* + * Tiger / Tiger2 implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include + +#include "sph_tiger.h" + +#if SPH_64 + +static const sph_u64 T1[256] = { + SPH_C64(0x02AAB17CF7E90C5E), SPH_C64(0xAC424B03E243A8EC), + SPH_C64(0x72CD5BE30DD5FCD3), SPH_C64(0x6D019B93F6F97F3A), + SPH_C64(0xCD9978FFD21F9193), SPH_C64(0x7573A1C9708029E2), + SPH_C64(0xB164326B922A83C3), SPH_C64(0x46883EEE04915870), + SPH_C64(0xEAACE3057103ECE6), SPH_C64(0xC54169B808A3535C), + SPH_C64(0x4CE754918DDEC47C), SPH_C64(0x0AA2F4DFDC0DF40C), + SPH_C64(0x10B76F18A74DBEFA), SPH_C64(0xC6CCB6235AD1AB6A), + SPH_C64(0x13726121572FE2FF), SPH_C64(0x1A488C6F199D921E), + SPH_C64(0x4BC9F9F4DA0007CA), SPH_C64(0x26F5E6F6E85241C7), + SPH_C64(0x859079DBEA5947B6), SPH_C64(0x4F1885C5C99E8C92), + SPH_C64(0xD78E761EA96F864B), SPH_C64(0x8E36428C52B5C17D), + SPH_C64(0x69CF6827373063C1), SPH_C64(0xB607C93D9BB4C56E), + SPH_C64(0x7D820E760E76B5EA), SPH_C64(0x645C9CC6F07FDC42), + SPH_C64(0xBF38A078243342E0), SPH_C64(0x5F6B343C9D2E7D04), + SPH_C64(0xF2C28AEB600B0EC6), SPH_C64(0x6C0ED85F7254BCAC), + SPH_C64(0x71592281A4DB4FE5), SPH_C64(0x1967FA69CE0FED9F), + SPH_C64(0xFD5293F8B96545DB), SPH_C64(0xC879E9D7F2A7600B), + SPH_C64(0x860248920193194E), SPH_C64(0xA4F9533B2D9CC0B3), + SPH_C64(0x9053836C15957613), SPH_C64(0xDB6DCF8AFC357BF1), + SPH_C64(0x18BEEA7A7A370F57), SPH_C64(0x037117CA50B99066), + SPH_C64(0x6AB30A9774424A35), SPH_C64(0xF4E92F02E325249B), + SPH_C64(0x7739DB07061CCAE1), SPH_C64(0xD8F3B49CECA42A05), + SPH_C64(0xBD56BE3F51382F73), SPH_C64(0x45FAED5843B0BB28), + SPH_C64(0x1C813D5C11BF1F83), SPH_C64(0x8AF0E4B6D75FA169), + SPH_C64(0x33EE18A487AD9999), SPH_C64(0x3C26E8EAB1C94410), + SPH_C64(0xB510102BC0A822F9), SPH_C64(0x141EEF310CE6123B), + SPH_C64(0xFC65B90059DDB154), SPH_C64(0xE0158640C5E0E607), + SPH_C64(0x884E079826C3A3CF), SPH_C64(0x930D0D9523C535FD), + SPH_C64(0x35638D754E9A2B00), SPH_C64(0x4085FCCF40469DD5), + SPH_C64(0xC4B17AD28BE23A4C), 
SPH_C64(0xCAB2F0FC6A3E6A2E), + SPH_C64(0x2860971A6B943FCD), SPH_C64(0x3DDE6EE212E30446), + SPH_C64(0x6222F32AE01765AE), SPH_C64(0x5D550BB5478308FE), + SPH_C64(0xA9EFA98DA0EDA22A), SPH_C64(0xC351A71686C40DA7), + SPH_C64(0x1105586D9C867C84), SPH_C64(0xDCFFEE85FDA22853), + SPH_C64(0xCCFBD0262C5EEF76), SPH_C64(0xBAF294CB8990D201), + SPH_C64(0xE69464F52AFAD975), SPH_C64(0x94B013AFDF133E14), + SPH_C64(0x06A7D1A32823C958), SPH_C64(0x6F95FE5130F61119), + SPH_C64(0xD92AB34E462C06C0), SPH_C64(0xED7BDE33887C71D2), + SPH_C64(0x79746D6E6518393E), SPH_C64(0x5BA419385D713329), + SPH_C64(0x7C1BA6B948A97564), SPH_C64(0x31987C197BFDAC67), + SPH_C64(0xDE6C23C44B053D02), SPH_C64(0x581C49FED002D64D), + SPH_C64(0xDD474D6338261571), SPH_C64(0xAA4546C3E473D062), + SPH_C64(0x928FCE349455F860), SPH_C64(0x48161BBACAAB94D9), + SPH_C64(0x63912430770E6F68), SPH_C64(0x6EC8A5E602C6641C), + SPH_C64(0x87282515337DDD2B), SPH_C64(0x2CDA6B42034B701B), + SPH_C64(0xB03D37C181CB096D), SPH_C64(0xE108438266C71C6F), + SPH_C64(0x2B3180C7EB51B255), SPH_C64(0xDF92B82F96C08BBC), + SPH_C64(0x5C68C8C0A632F3BA), SPH_C64(0x5504CC861C3D0556), + SPH_C64(0xABBFA4E55FB26B8F), SPH_C64(0x41848B0AB3BACEB4), + SPH_C64(0xB334A273AA445D32), SPH_C64(0xBCA696F0A85AD881), + SPH_C64(0x24F6EC65B528D56C), SPH_C64(0x0CE1512E90F4524A), + SPH_C64(0x4E9DD79D5506D35A), SPH_C64(0x258905FAC6CE9779), + SPH_C64(0x2019295B3E109B33), SPH_C64(0xF8A9478B73A054CC), + SPH_C64(0x2924F2F934417EB0), SPH_C64(0x3993357D536D1BC4), + SPH_C64(0x38A81AC21DB6FF8B), SPH_C64(0x47C4FBF17D6016BF), + SPH_C64(0x1E0FAADD7667E3F5), SPH_C64(0x7ABCFF62938BEB96), + SPH_C64(0xA78DAD948FC179C9), SPH_C64(0x8F1F98B72911E50D), + SPH_C64(0x61E48EAE27121A91), SPH_C64(0x4D62F7AD31859808), + SPH_C64(0xECEBA345EF5CEAEB), SPH_C64(0xF5CEB25EBC9684CE), + SPH_C64(0xF633E20CB7F76221), SPH_C64(0xA32CDF06AB8293E4), + SPH_C64(0x985A202CA5EE2CA4), SPH_C64(0xCF0B8447CC8A8FB1), + SPH_C64(0x9F765244979859A3), SPH_C64(0xA8D516B1A1240017), + SPH_C64(0x0BD7BA3EBB5DC726), 
SPH_C64(0xE54BCA55B86ADB39), + SPH_C64(0x1D7A3AFD6C478063), SPH_C64(0x519EC608E7669EDD), + SPH_C64(0x0E5715A2D149AA23), SPH_C64(0x177D4571848FF194), + SPH_C64(0xEEB55F3241014C22), SPH_C64(0x0F5E5CA13A6E2EC2), + SPH_C64(0x8029927B75F5C361), SPH_C64(0xAD139FABC3D6E436), + SPH_C64(0x0D5DF1A94CCF402F), SPH_C64(0x3E8BD948BEA5DFC8), + SPH_C64(0xA5A0D357BD3FF77E), SPH_C64(0xA2D12E251F74F645), + SPH_C64(0x66FD9E525E81A082), SPH_C64(0x2E0C90CE7F687A49), + SPH_C64(0xC2E8BCBEBA973BC5), SPH_C64(0x000001BCE509745F), + SPH_C64(0x423777BBE6DAB3D6), SPH_C64(0xD1661C7EAEF06EB5), + SPH_C64(0xA1781F354DAACFD8), SPH_C64(0x2D11284A2B16AFFC), + SPH_C64(0xF1FC4F67FA891D1F), SPH_C64(0x73ECC25DCB920ADA), + SPH_C64(0xAE610C22C2A12651), SPH_C64(0x96E0A810D356B78A), + SPH_C64(0x5A9A381F2FE7870F), SPH_C64(0xD5AD62EDE94E5530), + SPH_C64(0xD225E5E8368D1427), SPH_C64(0x65977B70C7AF4631), + SPH_C64(0x99F889B2DE39D74F), SPH_C64(0x233F30BF54E1D143), + SPH_C64(0x9A9675D3D9A63C97), SPH_C64(0x5470554FF334F9A8), + SPH_C64(0x166ACB744A4F5688), SPH_C64(0x70C74CAAB2E4AEAD), + SPH_C64(0xF0D091646F294D12), SPH_C64(0x57B82A89684031D1), + SPH_C64(0xEFD95A5A61BE0B6B), SPH_C64(0x2FBD12E969F2F29A), + SPH_C64(0x9BD37013FEFF9FE8), SPH_C64(0x3F9B0404D6085A06), + SPH_C64(0x4940C1F3166CFE15), SPH_C64(0x09542C4DCDF3DEFB), + SPH_C64(0xB4C5218385CD5CE3), SPH_C64(0xC935B7DC4462A641), + SPH_C64(0x3417F8A68ED3B63F), SPH_C64(0xB80959295B215B40), + SPH_C64(0xF99CDAEF3B8C8572), SPH_C64(0x018C0614F8FCB95D), + SPH_C64(0x1B14ACCD1A3ACDF3), SPH_C64(0x84D471F200BB732D), + SPH_C64(0xC1A3110E95E8DA16), SPH_C64(0x430A7220BF1A82B8), + SPH_C64(0xB77E090D39DF210E), SPH_C64(0x5EF4BD9F3CD05E9D), + SPH_C64(0x9D4FF6DA7E57A444), SPH_C64(0xDA1D60E183D4A5F8), + SPH_C64(0xB287C38417998E47), SPH_C64(0xFE3EDC121BB31886), + SPH_C64(0xC7FE3CCC980CCBEF), SPH_C64(0xE46FB590189BFD03), + SPH_C64(0x3732FD469A4C57DC), SPH_C64(0x7EF700A07CF1AD65), + SPH_C64(0x59C64468A31D8859), SPH_C64(0x762FB0B4D45B61F6), + SPH_C64(0x155BAED099047718), 
SPH_C64(0x68755E4C3D50BAA6), + SPH_C64(0xE9214E7F22D8B4DF), SPH_C64(0x2ADDBF532EAC95F4), + SPH_C64(0x32AE3909B4BD0109), SPH_C64(0x834DF537B08E3450), + SPH_C64(0xFA209DA84220728D), SPH_C64(0x9E691D9B9EFE23F7), + SPH_C64(0x0446D288C4AE8D7F), SPH_C64(0x7B4CC524E169785B), + SPH_C64(0x21D87F0135CA1385), SPH_C64(0xCEBB400F137B8AA5), + SPH_C64(0x272E2B66580796BE), SPH_C64(0x3612264125C2B0DE), + SPH_C64(0x057702BDAD1EFBB2), SPH_C64(0xD4BABB8EACF84BE9), + SPH_C64(0x91583139641BC67B), SPH_C64(0x8BDC2DE08036E024), + SPH_C64(0x603C8156F49F68ED), SPH_C64(0xF7D236F7DBEF5111), + SPH_C64(0x9727C4598AD21E80), SPH_C64(0xA08A0896670A5FD7), + SPH_C64(0xCB4A8F4309EBA9CB), SPH_C64(0x81AF564B0F7036A1), + SPH_C64(0xC0B99AA778199ABD), SPH_C64(0x959F1EC83FC8E952), + SPH_C64(0x8C505077794A81B9), SPH_C64(0x3ACAAF8F056338F0), + SPH_C64(0x07B43F50627A6778), SPH_C64(0x4A44AB49F5ECCC77), + SPH_C64(0x3BC3D6E4B679EE98), SPH_C64(0x9CC0D4D1CF14108C), + SPH_C64(0x4406C00B206BC8A0), SPH_C64(0x82A18854C8D72D89), + SPH_C64(0x67E366B35C3C432C), SPH_C64(0xB923DD61102B37F2), + SPH_C64(0x56AB2779D884271D), SPH_C64(0xBE83E1B0FF1525AF), + SPH_C64(0xFB7C65D4217E49A9), SPH_C64(0x6BDBE0E76D48E7D4), + SPH_C64(0x08DF828745D9179E), SPH_C64(0x22EA6A9ADD53BD34), + SPH_C64(0xE36E141C5622200A), SPH_C64(0x7F805D1B8CB750EE), + SPH_C64(0xAFE5C7A59F58E837), SPH_C64(0xE27F996A4FB1C23C), + SPH_C64(0xD3867DFB0775F0D0), SPH_C64(0xD0E673DE6E88891A), + SPH_C64(0x123AEB9EAFB86C25), SPH_C64(0x30F1D5D5C145B895), + SPH_C64(0xBB434A2DEE7269E7), SPH_C64(0x78CB67ECF931FA38), + SPH_C64(0xF33B0372323BBF9C), SPH_C64(0x52D66336FB279C74), + SPH_C64(0x505F33AC0AFB4EAA), SPH_C64(0xE8A5CD99A2CCE187), + SPH_C64(0x534974801E2D30BB), SPH_C64(0x8D2D5711D5876D90), + SPH_C64(0x1F1A412891BC038E), SPH_C64(0xD6E2E71D82E56648), + SPH_C64(0x74036C3A497732B7), SPH_C64(0x89B67ED96361F5AB), + SPH_C64(0xFFED95D8F1EA02A2), SPH_C64(0xE72B3BD61464D43D), + SPH_C64(0xA6300F170BDC4820), SPH_C64(0xEBC18760ED78A77A), +}; + +static const sph_u64 T2[256] = { + 
SPH_C64(0xE6A6BE5A05A12138), SPH_C64(0xB5A122A5B4F87C98), + SPH_C64(0x563C6089140B6990), SPH_C64(0x4C46CB2E391F5DD5), + SPH_C64(0xD932ADDBC9B79434), SPH_C64(0x08EA70E42015AFF5), + SPH_C64(0xD765A6673E478CF1), SPH_C64(0xC4FB757EAB278D99), + SPH_C64(0xDF11C6862D6E0692), SPH_C64(0xDDEB84F10D7F3B16), + SPH_C64(0x6F2EF604A665EA04), SPH_C64(0x4A8E0F0FF0E0DFB3), + SPH_C64(0xA5EDEEF83DBCBA51), SPH_C64(0xFC4F0A2A0EA4371E), + SPH_C64(0xE83E1DA85CB38429), SPH_C64(0xDC8FF882BA1B1CE2), + SPH_C64(0xCD45505E8353E80D), SPH_C64(0x18D19A00D4DB0717), + SPH_C64(0x34A0CFEDA5F38101), SPH_C64(0x0BE77E518887CAF2), + SPH_C64(0x1E341438B3C45136), SPH_C64(0xE05797F49089CCF9), + SPH_C64(0xFFD23F9DF2591D14), SPH_C64(0x543DDA228595C5CD), + SPH_C64(0x661F81FD99052A33), SPH_C64(0x8736E641DB0F7B76), + SPH_C64(0x15227725418E5307), SPH_C64(0xE25F7F46162EB2FA), + SPH_C64(0x48A8B2126C13D9FE), SPH_C64(0xAFDC541792E76EEA), + SPH_C64(0x03D912BFC6D1898F), SPH_C64(0x31B1AAFA1B83F51B), + SPH_C64(0xF1AC2796E42AB7D9), SPH_C64(0x40A3A7D7FCD2EBAC), + SPH_C64(0x1056136D0AFBBCC5), SPH_C64(0x7889E1DD9A6D0C85), + SPH_C64(0xD33525782A7974AA), SPH_C64(0xA7E25D09078AC09B), + SPH_C64(0xBD4138B3EAC6EDD0), SPH_C64(0x920ABFBE71EB9E70), + SPH_C64(0xA2A5D0F54FC2625C), SPH_C64(0xC054E36B0B1290A3), + SPH_C64(0xF6DD59FF62FE932B), SPH_C64(0x3537354511A8AC7D), + SPH_C64(0xCA845E9172FADCD4), SPH_C64(0x84F82B60329D20DC), + SPH_C64(0x79C62CE1CD672F18), SPH_C64(0x8B09A2ADD124642C), + SPH_C64(0xD0C1E96A19D9E726), SPH_C64(0x5A786A9B4BA9500C), + SPH_C64(0x0E020336634C43F3), SPH_C64(0xC17B474AEB66D822), + SPH_C64(0x6A731AE3EC9BAAC2), SPH_C64(0x8226667AE0840258), + SPH_C64(0x67D4567691CAECA5), SPH_C64(0x1D94155C4875ADB5), + SPH_C64(0x6D00FD985B813FDF), SPH_C64(0x51286EFCB774CD06), + SPH_C64(0x5E8834471FA744AF), SPH_C64(0xF72CA0AEE761AE2E), + SPH_C64(0xBE40E4CDAEE8E09A), SPH_C64(0xE9970BBB5118F665), + SPH_C64(0x726E4BEB33DF1964), SPH_C64(0x703B000729199762), + SPH_C64(0x4631D816F5EF30A7), SPH_C64(0xB880B5B51504A6BE), + 
SPH_C64(0x641793C37ED84B6C), SPH_C64(0x7B21ED77F6E97D96), + SPH_C64(0x776306312EF96B73), SPH_C64(0xAE528948E86FF3F4), + SPH_C64(0x53DBD7F286A3F8F8), SPH_C64(0x16CADCE74CFC1063), + SPH_C64(0x005C19BDFA52C6DD), SPH_C64(0x68868F5D64D46AD3), + SPH_C64(0x3A9D512CCF1E186A), SPH_C64(0x367E62C2385660AE), + SPH_C64(0xE359E7EA77DCB1D7), SPH_C64(0x526C0773749ABE6E), + SPH_C64(0x735AE5F9D09F734B), SPH_C64(0x493FC7CC8A558BA8), + SPH_C64(0xB0B9C1533041AB45), SPH_C64(0x321958BA470A59BD), + SPH_C64(0x852DB00B5F46C393), SPH_C64(0x91209B2BD336B0E5), + SPH_C64(0x6E604F7D659EF19F), SPH_C64(0xB99A8AE2782CCB24), + SPH_C64(0xCCF52AB6C814C4C7), SPH_C64(0x4727D9AFBE11727B), + SPH_C64(0x7E950D0C0121B34D), SPH_C64(0x756F435670AD471F), + SPH_C64(0xF5ADD442615A6849), SPH_C64(0x4E87E09980B9957A), + SPH_C64(0x2ACFA1DF50AEE355), SPH_C64(0xD898263AFD2FD556), + SPH_C64(0xC8F4924DD80C8FD6), SPH_C64(0xCF99CA3D754A173A), + SPH_C64(0xFE477BACAF91BF3C), SPH_C64(0xED5371F6D690C12D), + SPH_C64(0x831A5C285E687094), SPH_C64(0xC5D3C90A3708A0A4), + SPH_C64(0x0F7F903717D06580), SPH_C64(0x19F9BB13B8FDF27F), + SPH_C64(0xB1BD6F1B4D502843), SPH_C64(0x1C761BA38FFF4012), + SPH_C64(0x0D1530C4E2E21F3B), SPH_C64(0x8943CE69A7372C8A), + SPH_C64(0xE5184E11FEB5CE66), SPH_C64(0x618BDB80BD736621), + SPH_C64(0x7D29BAD68B574D0B), SPH_C64(0x81BB613E25E6FE5B), + SPH_C64(0x071C9C10BC07913F), SPH_C64(0xC7BEEB7909AC2D97), + SPH_C64(0xC3E58D353BC5D757), SPH_C64(0xEB017892F38F61E8), + SPH_C64(0xD4EFFB9C9B1CC21A), SPH_C64(0x99727D26F494F7AB), + SPH_C64(0xA3E063A2956B3E03), SPH_C64(0x9D4A8B9A4AA09C30), + SPH_C64(0x3F6AB7D500090FB4), SPH_C64(0x9CC0F2A057268AC0), + SPH_C64(0x3DEE9D2DEDBF42D1), SPH_C64(0x330F49C87960A972), + SPH_C64(0xC6B2720287421B41), SPH_C64(0x0AC59EC07C00369C), + SPH_C64(0xEF4EAC49CB353425), SPH_C64(0xF450244EEF0129D8), + SPH_C64(0x8ACC46E5CAF4DEB6), SPH_C64(0x2FFEAB63989263F7), + SPH_C64(0x8F7CB9FE5D7A4578), SPH_C64(0x5BD8F7644E634635), + SPH_C64(0x427A7315BF2DC900), SPH_C64(0x17D0C4AA2125261C), + 
SPH_C64(0x3992486C93518E50), SPH_C64(0xB4CBFEE0A2D7D4C3), + SPH_C64(0x7C75D6202C5DDD8D), SPH_C64(0xDBC295D8E35B6C61), + SPH_C64(0x60B369D302032B19), SPH_C64(0xCE42685FDCE44132), + SPH_C64(0x06F3DDB9DDF65610), SPH_C64(0x8EA4D21DB5E148F0), + SPH_C64(0x20B0FCE62FCD496F), SPH_C64(0x2C1B912358B0EE31), + SPH_C64(0xB28317B818F5A308), SPH_C64(0xA89C1E189CA6D2CF), + SPH_C64(0x0C6B18576AAADBC8), SPH_C64(0xB65DEAA91299FAE3), + SPH_C64(0xFB2B794B7F1027E7), SPH_C64(0x04E4317F443B5BEB), + SPH_C64(0x4B852D325939D0A6), SPH_C64(0xD5AE6BEEFB207FFC), + SPH_C64(0x309682B281C7D374), SPH_C64(0xBAE309A194C3B475), + SPH_C64(0x8CC3F97B13B49F05), SPH_C64(0x98A9422FF8293967), + SPH_C64(0x244B16B01076FF7C), SPH_C64(0xF8BF571C663D67EE), + SPH_C64(0x1F0D6758EEE30DA1), SPH_C64(0xC9B611D97ADEB9B7), + SPH_C64(0xB7AFD5887B6C57A2), SPH_C64(0x6290AE846B984FE1), + SPH_C64(0x94DF4CDEACC1A5FD), SPH_C64(0x058A5BD1C5483AFF), + SPH_C64(0x63166CC142BA3C37), SPH_C64(0x8DB8526EB2F76F40), + SPH_C64(0xE10880036F0D6D4E), SPH_C64(0x9E0523C9971D311D), + SPH_C64(0x45EC2824CC7CD691), SPH_C64(0x575B8359E62382C9), + SPH_C64(0xFA9E400DC4889995), SPH_C64(0xD1823ECB45721568), + SPH_C64(0xDAFD983B8206082F), SPH_C64(0xAA7D29082386A8CB), + SPH_C64(0x269FCD4403B87588), SPH_C64(0x1B91F5F728BDD1E0), + SPH_C64(0xE4669F39040201F6), SPH_C64(0x7A1D7C218CF04ADE), + SPH_C64(0x65623C29D79CE5CE), SPH_C64(0x2368449096C00BB1), + SPH_C64(0xAB9BF1879DA503BA), SPH_C64(0xBC23ECB1A458058E), + SPH_C64(0x9A58DF01BB401ECC), SPH_C64(0xA070E868A85F143D), + SPH_C64(0x4FF188307DF2239E), SPH_C64(0x14D565B41A641183), + SPH_C64(0xEE13337452701602), SPH_C64(0x950E3DCF3F285E09), + SPH_C64(0x59930254B9C80953), SPH_C64(0x3BF299408930DA6D), + SPH_C64(0xA955943F53691387), SPH_C64(0xA15EDECAA9CB8784), + SPH_C64(0x29142127352BE9A0), SPH_C64(0x76F0371FFF4E7AFB), + SPH_C64(0x0239F450274F2228), SPH_C64(0xBB073AF01D5E868B), + SPH_C64(0xBFC80571C10E96C1), SPH_C64(0xD267088568222E23), + SPH_C64(0x9671A3D48E80B5B0), SPH_C64(0x55B5D38AE193BB81), + 
SPH_C64(0x693AE2D0A18B04B8), SPH_C64(0x5C48B4ECADD5335F), + SPH_C64(0xFD743B194916A1CA), SPH_C64(0x2577018134BE98C4), + SPH_C64(0xE77987E83C54A4AD), SPH_C64(0x28E11014DA33E1B9), + SPH_C64(0x270CC59E226AA213), SPH_C64(0x71495F756D1A5F60), + SPH_C64(0x9BE853FB60AFEF77), SPH_C64(0xADC786A7F7443DBF), + SPH_C64(0x0904456173B29A82), SPH_C64(0x58BC7A66C232BD5E), + SPH_C64(0xF306558C673AC8B2), SPH_C64(0x41F639C6B6C9772A), + SPH_C64(0x216DEFE99FDA35DA), SPH_C64(0x11640CC71C7BE615), + SPH_C64(0x93C43694565C5527), SPH_C64(0xEA038E6246777839), + SPH_C64(0xF9ABF3CE5A3E2469), SPH_C64(0x741E768D0FD312D2), + SPH_C64(0x0144B883CED652C6), SPH_C64(0xC20B5A5BA33F8552), + SPH_C64(0x1AE69633C3435A9D), SPH_C64(0x97A28CA4088CFDEC), + SPH_C64(0x8824A43C1E96F420), SPH_C64(0x37612FA66EEEA746), + SPH_C64(0x6B4CB165F9CF0E5A), SPH_C64(0x43AA1C06A0ABFB4A), + SPH_C64(0x7F4DC26FF162796B), SPH_C64(0x6CBACC8E54ED9B0F), + SPH_C64(0xA6B7FFEFD2BB253E), SPH_C64(0x2E25BC95B0A29D4F), + SPH_C64(0x86D6A58BDEF1388C), SPH_C64(0xDED74AC576B6F054), + SPH_C64(0x8030BDBC2B45805D), SPH_C64(0x3C81AF70E94D9289), + SPH_C64(0x3EFF6DDA9E3100DB), SPH_C64(0xB38DC39FDFCC8847), + SPH_C64(0x123885528D17B87E), SPH_C64(0xF2DA0ED240B1B642), + SPH_C64(0x44CEFADCD54BF9A9), SPH_C64(0x1312200E433C7EE6), + SPH_C64(0x9FFCC84F3A78C748), SPH_C64(0xF0CD1F72248576BB), + SPH_C64(0xEC6974053638CFE4), SPH_C64(0x2BA7B67C0CEC4E4C), + SPH_C64(0xAC2F4DF3E5CE32ED), SPH_C64(0xCB33D14326EA4C11), + SPH_C64(0xA4E9044CC77E58BC), SPH_C64(0x5F513293D934FCEF), + SPH_C64(0x5DC9645506E55444), SPH_C64(0x50DE418F317DE40A), + SPH_C64(0x388CB31A69DDE259), SPH_C64(0x2DB4A83455820A86), + SPH_C64(0x9010A91E84711AE9), SPH_C64(0x4DF7F0B7B1498371), + SPH_C64(0xD62A2EABC0977179), SPH_C64(0x22FAC097AA8D5C0E), +}; + +static const sph_u64 T3[256] = { + SPH_C64(0xF49FCC2FF1DAF39B), SPH_C64(0x487FD5C66FF29281), + SPH_C64(0xE8A30667FCDCA83F), SPH_C64(0x2C9B4BE3D2FCCE63), + SPH_C64(0xDA3FF74B93FBBBC2), SPH_C64(0x2FA165D2FE70BA66), + SPH_C64(0xA103E279970E93D4), 
SPH_C64(0xBECDEC77B0E45E71), + SPH_C64(0xCFB41E723985E497), SPH_C64(0xB70AAA025EF75017), + SPH_C64(0xD42309F03840B8E0), SPH_C64(0x8EFC1AD035898579), + SPH_C64(0x96C6920BE2B2ABC5), SPH_C64(0x66AF4163375A9172), + SPH_C64(0x2174ABDCCA7127FB), SPH_C64(0xB33CCEA64A72FF41), + SPH_C64(0xF04A4933083066A5), SPH_C64(0x8D970ACDD7289AF5), + SPH_C64(0x8F96E8E031C8C25E), SPH_C64(0xF3FEC02276875D47), + SPH_C64(0xEC7BF310056190DD), SPH_C64(0xF5ADB0AEBB0F1491), + SPH_C64(0x9B50F8850FD58892), SPH_C64(0x4975488358B74DE8), + SPH_C64(0xA3354FF691531C61), SPH_C64(0x0702BBE481D2C6EE), + SPH_C64(0x89FB24057DEDED98), SPH_C64(0xAC3075138596E902), + SPH_C64(0x1D2D3580172772ED), SPH_C64(0xEB738FC28E6BC30D), + SPH_C64(0x5854EF8F63044326), SPH_C64(0x9E5C52325ADD3BBE), + SPH_C64(0x90AA53CF325C4623), SPH_C64(0xC1D24D51349DD067), + SPH_C64(0x2051CFEEA69EA624), SPH_C64(0x13220F0A862E7E4F), + SPH_C64(0xCE39399404E04864), SPH_C64(0xD9C42CA47086FCB7), + SPH_C64(0x685AD2238A03E7CC), SPH_C64(0x066484B2AB2FF1DB), + SPH_C64(0xFE9D5D70EFBF79EC), SPH_C64(0x5B13B9DD9C481854), + SPH_C64(0x15F0D475ED1509AD), SPH_C64(0x0BEBCD060EC79851), + SPH_C64(0xD58C6791183AB7F8), SPH_C64(0xD1187C5052F3EEE4), + SPH_C64(0xC95D1192E54E82FF), SPH_C64(0x86EEA14CB9AC6CA2), + SPH_C64(0x3485BEB153677D5D), SPH_C64(0xDD191D781F8C492A), + SPH_C64(0xF60866BAA784EBF9), SPH_C64(0x518F643BA2D08C74), + SPH_C64(0x8852E956E1087C22), SPH_C64(0xA768CB8DC410AE8D), + SPH_C64(0x38047726BFEC8E1A), SPH_C64(0xA67738B4CD3B45AA), + SPH_C64(0xAD16691CEC0DDE19), SPH_C64(0xC6D4319380462E07), + SPH_C64(0xC5A5876D0BA61938), SPH_C64(0x16B9FA1FA58FD840), + SPH_C64(0x188AB1173CA74F18), SPH_C64(0xABDA2F98C99C021F), + SPH_C64(0x3E0580AB134AE816), SPH_C64(0x5F3B05B773645ABB), + SPH_C64(0x2501A2BE5575F2F6), SPH_C64(0x1B2F74004E7E8BA9), + SPH_C64(0x1CD7580371E8D953), SPH_C64(0x7F6ED89562764E30), + SPH_C64(0xB15926FF596F003D), SPH_C64(0x9F65293DA8C5D6B9), + SPH_C64(0x6ECEF04DD690F84C), SPH_C64(0x4782275FFF33AF88), + SPH_C64(0xE41433083F820801), 
SPH_C64(0xFD0DFE409A1AF9B5), + SPH_C64(0x4325A3342CDB396B), SPH_C64(0x8AE77E62B301B252), + SPH_C64(0xC36F9E9F6655615A), SPH_C64(0x85455A2D92D32C09), + SPH_C64(0xF2C7DEA949477485), SPH_C64(0x63CFB4C133A39EBA), + SPH_C64(0x83B040CC6EBC5462), SPH_C64(0x3B9454C8FDB326B0), + SPH_C64(0x56F56A9E87FFD78C), SPH_C64(0x2DC2940D99F42BC6), + SPH_C64(0x98F7DF096B096E2D), SPH_C64(0x19A6E01E3AD852BF), + SPH_C64(0x42A99CCBDBD4B40B), SPH_C64(0xA59998AF45E9C559), + SPH_C64(0x366295E807D93186), SPH_C64(0x6B48181BFAA1F773), + SPH_C64(0x1FEC57E2157A0A1D), SPH_C64(0x4667446AF6201AD5), + SPH_C64(0xE615EBCACFB0F075), SPH_C64(0xB8F31F4F68290778), + SPH_C64(0x22713ED6CE22D11E), SPH_C64(0x3057C1A72EC3C93B), + SPH_C64(0xCB46ACC37C3F1F2F), SPH_C64(0xDBB893FD02AAF50E), + SPH_C64(0x331FD92E600B9FCF), SPH_C64(0xA498F96148EA3AD6), + SPH_C64(0xA8D8426E8B6A83EA), SPH_C64(0xA089B274B7735CDC), + SPH_C64(0x87F6B3731E524A11), SPH_C64(0x118808E5CBC96749), + SPH_C64(0x9906E4C7B19BD394), SPH_C64(0xAFED7F7E9B24A20C), + SPH_C64(0x6509EADEEB3644A7), SPH_C64(0x6C1EF1D3E8EF0EDE), + SPH_C64(0xB9C97D43E9798FB4), SPH_C64(0xA2F2D784740C28A3), + SPH_C64(0x7B8496476197566F), SPH_C64(0x7A5BE3E6B65F069D), + SPH_C64(0xF96330ED78BE6F10), SPH_C64(0xEEE60DE77A076A15), + SPH_C64(0x2B4BEE4AA08B9BD0), SPH_C64(0x6A56A63EC7B8894E), + SPH_C64(0x02121359BA34FEF4), SPH_C64(0x4CBF99F8283703FC), + SPH_C64(0x398071350CAF30C8), SPH_C64(0xD0A77A89F017687A), + SPH_C64(0xF1C1A9EB9E423569), SPH_C64(0x8C7976282DEE8199), + SPH_C64(0x5D1737A5DD1F7ABD), SPH_C64(0x4F53433C09A9FA80), + SPH_C64(0xFA8B0C53DF7CA1D9), SPH_C64(0x3FD9DCBC886CCB77), + SPH_C64(0xC040917CA91B4720), SPH_C64(0x7DD00142F9D1DCDF), + SPH_C64(0x8476FC1D4F387B58), SPH_C64(0x23F8E7C5F3316503), + SPH_C64(0x032A2244E7E37339), SPH_C64(0x5C87A5D750F5A74B), + SPH_C64(0x082B4CC43698992E), SPH_C64(0xDF917BECB858F63C), + SPH_C64(0x3270B8FC5BF86DDA), SPH_C64(0x10AE72BB29B5DD76), + SPH_C64(0x576AC94E7700362B), SPH_C64(0x1AD112DAC61EFB8F), + SPH_C64(0x691BC30EC5FAA427), 
SPH_C64(0xFF246311CC327143), + SPH_C64(0x3142368E30E53206), SPH_C64(0x71380E31E02CA396), + SPH_C64(0x958D5C960AAD76F1), SPH_C64(0xF8D6F430C16DA536), + SPH_C64(0xC8FFD13F1BE7E1D2), SPH_C64(0x7578AE66004DDBE1), + SPH_C64(0x05833F01067BE646), SPH_C64(0xBB34B5AD3BFE586D), + SPH_C64(0x095F34C9A12B97F0), SPH_C64(0x247AB64525D60CA8), + SPH_C64(0xDCDBC6F3017477D1), SPH_C64(0x4A2E14D4DECAD24D), + SPH_C64(0xBDB5E6D9BE0A1EEB), SPH_C64(0x2A7E70F7794301AB), + SPH_C64(0xDEF42D8A270540FD), SPH_C64(0x01078EC0A34C22C1), + SPH_C64(0xE5DE511AF4C16387), SPH_C64(0x7EBB3A52BD9A330A), + SPH_C64(0x77697857AA7D6435), SPH_C64(0x004E831603AE4C32), + SPH_C64(0xE7A21020AD78E312), SPH_C64(0x9D41A70C6AB420F2), + SPH_C64(0x28E06C18EA1141E6), SPH_C64(0xD2B28CBD984F6B28), + SPH_C64(0x26B75F6C446E9D83), SPH_C64(0xBA47568C4D418D7F), + SPH_C64(0xD80BADBFE6183D8E), SPH_C64(0x0E206D7F5F166044), + SPH_C64(0xE258A43911CBCA3E), SPH_C64(0x723A1746B21DC0BC), + SPH_C64(0xC7CAA854F5D7CDD3), SPH_C64(0x7CAC32883D261D9C), + SPH_C64(0x7690C26423BA942C), SPH_C64(0x17E55524478042B8), + SPH_C64(0xE0BE477656A2389F), SPH_C64(0x4D289B5E67AB2DA0), + SPH_C64(0x44862B9C8FBBFD31), SPH_C64(0xB47CC8049D141365), + SPH_C64(0x822C1B362B91C793), SPH_C64(0x4EB14655FB13DFD8), + SPH_C64(0x1ECBBA0714E2A97B), SPH_C64(0x6143459D5CDE5F14), + SPH_C64(0x53A8FBF1D5F0AC89), SPH_C64(0x97EA04D81C5E5B00), + SPH_C64(0x622181A8D4FDB3F3), SPH_C64(0xE9BCD341572A1208), + SPH_C64(0x1411258643CCE58A), SPH_C64(0x9144C5FEA4C6E0A4), + SPH_C64(0x0D33D06565CF620F), SPH_C64(0x54A48D489F219CA1), + SPH_C64(0xC43E5EAC6D63C821), SPH_C64(0xA9728B3A72770DAF), + SPH_C64(0xD7934E7B20DF87EF), SPH_C64(0xE35503B61A3E86E5), + SPH_C64(0xCAE321FBC819D504), SPH_C64(0x129A50B3AC60BFA6), + SPH_C64(0xCD5E68EA7E9FB6C3), SPH_C64(0xB01C90199483B1C7), + SPH_C64(0x3DE93CD5C295376C), SPH_C64(0xAED52EDF2AB9AD13), + SPH_C64(0x2E60F512C0A07884), SPH_C64(0xBC3D86A3E36210C9), + SPH_C64(0x35269D9B163951CE), SPH_C64(0x0C7D6E2AD0CDB5FA), + SPH_C64(0x59E86297D87F5733), 
SPH_C64(0x298EF221898DB0E7), + SPH_C64(0x55000029D1A5AA7E), SPH_C64(0x8BC08AE1B5061B45), + SPH_C64(0xC2C31C2B6C92703A), SPH_C64(0x94CC596BAF25EF42), + SPH_C64(0x0A1D73DB22540456), SPH_C64(0x04B6A0F9D9C4179A), + SPH_C64(0xEFFDAFA2AE3D3C60), SPH_C64(0xF7C8075BB49496C4), + SPH_C64(0x9CC5C7141D1CD4E3), SPH_C64(0x78BD1638218E5534), + SPH_C64(0xB2F11568F850246A), SPH_C64(0xEDFABCFA9502BC29), + SPH_C64(0x796CE5F2DA23051B), SPH_C64(0xAAE128B0DC93537C), + SPH_C64(0x3A493DA0EE4B29AE), SPH_C64(0xB5DF6B2C416895D7), + SPH_C64(0xFCABBD25122D7F37), SPH_C64(0x70810B58105DC4B1), + SPH_C64(0xE10FDD37F7882A90), SPH_C64(0x524DCAB5518A3F5C), + SPH_C64(0x3C9E85878451255B), SPH_C64(0x4029828119BD34E2), + SPH_C64(0x74A05B6F5D3CECCB), SPH_C64(0xB610021542E13ECA), + SPH_C64(0x0FF979D12F59E2AC), SPH_C64(0x6037DA27E4F9CC50), + SPH_C64(0x5E92975A0DF1847D), SPH_C64(0xD66DE190D3E623FE), + SPH_C64(0x5032D6B87B568048), SPH_C64(0x9A36B7CE8235216E), + SPH_C64(0x80272A7A24F64B4A), SPH_C64(0x93EFED8B8C6916F7), + SPH_C64(0x37DDBFF44CCE1555), SPH_C64(0x4B95DB5D4B99BD25), + SPH_C64(0x92D3FDA169812FC0), SPH_C64(0xFB1A4A9A90660BB6), + SPH_C64(0x730C196946A4B9B2), SPH_C64(0x81E289AA7F49DA68), + SPH_C64(0x64669A0F83B1A05F), SPH_C64(0x27B3FF7D9644F48B), + SPH_C64(0xCC6B615C8DB675B3), SPH_C64(0x674F20B9BCEBBE95), + SPH_C64(0x6F31238275655982), SPH_C64(0x5AE488713E45CF05), + SPH_C64(0xBF619F9954C21157), SPH_C64(0xEABAC46040A8EAE9), + SPH_C64(0x454C6FE9F2C0C1CD), SPH_C64(0x419CF6496412691C), + SPH_C64(0xD3DC3BEF265B0F70), SPH_C64(0x6D0E60F5C3578A9E), +}; + +static const sph_u64 T4[256] = { + SPH_C64(0x5B0E608526323C55), SPH_C64(0x1A46C1A9FA1B59F5), + SPH_C64(0xA9E245A17C4C8FFA), SPH_C64(0x65CA5159DB2955D7), + SPH_C64(0x05DB0A76CE35AFC2), SPH_C64(0x81EAC77EA9113D45), + SPH_C64(0x528EF88AB6AC0A0D), SPH_C64(0xA09EA253597BE3FF), + SPH_C64(0x430DDFB3AC48CD56), SPH_C64(0xC4B3A67AF45CE46F), + SPH_C64(0x4ECECFD8FBE2D05E), SPH_C64(0x3EF56F10B39935F0), + SPH_C64(0x0B22D6829CD619C6), SPH_C64(0x17FD460A74DF2069), + 
SPH_C64(0x6CF8CC8E8510ED40), SPH_C64(0xD6C824BF3A6ECAA7), + SPH_C64(0x61243D581A817049), SPH_C64(0x048BACB6BBC163A2), + SPH_C64(0xD9A38AC27D44CC32), SPH_C64(0x7FDDFF5BAAF410AB), + SPH_C64(0xAD6D495AA804824B), SPH_C64(0xE1A6A74F2D8C9F94), + SPH_C64(0xD4F7851235DEE8E3), SPH_C64(0xFD4B7F886540D893), + SPH_C64(0x247C20042AA4BFDA), SPH_C64(0x096EA1C517D1327C), + SPH_C64(0xD56966B4361A6685), SPH_C64(0x277DA5C31221057D), + SPH_C64(0x94D59893A43ACFF7), SPH_C64(0x64F0C51CCDC02281), + SPH_C64(0x3D33BCC4FF6189DB), SPH_C64(0xE005CB184CE66AF1), + SPH_C64(0xFF5CCD1D1DB99BEA), SPH_C64(0xB0B854A7FE42980F), + SPH_C64(0x7BD46A6A718D4B9F), SPH_C64(0xD10FA8CC22A5FD8C), + SPH_C64(0xD31484952BE4BD31), SPH_C64(0xC7FA975FCB243847), + SPH_C64(0x4886ED1E5846C407), SPH_C64(0x28CDDB791EB70B04), + SPH_C64(0xC2B00BE2F573417F), SPH_C64(0x5C9590452180F877), + SPH_C64(0x7A6BDDFFF370EB00), SPH_C64(0xCE509E38D6D9D6A4), + SPH_C64(0xEBEB0F00647FA702), SPH_C64(0x1DCC06CF76606F06), + SPH_C64(0xE4D9F28BA286FF0A), SPH_C64(0xD85A305DC918C262), + SPH_C64(0x475B1D8732225F54), SPH_C64(0x2D4FB51668CCB5FE), + SPH_C64(0xA679B9D9D72BBA20), SPH_C64(0x53841C0D912D43A5), + SPH_C64(0x3B7EAA48BF12A4E8), SPH_C64(0x781E0E47F22F1DDF), + SPH_C64(0xEFF20CE60AB50973), SPH_C64(0x20D261D19DFFB742), + SPH_C64(0x16A12B03062A2E39), SPH_C64(0x1960EB2239650495), + SPH_C64(0x251C16FED50EB8B8), SPH_C64(0x9AC0C330F826016E), + SPH_C64(0xED152665953E7671), SPH_C64(0x02D63194A6369570), + SPH_C64(0x5074F08394B1C987), SPH_C64(0x70BA598C90B25CE1), + SPH_C64(0x794A15810B9742F6), SPH_C64(0x0D5925E9FCAF8C6C), + SPH_C64(0x3067716CD868744E), SPH_C64(0x910AB077E8D7731B), + SPH_C64(0x6A61BBDB5AC42F61), SPH_C64(0x93513EFBF0851567), + SPH_C64(0xF494724B9E83E9D5), SPH_C64(0xE887E1985C09648D), + SPH_C64(0x34B1D3C675370CFD), SPH_C64(0xDC35E433BC0D255D), + SPH_C64(0xD0AAB84234131BE0), SPH_C64(0x08042A50B48B7EAF), + SPH_C64(0x9997C4EE44A3AB35), SPH_C64(0x829A7B49201799D0), + SPH_C64(0x263B8307B7C54441), SPH_C64(0x752F95F4FD6A6CA6), + 
SPH_C64(0x927217402C08C6E5), SPH_C64(0x2A8AB754A795D9EE), + SPH_C64(0xA442F7552F72943D), SPH_C64(0x2C31334E19781208), + SPH_C64(0x4FA98D7CEAEE6291), SPH_C64(0x55C3862F665DB309), + SPH_C64(0xBD0610175D53B1F3), SPH_C64(0x46FE6CB840413F27), + SPH_C64(0x3FE03792DF0CFA59), SPH_C64(0xCFE700372EB85E8F), + SPH_C64(0xA7BE29E7ADBCE118), SPH_C64(0xE544EE5CDE8431DD), + SPH_C64(0x8A781B1B41F1873E), SPH_C64(0xA5C94C78A0D2F0E7), + SPH_C64(0x39412E2877B60728), SPH_C64(0xA1265EF3AFC9A62C), + SPH_C64(0xBCC2770C6A2506C5), SPH_C64(0x3AB66DD5DCE1CE12), + SPH_C64(0xE65499D04A675B37), SPH_C64(0x7D8F523481BFD216), + SPH_C64(0x0F6F64FCEC15F389), SPH_C64(0x74EFBE618B5B13C8), + SPH_C64(0xACDC82B714273E1D), SPH_C64(0xDD40BFE003199D17), + SPH_C64(0x37E99257E7E061F8), SPH_C64(0xFA52626904775AAA), + SPH_C64(0x8BBBF63A463D56F9), SPH_C64(0xF0013F1543A26E64), + SPH_C64(0xA8307E9F879EC898), SPH_C64(0xCC4C27A4150177CC), + SPH_C64(0x1B432F2CCA1D3348), SPH_C64(0xDE1D1F8F9F6FA013), + SPH_C64(0x606602A047A7DDD6), SPH_C64(0xD237AB64CC1CB2C7), + SPH_C64(0x9B938E7225FCD1D3), SPH_C64(0xEC4E03708E0FF476), + SPH_C64(0xFEB2FBDA3D03C12D), SPH_C64(0xAE0BCED2EE43889A), + SPH_C64(0x22CB8923EBFB4F43), SPH_C64(0x69360D013CF7396D), + SPH_C64(0x855E3602D2D4E022), SPH_C64(0x073805BAD01F784C), + SPH_C64(0x33E17A133852F546), SPH_C64(0xDF4874058AC7B638), + SPH_C64(0xBA92B29C678AA14A), SPH_C64(0x0CE89FC76CFAADCD), + SPH_C64(0x5F9D4E0908339E34), SPH_C64(0xF1AFE9291F5923B9), + SPH_C64(0x6E3480F60F4A265F), SPH_C64(0xEEBF3A2AB29B841C), + SPH_C64(0xE21938A88F91B4AD), SPH_C64(0x57DFEFF845C6D3C3), + SPH_C64(0x2F006B0BF62CAAF2), SPH_C64(0x62F479EF6F75EE78), + SPH_C64(0x11A55AD41C8916A9), SPH_C64(0xF229D29084FED453), + SPH_C64(0x42F1C27B16B000E6), SPH_C64(0x2B1F76749823C074), + SPH_C64(0x4B76ECA3C2745360), SPH_C64(0x8C98F463B91691BD), + SPH_C64(0x14BCC93CF1ADE66A), SPH_C64(0x8885213E6D458397), + SPH_C64(0x8E177DF0274D4711), SPH_C64(0xB49B73B5503F2951), + SPH_C64(0x10168168C3F96B6B), SPH_C64(0x0E3D963B63CAB0AE), + 
SPH_C64(0x8DFC4B5655A1DB14), SPH_C64(0xF789F1356E14DE5C), + SPH_C64(0x683E68AF4E51DAC1), SPH_C64(0xC9A84F9D8D4B0FD9), + SPH_C64(0x3691E03F52A0F9D1), SPH_C64(0x5ED86E46E1878E80), + SPH_C64(0x3C711A0E99D07150), SPH_C64(0x5A0865B20C4E9310), + SPH_C64(0x56FBFC1FE4F0682E), SPH_C64(0xEA8D5DE3105EDF9B), + SPH_C64(0x71ABFDB12379187A), SPH_C64(0x2EB99DE1BEE77B9C), + SPH_C64(0x21ECC0EA33CF4523), SPH_C64(0x59A4D7521805C7A1), + SPH_C64(0x3896F5EB56AE7C72), SPH_C64(0xAA638F3DB18F75DC), + SPH_C64(0x9F39358DABE9808E), SPH_C64(0xB7DEFA91C00B72AC), + SPH_C64(0x6B5541FD62492D92), SPH_C64(0x6DC6DEE8F92E4D5B), + SPH_C64(0x353F57ABC4BEEA7E), SPH_C64(0x735769D6DA5690CE), + SPH_C64(0x0A234AA642391484), SPH_C64(0xF6F9508028F80D9D), + SPH_C64(0xB8E319A27AB3F215), SPH_C64(0x31AD9C1151341A4D), + SPH_C64(0x773C22A57BEF5805), SPH_C64(0x45C7561A07968633), + SPH_C64(0xF913DA9E249DBE36), SPH_C64(0xDA652D9B78A64C68), + SPH_C64(0x4C27A97F3BC334EF), SPH_C64(0x76621220E66B17F4), + SPH_C64(0x967743899ACD7D0B), SPH_C64(0xF3EE5BCAE0ED6782), + SPH_C64(0x409F753600C879FC), SPH_C64(0x06D09A39B5926DB6), + SPH_C64(0x6F83AEB0317AC588), SPH_C64(0x01E6CA4A86381F21), + SPH_C64(0x66FF3462D19F3025), SPH_C64(0x72207C24DDFD3BFB), + SPH_C64(0x4AF6B6D3E2ECE2EB), SPH_C64(0x9C994DBEC7EA08DE), + SPH_C64(0x49ACE597B09A8BC4), SPH_C64(0xB38C4766CF0797BA), + SPH_C64(0x131B9373C57C2A75), SPH_C64(0xB1822CCE61931E58), + SPH_C64(0x9D7555B909BA1C0C), SPH_C64(0x127FAFDD937D11D2), + SPH_C64(0x29DA3BADC66D92E4), SPH_C64(0xA2C1D57154C2ECBC), + SPH_C64(0x58C5134D82F6FE24), SPH_C64(0x1C3AE3515B62274F), + SPH_C64(0xE907C82E01CB8126), SPH_C64(0xF8ED091913E37FCB), + SPH_C64(0x3249D8F9C80046C9), SPH_C64(0x80CF9BEDE388FB63), + SPH_C64(0x1881539A116CF19E), SPH_C64(0x5103F3F76BD52457), + SPH_C64(0x15B7E6F5AE47F7A8), SPH_C64(0xDBD7C6DED47E9CCF), + SPH_C64(0x44E55C410228BB1A), SPH_C64(0xB647D4255EDB4E99), + SPH_C64(0x5D11882BB8AAFC30), SPH_C64(0xF5098BBB29D3212A), + SPH_C64(0x8FB5EA14E90296B3), SPH_C64(0x677B942157DD025A), + 
SPH_C64(0xFB58E7C0A390ACB5), SPH_C64(0x89D3674C83BD4A01), + SPH_C64(0x9E2DA4DF4BF3B93B), SPH_C64(0xFCC41E328CAB4829), + SPH_C64(0x03F38C96BA582C52), SPH_C64(0xCAD1BDBD7FD85DB2), + SPH_C64(0xBBB442C16082AE83), SPH_C64(0xB95FE86BA5DA9AB0), + SPH_C64(0xB22E04673771A93F), SPH_C64(0x845358C9493152D8), + SPH_C64(0xBE2A488697B4541E), SPH_C64(0x95A2DC2DD38E6966), + SPH_C64(0xC02C11AC923C852B), SPH_C64(0x2388B1990DF2A87B), + SPH_C64(0x7C8008FA1B4F37BE), SPH_C64(0x1F70D0C84D54E503), + SPH_C64(0x5490ADEC7ECE57D4), SPH_C64(0x002B3C27D9063A3A), + SPH_C64(0x7EAEA3848030A2BF), SPH_C64(0xC602326DED2003C0), + SPH_C64(0x83A7287D69A94086), SPH_C64(0xC57A5FCB30F57A8A), + SPH_C64(0xB56844E479EBE779), SPH_C64(0xA373B40F05DCBCE9), + SPH_C64(0xD71A786E88570EE2), SPH_C64(0x879CBACDBDE8F6A0), + SPH_C64(0x976AD1BCC164A32F), SPH_C64(0xAB21E25E9666D78B), + SPH_C64(0x901063AAE5E5C33C), SPH_C64(0x9818B34448698D90), + SPH_C64(0xE36487AE3E1E8ABB), SPH_C64(0xAFBDF931893BDCB4), + SPH_C64(0x6345A0DC5FBBD519), SPH_C64(0x8628FE269B9465CA), + SPH_C64(0x1E5D01603F9C51EC), SPH_C64(0x4DE44006A15049B7), + SPH_C64(0xBF6C70E5F776CBB1), SPH_C64(0x411218F2EF552BED), + SPH_C64(0xCB0C0708705A36A3), SPH_C64(0xE74D14754F986044), + SPH_C64(0xCD56D9430EA8280E), SPH_C64(0xC12591D7535F5065), + SPH_C64(0xC83223F1720AEF96), SPH_C64(0xC3A0396F7363A51F), +}; + +#define PASS(a, b, c, mul) do { \ + ROUND(a, b, c, X0, mul); \ + ROUND(b, c, a, X1, mul); \ + ROUND(c, a, b, X2, mul); \ + ROUND(a, b, c, X3, mul); \ + ROUND(b, c, a, X4, mul); \ + ROUND(c, a, b, X5, mul); \ + ROUND(a, b, c, X6, mul); \ + ROUND(b, c, a, X7, mul); \ + } while (0) + +#define ROUND(a, b, c, x, mul) do { \ + c ^= x; \ + a = SPH_T64(a - (T1[c & 0xFF] ^ T2[(c >> 16) & 0xFF] \ + ^ T3[(c >> 32) & 0xFF] ^ T4[(c >> 48) & 0xFF])); \ + b = SPH_T64(b + (T4[(c >> 8) & 0xFF] ^ T3[(c >> 24) & 0xFF] \ + ^ T2[(c >> 40) & 0xFF] ^ T1[(c >> 56) & 0xFF])); \ + b = mul(b); \ + } while (0) + +#define MUL5(x) SPH_T64((x) * SPH_C64(5)) +#define MUL7(x) SPH_T64((x) * 
SPH_C64(7)) +#define MUL9(x) SPH_T64((x) * SPH_C64(9)) + +#define KSCHED do { \ + X0 = SPH_T64(X0 - (X7 ^ SPH_C64(0xA5A5A5A5A5A5A5A5))); \ + X1 ^= X0; \ + X2 = SPH_T64(X2 + X1); \ + X3 = SPH_T64(X3 - (X2 ^ (~X1 << 19))); \ + X4 ^= X3; \ + X5 = SPH_T64(X5 + X4); \ + X6 = SPH_T64(X6 - (X5 ^ (~X4 >> 23))); \ + X7 ^= X6; \ + X0 = SPH_T64(X0 + X7); \ + X1 = SPH_T64(X1 - (X0 ^ (~X7 << 19))); \ + X2 ^= X1; \ + X3 = SPH_T64(X3 + X2); \ + X4 = SPH_T64(X4 - (X3 ^ (~X2 >> 23))); \ + X5 ^= X4; \ + X6 = SPH_T64(X6 + X5); \ + X7 = SPH_T64(X7 - (X6 ^ SPH_C64(0x0123456789ABCDEF))); \ + } while (0) + +#define TIGER_ROUND_BODY(in, r) do { \ + sph_u64 A, B, C; \ + sph_u64 X0, X1, X2, X3, X4, X5, X6, X7; \ + int i; \ + \ + A = (r)[0]; \ + B = (r)[1]; \ + C = (r)[2]; \ + \ + X0 = (in(0)); \ + X1 = (in(1)); \ + X2 = (in(2)); \ + X3 = (in(3)); \ + X4 = (in(4)); \ + X5 = (in(5)); \ + X6 = (in(6)); \ + X7 = (in(7)); \ + PASS(A, B, C, MUL5); \ + KSCHED; \ + PASS(C, A, B, MUL7); \ + KSCHED; \ + PASS(B, C, A, MUL9); \ + \ + (r)[0] ^= A; \ + (r)[1] = SPH_T64(B - (r)[1]); \ + (r)[2] = SPH_T64(C + (r)[2]); \ + } while (0) + +/* + * One round of Tiger. The data must be aligned for 64-bit access. 
+ */ +static void +tiger_round(const unsigned char *data, sph_u64 r[3]) +{ +#define TIGER_IN(i) sph_dec64le_aligned(data + 8 * (i)) + TIGER_ROUND_BODY(TIGER_IN, r); +#undef TIGER_IN +} + +/* see sph_tiger.h */ +void +sph_tiger_init(void *cc) +{ + sph_tiger_context *sc; + + sc = cc; + sc->val[0] = SPH_C64(0x0123456789ABCDEF); + sc->val[1] = SPH_C64(0xFEDCBA9876543210); + sc->val[2] = SPH_C64(0xF096A5B4C3B2E187); + sc->count = 0; +} + +#define RFUN tiger_round +#define HASH tiger +#define LE64 1 +#define BLEN 64U +#define PW01 1 +#define PLW1 1 +#include "md_helper.c" + +/* see sph_tiger.h */ +void +sph_tiger_close(void *cc, void *dst) +{ + tiger_close(cc, dst, 3); + sph_tiger_init(cc); +} + +/* see sph_tiger.h */ +void +sph_tiger_comp(const sph_u64 msg[8], sph_u64 val[3]) +{ +#define TIGER_IN(i) msg[i] + TIGER_ROUND_BODY(TIGER_IN, val); +#undef TIGER_IN +} + +#undef HASH +#define HASH tiger2 +#undef PW01 +#define CLOSE_ONLY 1 +#include "md_helper.c" + +/* see sph_tiger.h */ +void +sph_tiger2_close(void *cc, void *dst) +{ + tiger2_close(cc, dst, 3); + sph_tiger2_init(cc); +} + +#endif diff --git a/sph/whirlpool.c b/sph/whirlpool.c new file mode 100644 index 0000000000..ee13e4c869 --- /dev/null +++ b/sph/whirlpool.c @@ -0,0 +1,3476 @@ +/* $Id: whirlpool.c 227 2010-06-16 17:28:38Z tp $ */ +/* + * WHIRLPOOL implementation. + * + * Internally, we use little-endian convention, on the assumption that + * architectures which favour big-endian encoding are: + * 1. rarer + * 2. in decreasing numbers + * 3. able to decode little-endian data efficiently anyway + * + * The most common big-endian architecture is Sparc, and Ultrasparc CPU + * include special opcodes to perform little-endian accesses, which we use + * (see sph_types.h). Most modern CPU designs can work with both endianness + * and architecture designer now favour little-endian (basically, x86 has + * won the endianness war). + * + * TODO: implement a 32-bit version. 
Not only such a version would be handy + * for non-64-bit-able architectures, but it may also use smaller tables, + * at the expense of more lookups and XORs. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include + +#include "sph_whirlpool.h" + +#if SPH_64 + +#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_WHIRLPOOL +#define SPH_SMALL_FOOTPRINT_WHIRLPOOL 1 +#endif + +/* ====================================================================== */ +/* + * Constants for plain WHIRLPOOL (current version). 
+ */ + +static const sph_u64 plain_T0[256] = { + SPH_C64(0xD83078C018601818), SPH_C64(0x2646AF05238C2323), + SPH_C64(0xB891F97EC63FC6C6), SPH_C64(0xFBCD6F13E887E8E8), + SPH_C64(0xCB13A14C87268787), SPH_C64(0x116D62A9B8DAB8B8), + SPH_C64(0x0902050801040101), SPH_C64(0x0D9E6E424F214F4F), + SPH_C64(0x9B6CEEAD36D83636), SPH_C64(0xFF510459A6A2A6A6), + SPH_C64(0x0CB9BDDED26FD2D2), SPH_C64(0x0EF706FBF5F3F5F5), + SPH_C64(0x96F280EF79F97979), SPH_C64(0x30DECE5F6FA16F6F), + SPH_C64(0x6D3FEFFC917E9191), SPH_C64(0xF8A407AA52555252), + SPH_C64(0x47C0FD27609D6060), SPH_C64(0x35657689BCCABCBC), + SPH_C64(0x372BCDAC9B569B9B), SPH_C64(0x8A018C048E028E8E), + SPH_C64(0xD25B1571A3B6A3A3), SPH_C64(0x6C183C600C300C0C), + SPH_C64(0x84F68AFF7BF17B7B), SPH_C64(0x806AE1B535D43535), + SPH_C64(0xF53A69E81D741D1D), SPH_C64(0xB3DD4753E0A7E0E0), + SPH_C64(0x21B3ACF6D77BD7D7), SPH_C64(0x9C99ED5EC22FC2C2), + SPH_C64(0x435C966D2EB82E2E), SPH_C64(0x29967A624B314B4B), + SPH_C64(0x5DE121A3FEDFFEFE), SPH_C64(0xD5AE168257415757), + SPH_C64(0xBD2A41A815541515), SPH_C64(0xE8EEB69F77C17777), + SPH_C64(0x926EEBA537DC3737), SPH_C64(0x9ED7567BE5B3E5E5), + SPH_C64(0x1323D98C9F469F9F), SPH_C64(0x23FD17D3F0E7F0F0), + SPH_C64(0x20947F6A4A354A4A), SPH_C64(0x44A9959EDA4FDADA), + SPH_C64(0xA2B025FA587D5858), SPH_C64(0xCF8FCA06C903C9C9), + SPH_C64(0x7C528D5529A42929), SPH_C64(0x5A1422500A280A0A), + SPH_C64(0x507F4FE1B1FEB1B1), SPH_C64(0xC95D1A69A0BAA0A0), + SPH_C64(0x14D6DA7F6BB16B6B), SPH_C64(0xD917AB5C852E8585), + SPH_C64(0x3C677381BDCEBDBD), SPH_C64(0x8FBA34D25D695D5D), + SPH_C64(0x9020508010401010), SPH_C64(0x07F503F3F4F7F4F4), + SPH_C64(0xDD8BC016CB0BCBCB), SPH_C64(0xD37CC6ED3EF83E3E), + SPH_C64(0x2D0A112805140505), SPH_C64(0x78CEE61F67816767), + SPH_C64(0x97D55373E4B7E4E4), SPH_C64(0x024EBB25279C2727), + SPH_C64(0x7382583241194141), SPH_C64(0xA70B9D2C8B168B8B), + SPH_C64(0xF6530151A7A6A7A7), SPH_C64(0xB2FA94CF7DE97D7D), + SPH_C64(0x4937FBDC956E9595), SPH_C64(0x56AD9F8ED847D8D8), + SPH_C64(0x70EB308BFBCBFBFB), 
SPH_C64(0xCDC17123EE9FEEEE), + SPH_C64(0xBBF891C77CED7C7C), SPH_C64(0x71CCE31766856666), + SPH_C64(0x7BA78EA6DD53DDDD), SPH_C64(0xAF2E4BB8175C1717), + SPH_C64(0x458E460247014747), SPH_C64(0x1A21DC849E429E9E), + SPH_C64(0xD489C51ECA0FCACA), SPH_C64(0x585A99752DB42D2D), + SPH_C64(0x2E637991BFC6BFBF), SPH_C64(0x3F0E1B38071C0707), + SPH_C64(0xAC472301AD8EADAD), SPH_C64(0xB0B42FEA5A755A5A), + SPH_C64(0xEF1BB56C83368383), SPH_C64(0xB666FF8533CC3333), + SPH_C64(0x5CC6F23F63916363), SPH_C64(0x12040A1002080202), + SPH_C64(0x93493839AA92AAAA), SPH_C64(0xDEE2A8AF71D97171), + SPH_C64(0xC68DCF0EC807C8C8), SPH_C64(0xD1327DC819641919), + SPH_C64(0x3B92707249394949), SPH_C64(0x5FAF9A86D943D9D9), + SPH_C64(0x31F91DC3F2EFF2F2), SPH_C64(0xA8DB484BE3ABE3E3), + SPH_C64(0xB9B62AE25B715B5B), SPH_C64(0xBC0D9234881A8888), + SPH_C64(0x3E29C8A49A529A9A), SPH_C64(0x0B4CBE2D26982626), + SPH_C64(0xBF64FA8D32C83232), SPH_C64(0x597D4AE9B0FAB0B0), + SPH_C64(0xF2CF6A1BE983E9E9), SPH_C64(0x771E33780F3C0F0F), + SPH_C64(0x33B7A6E6D573D5D5), SPH_C64(0xF41DBA74803A8080), + SPH_C64(0x27617C99BEC2BEBE), SPH_C64(0xEB87DE26CD13CDCD), + SPH_C64(0x8968E4BD34D03434), SPH_C64(0x3290757A483D4848), + SPH_C64(0x54E324ABFFDBFFFF), SPH_C64(0x8DF48FF77AF57A7A), + SPH_C64(0x643DEAF4907A9090), SPH_C64(0x9DBE3EC25F615F5F), + SPH_C64(0x3D40A01D20802020), SPH_C64(0x0FD0D56768BD6868), + SPH_C64(0xCA3472D01A681A1A), SPH_C64(0xB7412C19AE82AEAE), + SPH_C64(0x7D755EC9B4EAB4B4), SPH_C64(0xCEA8199A544D5454), + SPH_C64(0x7F3BE5EC93769393), SPH_C64(0x2F44AA0D22882222), + SPH_C64(0x63C8E907648D6464), SPH_C64(0x2AFF12DBF1E3F1F1), + SPH_C64(0xCCE6A2BF73D17373), SPH_C64(0x82245A9012481212), + SPH_C64(0x7A805D3A401D4040), SPH_C64(0x4810284008200808), + SPH_C64(0x959BE856C32BC3C3), SPH_C64(0xDFC57B33EC97ECEC), + SPH_C64(0x4DAB9096DB4BDBDB), SPH_C64(0xC05F1F61A1BEA1A1), + SPH_C64(0x9107831C8D0E8D8D), SPH_C64(0xC87AC9F53DF43D3D), + SPH_C64(0x5B33F1CC97669797), SPH_C64(0x0000000000000000), + SPH_C64(0xF983D436CF1BCFCF), 
SPH_C64(0x6E5687452BAC2B2B), + SPH_C64(0xE1ECB39776C57676), SPH_C64(0xE619B06482328282), + SPH_C64(0x28B1A9FED67FD6D6), SPH_C64(0xC33677D81B6C1B1B), + SPH_C64(0x74775BC1B5EEB5B5), SPH_C64(0xBE432911AF86AFAF), + SPH_C64(0x1DD4DF776AB56A6A), SPH_C64(0xEAA00DBA505D5050), + SPH_C64(0x578A4C1245094545), SPH_C64(0x38FB18CBF3EBF3F3), + SPH_C64(0xAD60F09D30C03030), SPH_C64(0xC4C3742BEF9BEFEF), + SPH_C64(0xDA7EC3E53FFC3F3F), SPH_C64(0xC7AA1C9255495555), + SPH_C64(0xDB591079A2B2A2A2), SPH_C64(0xE9C96503EA8FEAEA), + SPH_C64(0x6ACAEC0F65896565), SPH_C64(0x036968B9BAD2BABA), + SPH_C64(0x4A5E93652FBC2F2F), SPH_C64(0x8E9DE74EC027C0C0), + SPH_C64(0x60A181BEDE5FDEDE), SPH_C64(0xFC386CE01C701C1C), + SPH_C64(0x46E72EBBFDD3FDFD), SPH_C64(0x1F9A64524D294D4D), + SPH_C64(0x7639E0E492729292), SPH_C64(0xFAEABC8F75C97575), + SPH_C64(0x360C1E3006180606), SPH_C64(0xAE0998248A128A8A), + SPH_C64(0x4B7940F9B2F2B2B2), SPH_C64(0x85D15963E6BFE6E6), + SPH_C64(0x7E1C36700E380E0E), SPH_C64(0xE73E63F81F7C1F1F), + SPH_C64(0x55C4F73762956262), SPH_C64(0x3AB5A3EED477D4D4), + SPH_C64(0x814D3229A89AA8A8), SPH_C64(0x5231F4C496629696), + SPH_C64(0x62EF3A9BF9C3F9F9), SPH_C64(0xA397F666C533C5C5), + SPH_C64(0x104AB13525942525), SPH_C64(0xABB220F259795959), + SPH_C64(0xD015AE54842A8484), SPH_C64(0xC5E4A7B772D57272), + SPH_C64(0xEC72DDD539E43939), SPH_C64(0x1698615A4C2D4C4C), + SPH_C64(0x94BC3BCA5E655E5E), SPH_C64(0x9FF085E778FD7878), + SPH_C64(0xE570D8DD38E03838), SPH_C64(0x980586148C0A8C8C), + SPH_C64(0x17BFB2C6D163D1D1), SPH_C64(0xE4570B41A5AEA5A5), + SPH_C64(0xA1D94D43E2AFE2E2), SPH_C64(0x4EC2F82F61996161), + SPH_C64(0x427B45F1B3F6B3B3), SPH_C64(0x3442A51521842121), + SPH_C64(0x0825D6949C4A9C9C), SPH_C64(0xEE3C66F01E781E1E), + SPH_C64(0x6186522243114343), SPH_C64(0xB193FC76C73BC7C7), + SPH_C64(0x4FE52BB3FCD7FCFC), SPH_C64(0x2408142004100404), + SPH_C64(0xE3A208B251595151), SPH_C64(0x252FC7BC995E9999), + SPH_C64(0x22DAC44F6DA96D6D), SPH_C64(0x651A39680D340D0D), + SPH_C64(0x79E93583FACFFAFA), 
SPH_C64(0x69A384B6DF5BDFDF), + SPH_C64(0xA9FC9BD77EE57E7E), SPH_C64(0x1948B43D24902424), + SPH_C64(0xFE76D7C53BEC3B3B), SPH_C64(0x9A4B3D31AB96ABAB), + SPH_C64(0xF081D13ECE1FCECE), SPH_C64(0x9922558811441111), + SPH_C64(0x8303890C8F068F8F), SPH_C64(0x049C6B4A4E254E4E), + SPH_C64(0x667351D1B7E6B7B7), SPH_C64(0xE0CB600BEB8BEBEB), + SPH_C64(0xC178CCFD3CF03C3C), SPH_C64(0xFD1FBF7C813E8181), + SPH_C64(0x4035FED4946A9494), SPH_C64(0x1CF30CEBF7FBF7F7), + SPH_C64(0x186F67A1B9DEB9B9), SPH_C64(0x8B265F98134C1313), + SPH_C64(0x51589C7D2CB02C2C), SPH_C64(0x05BBB8D6D36BD3D3), + SPH_C64(0x8CD35C6BE7BBE7E7), SPH_C64(0x39DCCB576EA56E6E), + SPH_C64(0xAA95F36EC437C4C4), SPH_C64(0x1B060F18030C0303), + SPH_C64(0xDCAC138A56455656), SPH_C64(0x5E88491A440D4444), + SPH_C64(0xA0FE9EDF7FE17F7F), SPH_C64(0x884F3721A99EA9A9), + SPH_C64(0x6754824D2AA82A2A), SPH_C64(0x0A6B6DB1BBD6BBBB), + SPH_C64(0x879FE246C123C1C1), SPH_C64(0xF1A602A253515353), + SPH_C64(0x72A58BAEDC57DCDC), SPH_C64(0x531627580B2C0B0B), + SPH_C64(0x0127D39C9D4E9D9D), SPH_C64(0x2BD8C1476CAD6C6C), + SPH_C64(0xA462F59531C43131), SPH_C64(0xF3E8B98774CD7474), + SPH_C64(0x15F109E3F6FFF6F6), SPH_C64(0x4C8C430A46054646), + SPH_C64(0xA5452609AC8AACAC), SPH_C64(0xB50F973C891E8989), + SPH_C64(0xB42844A014501414), SPH_C64(0xBADF425BE1A3E1E1), + SPH_C64(0xA62C4EB016581616), SPH_C64(0xF774D2CD3AE83A3A), + SPH_C64(0x06D2D06F69B96969), SPH_C64(0x41122D4809240909), + SPH_C64(0xD7E0ADA770DD7070), SPH_C64(0x6F7154D9B6E2B6B6), + SPH_C64(0x1EBDB7CED067D0D0), SPH_C64(0xD6C77E3BED93EDED), + SPH_C64(0xE285DB2ECC17CCCC), SPH_C64(0x6884572A42154242), + SPH_C64(0x2C2DC2B4985A9898), SPH_C64(0xED550E49A4AAA4A4), + SPH_C64(0x7550885D28A02828), SPH_C64(0x86B831DA5C6D5C5C), + SPH_C64(0x6BED3F93F8C7F8F8), SPH_C64(0xC211A44486228686) +}; + +#if !SPH_SMALL_FOOTPRINT_WHIRLPOOL + +static const sph_u64 plain_T1[256] = { + SPH_C64(0x3078C018601818D8), SPH_C64(0x46AF05238C232326), + SPH_C64(0x91F97EC63FC6C6B8), SPH_C64(0xCD6F13E887E8E8FB), + 
SPH_C64(0x13A14C87268787CB), SPH_C64(0x6D62A9B8DAB8B811), + SPH_C64(0x0205080104010109), SPH_C64(0x9E6E424F214F4F0D), + SPH_C64(0x6CEEAD36D836369B), SPH_C64(0x510459A6A2A6A6FF), + SPH_C64(0xB9BDDED26FD2D20C), SPH_C64(0xF706FBF5F3F5F50E), + SPH_C64(0xF280EF79F9797996), SPH_C64(0xDECE5F6FA16F6F30), + SPH_C64(0x3FEFFC917E91916D), SPH_C64(0xA407AA52555252F8), + SPH_C64(0xC0FD27609D606047), SPH_C64(0x657689BCCABCBC35), + SPH_C64(0x2BCDAC9B569B9B37), SPH_C64(0x018C048E028E8E8A), + SPH_C64(0x5B1571A3B6A3A3D2), SPH_C64(0x183C600C300C0C6C), + SPH_C64(0xF68AFF7BF17B7B84), SPH_C64(0x6AE1B535D4353580), + SPH_C64(0x3A69E81D741D1DF5), SPH_C64(0xDD4753E0A7E0E0B3), + SPH_C64(0xB3ACF6D77BD7D721), SPH_C64(0x99ED5EC22FC2C29C), + SPH_C64(0x5C966D2EB82E2E43), SPH_C64(0x967A624B314B4B29), + SPH_C64(0xE121A3FEDFFEFE5D), SPH_C64(0xAE168257415757D5), + SPH_C64(0x2A41A815541515BD), SPH_C64(0xEEB69F77C17777E8), + SPH_C64(0x6EEBA537DC373792), SPH_C64(0xD7567BE5B3E5E59E), + SPH_C64(0x23D98C9F469F9F13), SPH_C64(0xFD17D3F0E7F0F023), + SPH_C64(0x947F6A4A354A4A20), SPH_C64(0xA9959EDA4FDADA44), + SPH_C64(0xB025FA587D5858A2), SPH_C64(0x8FCA06C903C9C9CF), + SPH_C64(0x528D5529A429297C), SPH_C64(0x1422500A280A0A5A), + SPH_C64(0x7F4FE1B1FEB1B150), SPH_C64(0x5D1A69A0BAA0A0C9), + SPH_C64(0xD6DA7F6BB16B6B14), SPH_C64(0x17AB5C852E8585D9), + SPH_C64(0x677381BDCEBDBD3C), SPH_C64(0xBA34D25D695D5D8F), + SPH_C64(0x2050801040101090), SPH_C64(0xF503F3F4F7F4F407), + SPH_C64(0x8BC016CB0BCBCBDD), SPH_C64(0x7CC6ED3EF83E3ED3), + SPH_C64(0x0A1128051405052D), SPH_C64(0xCEE61F6781676778), + SPH_C64(0xD55373E4B7E4E497), SPH_C64(0x4EBB25279C272702), + SPH_C64(0x8258324119414173), SPH_C64(0x0B9D2C8B168B8BA7), + SPH_C64(0x530151A7A6A7A7F6), SPH_C64(0xFA94CF7DE97D7DB2), + SPH_C64(0x37FBDC956E959549), SPH_C64(0xAD9F8ED847D8D856), + SPH_C64(0xEB308BFBCBFBFB70), SPH_C64(0xC17123EE9FEEEECD), + SPH_C64(0xF891C77CED7C7CBB), SPH_C64(0xCCE3176685666671), + SPH_C64(0xA78EA6DD53DDDD7B), SPH_C64(0x2E4BB8175C1717AF), + 
SPH_C64(0x8E46024701474745), SPH_C64(0x21DC849E429E9E1A), + SPH_C64(0x89C51ECA0FCACAD4), SPH_C64(0x5A99752DB42D2D58), + SPH_C64(0x637991BFC6BFBF2E), SPH_C64(0x0E1B38071C07073F), + SPH_C64(0x472301AD8EADADAC), SPH_C64(0xB42FEA5A755A5AB0), + SPH_C64(0x1BB56C83368383EF), SPH_C64(0x66FF8533CC3333B6), + SPH_C64(0xC6F23F639163635C), SPH_C64(0x040A100208020212), + SPH_C64(0x493839AA92AAAA93), SPH_C64(0xE2A8AF71D97171DE), + SPH_C64(0x8DCF0EC807C8C8C6), SPH_C64(0x327DC819641919D1), + SPH_C64(0x927072493949493B), SPH_C64(0xAF9A86D943D9D95F), + SPH_C64(0xF91DC3F2EFF2F231), SPH_C64(0xDB484BE3ABE3E3A8), + SPH_C64(0xB62AE25B715B5BB9), SPH_C64(0x0D9234881A8888BC), + SPH_C64(0x29C8A49A529A9A3E), SPH_C64(0x4CBE2D269826260B), + SPH_C64(0x64FA8D32C83232BF), SPH_C64(0x7D4AE9B0FAB0B059), + SPH_C64(0xCF6A1BE983E9E9F2), SPH_C64(0x1E33780F3C0F0F77), + SPH_C64(0xB7A6E6D573D5D533), SPH_C64(0x1DBA74803A8080F4), + SPH_C64(0x617C99BEC2BEBE27), SPH_C64(0x87DE26CD13CDCDEB), + SPH_C64(0x68E4BD34D0343489), SPH_C64(0x90757A483D484832), + SPH_C64(0xE324ABFFDBFFFF54), SPH_C64(0xF48FF77AF57A7A8D), + SPH_C64(0x3DEAF4907A909064), SPH_C64(0xBE3EC25F615F5F9D), + SPH_C64(0x40A01D208020203D), SPH_C64(0xD0D56768BD68680F), + SPH_C64(0x3472D01A681A1ACA), SPH_C64(0x412C19AE82AEAEB7), + SPH_C64(0x755EC9B4EAB4B47D), SPH_C64(0xA8199A544D5454CE), + SPH_C64(0x3BE5EC937693937F), SPH_C64(0x44AA0D228822222F), + SPH_C64(0xC8E907648D646463), SPH_C64(0xFF12DBF1E3F1F12A), + SPH_C64(0xE6A2BF73D17373CC), SPH_C64(0x245A901248121282), + SPH_C64(0x805D3A401D40407A), SPH_C64(0x1028400820080848), + SPH_C64(0x9BE856C32BC3C395), SPH_C64(0xC57B33EC97ECECDF), + SPH_C64(0xAB9096DB4BDBDB4D), SPH_C64(0x5F1F61A1BEA1A1C0), + SPH_C64(0x07831C8D0E8D8D91), SPH_C64(0x7AC9F53DF43D3DC8), + SPH_C64(0x33F1CC976697975B), SPH_C64(0x0000000000000000), + SPH_C64(0x83D436CF1BCFCFF9), SPH_C64(0x5687452BAC2B2B6E), + SPH_C64(0xECB39776C57676E1), SPH_C64(0x19B06482328282E6), + SPH_C64(0xB1A9FED67FD6D628), SPH_C64(0x3677D81B6C1B1BC3), + 
SPH_C64(0x775BC1B5EEB5B574), SPH_C64(0x432911AF86AFAFBE), + SPH_C64(0xD4DF776AB56A6A1D), SPH_C64(0xA00DBA505D5050EA), + SPH_C64(0x8A4C124509454557), SPH_C64(0xFB18CBF3EBF3F338), + SPH_C64(0x60F09D30C03030AD), SPH_C64(0xC3742BEF9BEFEFC4), + SPH_C64(0x7EC3E53FFC3F3FDA), SPH_C64(0xAA1C9255495555C7), + SPH_C64(0x591079A2B2A2A2DB), SPH_C64(0xC96503EA8FEAEAE9), + SPH_C64(0xCAEC0F658965656A), SPH_C64(0x6968B9BAD2BABA03), + SPH_C64(0x5E93652FBC2F2F4A), SPH_C64(0x9DE74EC027C0C08E), + SPH_C64(0xA181BEDE5FDEDE60), SPH_C64(0x386CE01C701C1CFC), + SPH_C64(0xE72EBBFDD3FDFD46), SPH_C64(0x9A64524D294D4D1F), + SPH_C64(0x39E0E49272929276), SPH_C64(0xEABC8F75C97575FA), + SPH_C64(0x0C1E300618060636), SPH_C64(0x0998248A128A8AAE), + SPH_C64(0x7940F9B2F2B2B24B), SPH_C64(0xD15963E6BFE6E685), + SPH_C64(0x1C36700E380E0E7E), SPH_C64(0x3E63F81F7C1F1FE7), + SPH_C64(0xC4F7376295626255), SPH_C64(0xB5A3EED477D4D43A), + SPH_C64(0x4D3229A89AA8A881), SPH_C64(0x31F4C49662969652), + SPH_C64(0xEF3A9BF9C3F9F962), SPH_C64(0x97F666C533C5C5A3), + SPH_C64(0x4AB1352594252510), SPH_C64(0xB220F259795959AB), + SPH_C64(0x15AE54842A8484D0), SPH_C64(0xE4A7B772D57272C5), + SPH_C64(0x72DDD539E43939EC), SPH_C64(0x98615A4C2D4C4C16), + SPH_C64(0xBC3BCA5E655E5E94), SPH_C64(0xF085E778FD78789F), + SPH_C64(0x70D8DD38E03838E5), SPH_C64(0x0586148C0A8C8C98), + SPH_C64(0xBFB2C6D163D1D117), SPH_C64(0x570B41A5AEA5A5E4), + SPH_C64(0xD94D43E2AFE2E2A1), SPH_C64(0xC2F82F619961614E), + SPH_C64(0x7B45F1B3F6B3B342), SPH_C64(0x42A5152184212134), + SPH_C64(0x25D6949C4A9C9C08), SPH_C64(0x3C66F01E781E1EEE), + SPH_C64(0x8652224311434361), SPH_C64(0x93FC76C73BC7C7B1), + SPH_C64(0xE52BB3FCD7FCFC4F), SPH_C64(0x0814200410040424), + SPH_C64(0xA208B251595151E3), SPH_C64(0x2FC7BC995E999925), + SPH_C64(0xDAC44F6DA96D6D22), SPH_C64(0x1A39680D340D0D65), + SPH_C64(0xE93583FACFFAFA79), SPH_C64(0xA384B6DF5BDFDF69), + SPH_C64(0xFC9BD77EE57E7EA9), SPH_C64(0x48B43D2490242419), + SPH_C64(0x76D7C53BEC3B3BFE), SPH_C64(0x4B3D31AB96ABAB9A), + 
SPH_C64(0x81D13ECE1FCECEF0), SPH_C64(0x2255881144111199), + SPH_C64(0x03890C8F068F8F83), SPH_C64(0x9C6B4A4E254E4E04), + SPH_C64(0x7351D1B7E6B7B766), SPH_C64(0xCB600BEB8BEBEBE0), + SPH_C64(0x78CCFD3CF03C3CC1), SPH_C64(0x1FBF7C813E8181FD), + SPH_C64(0x35FED4946A949440), SPH_C64(0xF30CEBF7FBF7F71C), + SPH_C64(0x6F67A1B9DEB9B918), SPH_C64(0x265F98134C13138B), + SPH_C64(0x589C7D2CB02C2C51), SPH_C64(0xBBB8D6D36BD3D305), + SPH_C64(0xD35C6BE7BBE7E78C), SPH_C64(0xDCCB576EA56E6E39), + SPH_C64(0x95F36EC437C4C4AA), SPH_C64(0x060F18030C03031B), + SPH_C64(0xAC138A56455656DC), SPH_C64(0x88491A440D44445E), + SPH_C64(0xFE9EDF7FE17F7FA0), SPH_C64(0x4F3721A99EA9A988), + SPH_C64(0x54824D2AA82A2A67), SPH_C64(0x6B6DB1BBD6BBBB0A), + SPH_C64(0x9FE246C123C1C187), SPH_C64(0xA602A253515353F1), + SPH_C64(0xA58BAEDC57DCDC72), SPH_C64(0x1627580B2C0B0B53), + SPH_C64(0x27D39C9D4E9D9D01), SPH_C64(0xD8C1476CAD6C6C2B), + SPH_C64(0x62F59531C43131A4), SPH_C64(0xE8B98774CD7474F3), + SPH_C64(0xF109E3F6FFF6F615), SPH_C64(0x8C430A460546464C), + SPH_C64(0x452609AC8AACACA5), SPH_C64(0x0F973C891E8989B5), + SPH_C64(0x2844A014501414B4), SPH_C64(0xDF425BE1A3E1E1BA), + SPH_C64(0x2C4EB016581616A6), SPH_C64(0x74D2CD3AE83A3AF7), + SPH_C64(0xD2D06F69B9696906), SPH_C64(0x122D480924090941), + SPH_C64(0xE0ADA770DD7070D7), SPH_C64(0x7154D9B6E2B6B66F), + SPH_C64(0xBDB7CED067D0D01E), SPH_C64(0xC77E3BED93EDEDD6), + SPH_C64(0x85DB2ECC17CCCCE2), SPH_C64(0x84572A4215424268), + SPH_C64(0x2DC2B4985A98982C), SPH_C64(0x550E49A4AAA4A4ED), + SPH_C64(0x50885D28A0282875), SPH_C64(0xB831DA5C6D5C5C86), + SPH_C64(0xED3F93F8C7F8F86B), SPH_C64(0x11A44486228686C2) +}; + +static const sph_u64 plain_T2[256] = { + SPH_C64(0x78C018601818D830), SPH_C64(0xAF05238C23232646), + SPH_C64(0xF97EC63FC6C6B891), SPH_C64(0x6F13E887E8E8FBCD), + SPH_C64(0xA14C87268787CB13), SPH_C64(0x62A9B8DAB8B8116D), + SPH_C64(0x0508010401010902), SPH_C64(0x6E424F214F4F0D9E), + SPH_C64(0xEEAD36D836369B6C), SPH_C64(0x0459A6A2A6A6FF51), + SPH_C64(0xBDDED26FD2D20CB9), 
SPH_C64(0x06FBF5F3F5F50EF7), + SPH_C64(0x80EF79F9797996F2), SPH_C64(0xCE5F6FA16F6F30DE), + SPH_C64(0xEFFC917E91916D3F), SPH_C64(0x07AA52555252F8A4), + SPH_C64(0xFD27609D606047C0), SPH_C64(0x7689BCCABCBC3565), + SPH_C64(0xCDAC9B569B9B372B), SPH_C64(0x8C048E028E8E8A01), + SPH_C64(0x1571A3B6A3A3D25B), SPH_C64(0x3C600C300C0C6C18), + SPH_C64(0x8AFF7BF17B7B84F6), SPH_C64(0xE1B535D43535806A), + SPH_C64(0x69E81D741D1DF53A), SPH_C64(0x4753E0A7E0E0B3DD), + SPH_C64(0xACF6D77BD7D721B3), SPH_C64(0xED5EC22FC2C29C99), + SPH_C64(0x966D2EB82E2E435C), SPH_C64(0x7A624B314B4B2996), + SPH_C64(0x21A3FEDFFEFE5DE1), SPH_C64(0x168257415757D5AE), + SPH_C64(0x41A815541515BD2A), SPH_C64(0xB69F77C17777E8EE), + SPH_C64(0xEBA537DC3737926E), SPH_C64(0x567BE5B3E5E59ED7), + SPH_C64(0xD98C9F469F9F1323), SPH_C64(0x17D3F0E7F0F023FD), + SPH_C64(0x7F6A4A354A4A2094), SPH_C64(0x959EDA4FDADA44A9), + SPH_C64(0x25FA587D5858A2B0), SPH_C64(0xCA06C903C9C9CF8F), + SPH_C64(0x8D5529A429297C52), SPH_C64(0x22500A280A0A5A14), + SPH_C64(0x4FE1B1FEB1B1507F), SPH_C64(0x1A69A0BAA0A0C95D), + SPH_C64(0xDA7F6BB16B6B14D6), SPH_C64(0xAB5C852E8585D917), + SPH_C64(0x7381BDCEBDBD3C67), SPH_C64(0x34D25D695D5D8FBA), + SPH_C64(0x5080104010109020), SPH_C64(0x03F3F4F7F4F407F5), + SPH_C64(0xC016CB0BCBCBDD8B), SPH_C64(0xC6ED3EF83E3ED37C), + SPH_C64(0x1128051405052D0A), SPH_C64(0xE61F6781676778CE), + SPH_C64(0x5373E4B7E4E497D5), SPH_C64(0xBB25279C2727024E), + SPH_C64(0x5832411941417382), SPH_C64(0x9D2C8B168B8BA70B), + SPH_C64(0x0151A7A6A7A7F653), SPH_C64(0x94CF7DE97D7DB2FA), + SPH_C64(0xFBDC956E95954937), SPH_C64(0x9F8ED847D8D856AD), + SPH_C64(0x308BFBCBFBFB70EB), SPH_C64(0x7123EE9FEEEECDC1), + SPH_C64(0x91C77CED7C7CBBF8), SPH_C64(0xE3176685666671CC), + SPH_C64(0x8EA6DD53DDDD7BA7), SPH_C64(0x4BB8175C1717AF2E), + SPH_C64(0x460247014747458E), SPH_C64(0xDC849E429E9E1A21), + SPH_C64(0xC51ECA0FCACAD489), SPH_C64(0x99752DB42D2D585A), + SPH_C64(0x7991BFC6BFBF2E63), SPH_C64(0x1B38071C07073F0E), + SPH_C64(0x2301AD8EADADAC47), 
SPH_C64(0x2FEA5A755A5AB0B4), + SPH_C64(0xB56C83368383EF1B), SPH_C64(0xFF8533CC3333B666), + SPH_C64(0xF23F639163635CC6), SPH_C64(0x0A10020802021204), + SPH_C64(0x3839AA92AAAA9349), SPH_C64(0xA8AF71D97171DEE2), + SPH_C64(0xCF0EC807C8C8C68D), SPH_C64(0x7DC819641919D132), + SPH_C64(0x7072493949493B92), SPH_C64(0x9A86D943D9D95FAF), + SPH_C64(0x1DC3F2EFF2F231F9), SPH_C64(0x484BE3ABE3E3A8DB), + SPH_C64(0x2AE25B715B5BB9B6), SPH_C64(0x9234881A8888BC0D), + SPH_C64(0xC8A49A529A9A3E29), SPH_C64(0xBE2D269826260B4C), + SPH_C64(0xFA8D32C83232BF64), SPH_C64(0x4AE9B0FAB0B0597D), + SPH_C64(0x6A1BE983E9E9F2CF), SPH_C64(0x33780F3C0F0F771E), + SPH_C64(0xA6E6D573D5D533B7), SPH_C64(0xBA74803A8080F41D), + SPH_C64(0x7C99BEC2BEBE2761), SPH_C64(0xDE26CD13CDCDEB87), + SPH_C64(0xE4BD34D034348968), SPH_C64(0x757A483D48483290), + SPH_C64(0x24ABFFDBFFFF54E3), SPH_C64(0x8FF77AF57A7A8DF4), + SPH_C64(0xEAF4907A9090643D), SPH_C64(0x3EC25F615F5F9DBE), + SPH_C64(0xA01D208020203D40), SPH_C64(0xD56768BD68680FD0), + SPH_C64(0x72D01A681A1ACA34), SPH_C64(0x2C19AE82AEAEB741), + SPH_C64(0x5EC9B4EAB4B47D75), SPH_C64(0x199A544D5454CEA8), + SPH_C64(0xE5EC937693937F3B), SPH_C64(0xAA0D228822222F44), + SPH_C64(0xE907648D646463C8), SPH_C64(0x12DBF1E3F1F12AFF), + SPH_C64(0xA2BF73D17373CCE6), SPH_C64(0x5A90124812128224), + SPH_C64(0x5D3A401D40407A80), SPH_C64(0x2840082008084810), + SPH_C64(0xE856C32BC3C3959B), SPH_C64(0x7B33EC97ECECDFC5), + SPH_C64(0x9096DB4BDBDB4DAB), SPH_C64(0x1F61A1BEA1A1C05F), + SPH_C64(0x831C8D0E8D8D9107), SPH_C64(0xC9F53DF43D3DC87A), + SPH_C64(0xF1CC976697975B33), SPH_C64(0x0000000000000000), + SPH_C64(0xD436CF1BCFCFF983), SPH_C64(0x87452BAC2B2B6E56), + SPH_C64(0xB39776C57676E1EC), SPH_C64(0xB06482328282E619), + SPH_C64(0xA9FED67FD6D628B1), SPH_C64(0x77D81B6C1B1BC336), + SPH_C64(0x5BC1B5EEB5B57477), SPH_C64(0x2911AF86AFAFBE43), + SPH_C64(0xDF776AB56A6A1DD4), SPH_C64(0x0DBA505D5050EAA0), + SPH_C64(0x4C1245094545578A), SPH_C64(0x18CBF3EBF3F338FB), + SPH_C64(0xF09D30C03030AD60), 
SPH_C64(0x742BEF9BEFEFC4C3), + SPH_C64(0xC3E53FFC3F3FDA7E), SPH_C64(0x1C9255495555C7AA), + SPH_C64(0x1079A2B2A2A2DB59), SPH_C64(0x6503EA8FEAEAE9C9), + SPH_C64(0xEC0F658965656ACA), SPH_C64(0x68B9BAD2BABA0369), + SPH_C64(0x93652FBC2F2F4A5E), SPH_C64(0xE74EC027C0C08E9D), + SPH_C64(0x81BEDE5FDEDE60A1), SPH_C64(0x6CE01C701C1CFC38), + SPH_C64(0x2EBBFDD3FDFD46E7), SPH_C64(0x64524D294D4D1F9A), + SPH_C64(0xE0E4927292927639), SPH_C64(0xBC8F75C97575FAEA), + SPH_C64(0x1E3006180606360C), SPH_C64(0x98248A128A8AAE09), + SPH_C64(0x40F9B2F2B2B24B79), SPH_C64(0x5963E6BFE6E685D1), + SPH_C64(0x36700E380E0E7E1C), SPH_C64(0x63F81F7C1F1FE73E), + SPH_C64(0xF7376295626255C4), SPH_C64(0xA3EED477D4D43AB5), + SPH_C64(0x3229A89AA8A8814D), SPH_C64(0xF4C4966296965231), + SPH_C64(0x3A9BF9C3F9F962EF), SPH_C64(0xF666C533C5C5A397), + SPH_C64(0xB13525942525104A), SPH_C64(0x20F259795959ABB2), + SPH_C64(0xAE54842A8484D015), SPH_C64(0xA7B772D57272C5E4), + SPH_C64(0xDDD539E43939EC72), SPH_C64(0x615A4C2D4C4C1698), + SPH_C64(0x3BCA5E655E5E94BC), SPH_C64(0x85E778FD78789FF0), + SPH_C64(0xD8DD38E03838E570), SPH_C64(0x86148C0A8C8C9805), + SPH_C64(0xB2C6D163D1D117BF), SPH_C64(0x0B41A5AEA5A5E457), + SPH_C64(0x4D43E2AFE2E2A1D9), SPH_C64(0xF82F619961614EC2), + SPH_C64(0x45F1B3F6B3B3427B), SPH_C64(0xA515218421213442), + SPH_C64(0xD6949C4A9C9C0825), SPH_C64(0x66F01E781E1EEE3C), + SPH_C64(0x5222431143436186), SPH_C64(0xFC76C73BC7C7B193), + SPH_C64(0x2BB3FCD7FCFC4FE5), SPH_C64(0x1420041004042408), + SPH_C64(0x08B251595151E3A2), SPH_C64(0xC7BC995E9999252F), + SPH_C64(0xC44F6DA96D6D22DA), SPH_C64(0x39680D340D0D651A), + SPH_C64(0x3583FACFFAFA79E9), SPH_C64(0x84B6DF5BDFDF69A3), + SPH_C64(0x9BD77EE57E7EA9FC), SPH_C64(0xB43D249024241948), + SPH_C64(0xD7C53BEC3B3BFE76), SPH_C64(0x3D31AB96ABAB9A4B), + SPH_C64(0xD13ECE1FCECEF081), SPH_C64(0x5588114411119922), + SPH_C64(0x890C8F068F8F8303), SPH_C64(0x6B4A4E254E4E049C), + SPH_C64(0x51D1B7E6B7B76673), SPH_C64(0x600BEB8BEBEBE0CB), + SPH_C64(0xCCFD3CF03C3CC178), 
SPH_C64(0xBF7C813E8181FD1F), + SPH_C64(0xFED4946A94944035), SPH_C64(0x0CEBF7FBF7F71CF3), + SPH_C64(0x67A1B9DEB9B9186F), SPH_C64(0x5F98134C13138B26), + SPH_C64(0x9C7D2CB02C2C5158), SPH_C64(0xB8D6D36BD3D305BB), + SPH_C64(0x5C6BE7BBE7E78CD3), SPH_C64(0xCB576EA56E6E39DC), + SPH_C64(0xF36EC437C4C4AA95), SPH_C64(0x0F18030C03031B06), + SPH_C64(0x138A56455656DCAC), SPH_C64(0x491A440D44445E88), + SPH_C64(0x9EDF7FE17F7FA0FE), SPH_C64(0x3721A99EA9A9884F), + SPH_C64(0x824D2AA82A2A6754), SPH_C64(0x6DB1BBD6BBBB0A6B), + SPH_C64(0xE246C123C1C1879F), SPH_C64(0x02A253515353F1A6), + SPH_C64(0x8BAEDC57DCDC72A5), SPH_C64(0x27580B2C0B0B5316), + SPH_C64(0xD39C9D4E9D9D0127), SPH_C64(0xC1476CAD6C6C2BD8), + SPH_C64(0xF59531C43131A462), SPH_C64(0xB98774CD7474F3E8), + SPH_C64(0x09E3F6FFF6F615F1), SPH_C64(0x430A460546464C8C), + SPH_C64(0x2609AC8AACACA545), SPH_C64(0x973C891E8989B50F), + SPH_C64(0x44A014501414B428), SPH_C64(0x425BE1A3E1E1BADF), + SPH_C64(0x4EB016581616A62C), SPH_C64(0xD2CD3AE83A3AF774), + SPH_C64(0xD06F69B9696906D2), SPH_C64(0x2D48092409094112), + SPH_C64(0xADA770DD7070D7E0), SPH_C64(0x54D9B6E2B6B66F71), + SPH_C64(0xB7CED067D0D01EBD), SPH_C64(0x7E3BED93EDEDD6C7), + SPH_C64(0xDB2ECC17CCCCE285), SPH_C64(0x572A421542426884), + SPH_C64(0xC2B4985A98982C2D), SPH_C64(0x0E49A4AAA4A4ED55), + SPH_C64(0x885D28A028287550), SPH_C64(0x31DA5C6D5C5C86B8), + SPH_C64(0x3F93F8C7F8F86BED), SPH_C64(0xA44486228686C211) +}; + +static const sph_u64 plain_T3[256] = { + SPH_C64(0xC018601818D83078), SPH_C64(0x05238C23232646AF), + SPH_C64(0x7EC63FC6C6B891F9), SPH_C64(0x13E887E8E8FBCD6F), + SPH_C64(0x4C87268787CB13A1), SPH_C64(0xA9B8DAB8B8116D62), + SPH_C64(0x0801040101090205), SPH_C64(0x424F214F4F0D9E6E), + SPH_C64(0xAD36D836369B6CEE), SPH_C64(0x59A6A2A6A6FF5104), + SPH_C64(0xDED26FD2D20CB9BD), SPH_C64(0xFBF5F3F5F50EF706), + SPH_C64(0xEF79F9797996F280), SPH_C64(0x5F6FA16F6F30DECE), + SPH_C64(0xFC917E91916D3FEF), SPH_C64(0xAA52555252F8A407), + SPH_C64(0x27609D606047C0FD), SPH_C64(0x89BCCABCBC356576), + 
SPH_C64(0xAC9B569B9B372BCD), SPH_C64(0x048E028E8E8A018C), + SPH_C64(0x71A3B6A3A3D25B15), SPH_C64(0x600C300C0C6C183C), + SPH_C64(0xFF7BF17B7B84F68A), SPH_C64(0xB535D43535806AE1), + SPH_C64(0xE81D741D1DF53A69), SPH_C64(0x53E0A7E0E0B3DD47), + SPH_C64(0xF6D77BD7D721B3AC), SPH_C64(0x5EC22FC2C29C99ED), + SPH_C64(0x6D2EB82E2E435C96), SPH_C64(0x624B314B4B29967A), + SPH_C64(0xA3FEDFFEFE5DE121), SPH_C64(0x8257415757D5AE16), + SPH_C64(0xA815541515BD2A41), SPH_C64(0x9F77C17777E8EEB6), + SPH_C64(0xA537DC3737926EEB), SPH_C64(0x7BE5B3E5E59ED756), + SPH_C64(0x8C9F469F9F1323D9), SPH_C64(0xD3F0E7F0F023FD17), + SPH_C64(0x6A4A354A4A20947F), SPH_C64(0x9EDA4FDADA44A995), + SPH_C64(0xFA587D5858A2B025), SPH_C64(0x06C903C9C9CF8FCA), + SPH_C64(0x5529A429297C528D), SPH_C64(0x500A280A0A5A1422), + SPH_C64(0xE1B1FEB1B1507F4F), SPH_C64(0x69A0BAA0A0C95D1A), + SPH_C64(0x7F6BB16B6B14D6DA), SPH_C64(0x5C852E8585D917AB), + SPH_C64(0x81BDCEBDBD3C6773), SPH_C64(0xD25D695D5D8FBA34), + SPH_C64(0x8010401010902050), SPH_C64(0xF3F4F7F4F407F503), + SPH_C64(0x16CB0BCBCBDD8BC0), SPH_C64(0xED3EF83E3ED37CC6), + SPH_C64(0x28051405052D0A11), SPH_C64(0x1F6781676778CEE6), + SPH_C64(0x73E4B7E4E497D553), SPH_C64(0x25279C2727024EBB), + SPH_C64(0x3241194141738258), SPH_C64(0x2C8B168B8BA70B9D), + SPH_C64(0x51A7A6A7A7F65301), SPH_C64(0xCF7DE97D7DB2FA94), + SPH_C64(0xDC956E95954937FB), SPH_C64(0x8ED847D8D856AD9F), + SPH_C64(0x8BFBCBFBFB70EB30), SPH_C64(0x23EE9FEEEECDC171), + SPH_C64(0xC77CED7C7CBBF891), SPH_C64(0x176685666671CCE3), + SPH_C64(0xA6DD53DDDD7BA78E), SPH_C64(0xB8175C1717AF2E4B), + SPH_C64(0x0247014747458E46), SPH_C64(0x849E429E9E1A21DC), + SPH_C64(0x1ECA0FCACAD489C5), SPH_C64(0x752DB42D2D585A99), + SPH_C64(0x91BFC6BFBF2E6379), SPH_C64(0x38071C07073F0E1B), + SPH_C64(0x01AD8EADADAC4723), SPH_C64(0xEA5A755A5AB0B42F), + SPH_C64(0x6C83368383EF1BB5), SPH_C64(0x8533CC3333B666FF), + SPH_C64(0x3F639163635CC6F2), SPH_C64(0x100208020212040A), + SPH_C64(0x39AA92AAAA934938), SPH_C64(0xAF71D97171DEE2A8), + 
SPH_C64(0x0EC807C8C8C68DCF), SPH_C64(0xC819641919D1327D), + SPH_C64(0x72493949493B9270), SPH_C64(0x86D943D9D95FAF9A), + SPH_C64(0xC3F2EFF2F231F91D), SPH_C64(0x4BE3ABE3E3A8DB48), + SPH_C64(0xE25B715B5BB9B62A), SPH_C64(0x34881A8888BC0D92), + SPH_C64(0xA49A529A9A3E29C8), SPH_C64(0x2D269826260B4CBE), + SPH_C64(0x8D32C83232BF64FA), SPH_C64(0xE9B0FAB0B0597D4A), + SPH_C64(0x1BE983E9E9F2CF6A), SPH_C64(0x780F3C0F0F771E33), + SPH_C64(0xE6D573D5D533B7A6), SPH_C64(0x74803A8080F41DBA), + SPH_C64(0x99BEC2BEBE27617C), SPH_C64(0x26CD13CDCDEB87DE), + SPH_C64(0xBD34D034348968E4), SPH_C64(0x7A483D4848329075), + SPH_C64(0xABFFDBFFFF54E324), SPH_C64(0xF77AF57A7A8DF48F), + SPH_C64(0xF4907A9090643DEA), SPH_C64(0xC25F615F5F9DBE3E), + SPH_C64(0x1D208020203D40A0), SPH_C64(0x6768BD68680FD0D5), + SPH_C64(0xD01A681A1ACA3472), SPH_C64(0x19AE82AEAEB7412C), + SPH_C64(0xC9B4EAB4B47D755E), SPH_C64(0x9A544D5454CEA819), + SPH_C64(0xEC937693937F3BE5), SPH_C64(0x0D228822222F44AA), + SPH_C64(0x07648D646463C8E9), SPH_C64(0xDBF1E3F1F12AFF12), + SPH_C64(0xBF73D17373CCE6A2), SPH_C64(0x901248121282245A), + SPH_C64(0x3A401D40407A805D), SPH_C64(0x4008200808481028), + SPH_C64(0x56C32BC3C3959BE8), SPH_C64(0x33EC97ECECDFC57B), + SPH_C64(0x96DB4BDBDB4DAB90), SPH_C64(0x61A1BEA1A1C05F1F), + SPH_C64(0x1C8D0E8D8D910783), SPH_C64(0xF53DF43D3DC87AC9), + SPH_C64(0xCC976697975B33F1), SPH_C64(0x0000000000000000), + SPH_C64(0x36CF1BCFCFF983D4), SPH_C64(0x452BAC2B2B6E5687), + SPH_C64(0x9776C57676E1ECB3), SPH_C64(0x6482328282E619B0), + SPH_C64(0xFED67FD6D628B1A9), SPH_C64(0xD81B6C1B1BC33677), + SPH_C64(0xC1B5EEB5B574775B), SPH_C64(0x11AF86AFAFBE4329), + SPH_C64(0x776AB56A6A1DD4DF), SPH_C64(0xBA505D5050EAA00D), + SPH_C64(0x1245094545578A4C), SPH_C64(0xCBF3EBF3F338FB18), + SPH_C64(0x9D30C03030AD60F0), SPH_C64(0x2BEF9BEFEFC4C374), + SPH_C64(0xE53FFC3F3FDA7EC3), SPH_C64(0x9255495555C7AA1C), + SPH_C64(0x79A2B2A2A2DB5910), SPH_C64(0x03EA8FEAEAE9C965), + SPH_C64(0x0F658965656ACAEC), SPH_C64(0xB9BAD2BABA036968), + 
SPH_C64(0x652FBC2F2F4A5E93), SPH_C64(0x4EC027C0C08E9DE7), + SPH_C64(0xBEDE5FDEDE60A181), SPH_C64(0xE01C701C1CFC386C), + SPH_C64(0xBBFDD3FDFD46E72E), SPH_C64(0x524D294D4D1F9A64), + SPH_C64(0xE4927292927639E0), SPH_C64(0x8F75C97575FAEABC), + SPH_C64(0x3006180606360C1E), SPH_C64(0x248A128A8AAE0998), + SPH_C64(0xF9B2F2B2B24B7940), SPH_C64(0x63E6BFE6E685D159), + SPH_C64(0x700E380E0E7E1C36), SPH_C64(0xF81F7C1F1FE73E63), + SPH_C64(0x376295626255C4F7), SPH_C64(0xEED477D4D43AB5A3), + SPH_C64(0x29A89AA8A8814D32), SPH_C64(0xC4966296965231F4), + SPH_C64(0x9BF9C3F9F962EF3A), SPH_C64(0x66C533C5C5A397F6), + SPH_C64(0x3525942525104AB1), SPH_C64(0xF259795959ABB220), + SPH_C64(0x54842A8484D015AE), SPH_C64(0xB772D57272C5E4A7), + SPH_C64(0xD539E43939EC72DD), SPH_C64(0x5A4C2D4C4C169861), + SPH_C64(0xCA5E655E5E94BC3B), SPH_C64(0xE778FD78789FF085), + SPH_C64(0xDD38E03838E570D8), SPH_C64(0x148C0A8C8C980586), + SPH_C64(0xC6D163D1D117BFB2), SPH_C64(0x41A5AEA5A5E4570B), + SPH_C64(0x43E2AFE2E2A1D94D), SPH_C64(0x2F619961614EC2F8), + SPH_C64(0xF1B3F6B3B3427B45), SPH_C64(0x15218421213442A5), + SPH_C64(0x949C4A9C9C0825D6), SPH_C64(0xF01E781E1EEE3C66), + SPH_C64(0x2243114343618652), SPH_C64(0x76C73BC7C7B193FC), + SPH_C64(0xB3FCD7FCFC4FE52B), SPH_C64(0x2004100404240814), + SPH_C64(0xB251595151E3A208), SPH_C64(0xBC995E9999252FC7), + SPH_C64(0x4F6DA96D6D22DAC4), SPH_C64(0x680D340D0D651A39), + SPH_C64(0x83FACFFAFA79E935), SPH_C64(0xB6DF5BDFDF69A384), + SPH_C64(0xD77EE57E7EA9FC9B), SPH_C64(0x3D249024241948B4), + SPH_C64(0xC53BEC3B3BFE76D7), SPH_C64(0x31AB96ABAB9A4B3D), + SPH_C64(0x3ECE1FCECEF081D1), SPH_C64(0x8811441111992255), + SPH_C64(0x0C8F068F8F830389), SPH_C64(0x4A4E254E4E049C6B), + SPH_C64(0xD1B7E6B7B7667351), SPH_C64(0x0BEB8BEBEBE0CB60), + SPH_C64(0xFD3CF03C3CC178CC), SPH_C64(0x7C813E8181FD1FBF), + SPH_C64(0xD4946A94944035FE), SPH_C64(0xEBF7FBF7F71CF30C), + SPH_C64(0xA1B9DEB9B9186F67), SPH_C64(0x98134C13138B265F), + SPH_C64(0x7D2CB02C2C51589C), SPH_C64(0xD6D36BD3D305BBB8), + 
SPH_C64(0x6BE7BBE7E78CD35C), SPH_C64(0x576EA56E6E39DCCB), + SPH_C64(0x6EC437C4C4AA95F3), SPH_C64(0x18030C03031B060F), + SPH_C64(0x8A56455656DCAC13), SPH_C64(0x1A440D44445E8849), + SPH_C64(0xDF7FE17F7FA0FE9E), SPH_C64(0x21A99EA9A9884F37), + SPH_C64(0x4D2AA82A2A675482), SPH_C64(0xB1BBD6BBBB0A6B6D), + SPH_C64(0x46C123C1C1879FE2), SPH_C64(0xA253515353F1A602), + SPH_C64(0xAEDC57DCDC72A58B), SPH_C64(0x580B2C0B0B531627), + SPH_C64(0x9C9D4E9D9D0127D3), SPH_C64(0x476CAD6C6C2BD8C1), + SPH_C64(0x9531C43131A462F5), SPH_C64(0x8774CD7474F3E8B9), + SPH_C64(0xE3F6FFF6F615F109), SPH_C64(0x0A460546464C8C43), + SPH_C64(0x09AC8AACACA54526), SPH_C64(0x3C891E8989B50F97), + SPH_C64(0xA014501414B42844), SPH_C64(0x5BE1A3E1E1BADF42), + SPH_C64(0xB016581616A62C4E), SPH_C64(0xCD3AE83A3AF774D2), + SPH_C64(0x6F69B9696906D2D0), SPH_C64(0x480924090941122D), + SPH_C64(0xA770DD7070D7E0AD), SPH_C64(0xD9B6E2B6B66F7154), + SPH_C64(0xCED067D0D01EBDB7), SPH_C64(0x3BED93EDEDD6C77E), + SPH_C64(0x2ECC17CCCCE285DB), SPH_C64(0x2A42154242688457), + SPH_C64(0xB4985A98982C2DC2), SPH_C64(0x49A4AAA4A4ED550E), + SPH_C64(0x5D28A02828755088), SPH_C64(0xDA5C6D5C5C86B831), + SPH_C64(0x93F8C7F8F86BED3F), SPH_C64(0x4486228686C211A4) +}; + +static const sph_u64 plain_T4[256] = { + SPH_C64(0x18601818D83078C0), SPH_C64(0x238C23232646AF05), + SPH_C64(0xC63FC6C6B891F97E), SPH_C64(0xE887E8E8FBCD6F13), + SPH_C64(0x87268787CB13A14C), SPH_C64(0xB8DAB8B8116D62A9), + SPH_C64(0x0104010109020508), SPH_C64(0x4F214F4F0D9E6E42), + SPH_C64(0x36D836369B6CEEAD), SPH_C64(0xA6A2A6A6FF510459), + SPH_C64(0xD26FD2D20CB9BDDE), SPH_C64(0xF5F3F5F50EF706FB), + SPH_C64(0x79F9797996F280EF), SPH_C64(0x6FA16F6F30DECE5F), + SPH_C64(0x917E91916D3FEFFC), SPH_C64(0x52555252F8A407AA), + SPH_C64(0x609D606047C0FD27), SPH_C64(0xBCCABCBC35657689), + SPH_C64(0x9B569B9B372BCDAC), SPH_C64(0x8E028E8E8A018C04), + SPH_C64(0xA3B6A3A3D25B1571), SPH_C64(0x0C300C0C6C183C60), + SPH_C64(0x7BF17B7B84F68AFF), SPH_C64(0x35D43535806AE1B5), + SPH_C64(0x1D741D1DF53A69E8), 
SPH_C64(0xE0A7E0E0B3DD4753), + SPH_C64(0xD77BD7D721B3ACF6), SPH_C64(0xC22FC2C29C99ED5E), + SPH_C64(0x2EB82E2E435C966D), SPH_C64(0x4B314B4B29967A62), + SPH_C64(0xFEDFFEFE5DE121A3), SPH_C64(0x57415757D5AE1682), + SPH_C64(0x15541515BD2A41A8), SPH_C64(0x77C17777E8EEB69F), + SPH_C64(0x37DC3737926EEBA5), SPH_C64(0xE5B3E5E59ED7567B), + SPH_C64(0x9F469F9F1323D98C), SPH_C64(0xF0E7F0F023FD17D3), + SPH_C64(0x4A354A4A20947F6A), SPH_C64(0xDA4FDADA44A9959E), + SPH_C64(0x587D5858A2B025FA), SPH_C64(0xC903C9C9CF8FCA06), + SPH_C64(0x29A429297C528D55), SPH_C64(0x0A280A0A5A142250), + SPH_C64(0xB1FEB1B1507F4FE1), SPH_C64(0xA0BAA0A0C95D1A69), + SPH_C64(0x6BB16B6B14D6DA7F), SPH_C64(0x852E8585D917AB5C), + SPH_C64(0xBDCEBDBD3C677381), SPH_C64(0x5D695D5D8FBA34D2), + SPH_C64(0x1040101090205080), SPH_C64(0xF4F7F4F407F503F3), + SPH_C64(0xCB0BCBCBDD8BC016), SPH_C64(0x3EF83E3ED37CC6ED), + SPH_C64(0x051405052D0A1128), SPH_C64(0x6781676778CEE61F), + SPH_C64(0xE4B7E4E497D55373), SPH_C64(0x279C2727024EBB25), + SPH_C64(0x4119414173825832), SPH_C64(0x8B168B8BA70B9D2C), + SPH_C64(0xA7A6A7A7F6530151), SPH_C64(0x7DE97D7DB2FA94CF), + SPH_C64(0x956E95954937FBDC), SPH_C64(0xD847D8D856AD9F8E), + SPH_C64(0xFBCBFBFB70EB308B), SPH_C64(0xEE9FEEEECDC17123), + SPH_C64(0x7CED7C7CBBF891C7), SPH_C64(0x6685666671CCE317), + SPH_C64(0xDD53DDDD7BA78EA6), SPH_C64(0x175C1717AF2E4BB8), + SPH_C64(0x47014747458E4602), SPH_C64(0x9E429E9E1A21DC84), + SPH_C64(0xCA0FCACAD489C51E), SPH_C64(0x2DB42D2D585A9975), + SPH_C64(0xBFC6BFBF2E637991), SPH_C64(0x071C07073F0E1B38), + SPH_C64(0xAD8EADADAC472301), SPH_C64(0x5A755A5AB0B42FEA), + SPH_C64(0x83368383EF1BB56C), SPH_C64(0x33CC3333B666FF85), + SPH_C64(0x639163635CC6F23F), SPH_C64(0x0208020212040A10), + SPH_C64(0xAA92AAAA93493839), SPH_C64(0x71D97171DEE2A8AF), + SPH_C64(0xC807C8C8C68DCF0E), SPH_C64(0x19641919D1327DC8), + SPH_C64(0x493949493B927072), SPH_C64(0xD943D9D95FAF9A86), + SPH_C64(0xF2EFF2F231F91DC3), SPH_C64(0xE3ABE3E3A8DB484B), + SPH_C64(0x5B715B5BB9B62AE2), 
SPH_C64(0x881A8888BC0D9234), + SPH_C64(0x9A529A9A3E29C8A4), SPH_C64(0x269826260B4CBE2D), + SPH_C64(0x32C83232BF64FA8D), SPH_C64(0xB0FAB0B0597D4AE9), + SPH_C64(0xE983E9E9F2CF6A1B), SPH_C64(0x0F3C0F0F771E3378), + SPH_C64(0xD573D5D533B7A6E6), SPH_C64(0x803A8080F41DBA74), + SPH_C64(0xBEC2BEBE27617C99), SPH_C64(0xCD13CDCDEB87DE26), + SPH_C64(0x34D034348968E4BD), SPH_C64(0x483D48483290757A), + SPH_C64(0xFFDBFFFF54E324AB), SPH_C64(0x7AF57A7A8DF48FF7), + SPH_C64(0x907A9090643DEAF4), SPH_C64(0x5F615F5F9DBE3EC2), + SPH_C64(0x208020203D40A01D), SPH_C64(0x68BD68680FD0D567), + SPH_C64(0x1A681A1ACA3472D0), SPH_C64(0xAE82AEAEB7412C19), + SPH_C64(0xB4EAB4B47D755EC9), SPH_C64(0x544D5454CEA8199A), + SPH_C64(0x937693937F3BE5EC), SPH_C64(0x228822222F44AA0D), + SPH_C64(0x648D646463C8E907), SPH_C64(0xF1E3F1F12AFF12DB), + SPH_C64(0x73D17373CCE6A2BF), SPH_C64(0x1248121282245A90), + SPH_C64(0x401D40407A805D3A), SPH_C64(0x0820080848102840), + SPH_C64(0xC32BC3C3959BE856), SPH_C64(0xEC97ECECDFC57B33), + SPH_C64(0xDB4BDBDB4DAB9096), SPH_C64(0xA1BEA1A1C05F1F61), + SPH_C64(0x8D0E8D8D9107831C), SPH_C64(0x3DF43D3DC87AC9F5), + SPH_C64(0x976697975B33F1CC), SPH_C64(0x0000000000000000), + SPH_C64(0xCF1BCFCFF983D436), SPH_C64(0x2BAC2B2B6E568745), + SPH_C64(0x76C57676E1ECB397), SPH_C64(0x82328282E619B064), + SPH_C64(0xD67FD6D628B1A9FE), SPH_C64(0x1B6C1B1BC33677D8), + SPH_C64(0xB5EEB5B574775BC1), SPH_C64(0xAF86AFAFBE432911), + SPH_C64(0x6AB56A6A1DD4DF77), SPH_C64(0x505D5050EAA00DBA), + SPH_C64(0x45094545578A4C12), SPH_C64(0xF3EBF3F338FB18CB), + SPH_C64(0x30C03030AD60F09D), SPH_C64(0xEF9BEFEFC4C3742B), + SPH_C64(0x3FFC3F3FDA7EC3E5), SPH_C64(0x55495555C7AA1C92), + SPH_C64(0xA2B2A2A2DB591079), SPH_C64(0xEA8FEAEAE9C96503), + SPH_C64(0x658965656ACAEC0F), SPH_C64(0xBAD2BABA036968B9), + SPH_C64(0x2FBC2F2F4A5E9365), SPH_C64(0xC027C0C08E9DE74E), + SPH_C64(0xDE5FDEDE60A181BE), SPH_C64(0x1C701C1CFC386CE0), + SPH_C64(0xFDD3FDFD46E72EBB), SPH_C64(0x4D294D4D1F9A6452), + SPH_C64(0x927292927639E0E4), 
SPH_C64(0x75C97575FAEABC8F), + SPH_C64(0x06180606360C1E30), SPH_C64(0x8A128A8AAE099824), + SPH_C64(0xB2F2B2B24B7940F9), SPH_C64(0xE6BFE6E685D15963), + SPH_C64(0x0E380E0E7E1C3670), SPH_C64(0x1F7C1F1FE73E63F8), + SPH_C64(0x6295626255C4F737), SPH_C64(0xD477D4D43AB5A3EE), + SPH_C64(0xA89AA8A8814D3229), SPH_C64(0x966296965231F4C4), + SPH_C64(0xF9C3F9F962EF3A9B), SPH_C64(0xC533C5C5A397F666), + SPH_C64(0x25942525104AB135), SPH_C64(0x59795959ABB220F2), + SPH_C64(0x842A8484D015AE54), SPH_C64(0x72D57272C5E4A7B7), + SPH_C64(0x39E43939EC72DDD5), SPH_C64(0x4C2D4C4C1698615A), + SPH_C64(0x5E655E5E94BC3BCA), SPH_C64(0x78FD78789FF085E7), + SPH_C64(0x38E03838E570D8DD), SPH_C64(0x8C0A8C8C98058614), + SPH_C64(0xD163D1D117BFB2C6), SPH_C64(0xA5AEA5A5E4570B41), + SPH_C64(0xE2AFE2E2A1D94D43), SPH_C64(0x619961614EC2F82F), + SPH_C64(0xB3F6B3B3427B45F1), SPH_C64(0x218421213442A515), + SPH_C64(0x9C4A9C9C0825D694), SPH_C64(0x1E781E1EEE3C66F0), + SPH_C64(0x4311434361865222), SPH_C64(0xC73BC7C7B193FC76), + SPH_C64(0xFCD7FCFC4FE52BB3), SPH_C64(0x0410040424081420), + SPH_C64(0x51595151E3A208B2), SPH_C64(0x995E9999252FC7BC), + SPH_C64(0x6DA96D6D22DAC44F), SPH_C64(0x0D340D0D651A3968), + SPH_C64(0xFACFFAFA79E93583), SPH_C64(0xDF5BDFDF69A384B6), + SPH_C64(0x7EE57E7EA9FC9BD7), SPH_C64(0x249024241948B43D), + SPH_C64(0x3BEC3B3BFE76D7C5), SPH_C64(0xAB96ABAB9A4B3D31), + SPH_C64(0xCE1FCECEF081D13E), SPH_C64(0x1144111199225588), + SPH_C64(0x8F068F8F8303890C), SPH_C64(0x4E254E4E049C6B4A), + SPH_C64(0xB7E6B7B7667351D1), SPH_C64(0xEB8BEBEBE0CB600B), + SPH_C64(0x3CF03C3CC178CCFD), SPH_C64(0x813E8181FD1FBF7C), + SPH_C64(0x946A94944035FED4), SPH_C64(0xF7FBF7F71CF30CEB), + SPH_C64(0xB9DEB9B9186F67A1), SPH_C64(0x134C13138B265F98), + SPH_C64(0x2CB02C2C51589C7D), SPH_C64(0xD36BD3D305BBB8D6), + SPH_C64(0xE7BBE7E78CD35C6B), SPH_C64(0x6EA56E6E39DCCB57), + SPH_C64(0xC437C4C4AA95F36E), SPH_C64(0x030C03031B060F18), + SPH_C64(0x56455656DCAC138A), SPH_C64(0x440D44445E88491A), + SPH_C64(0x7FE17F7FA0FE9EDF), 
SPH_C64(0xA99EA9A9884F3721), + SPH_C64(0x2AA82A2A6754824D), SPH_C64(0xBBD6BBBB0A6B6DB1), + SPH_C64(0xC123C1C1879FE246), SPH_C64(0x53515353F1A602A2), + SPH_C64(0xDC57DCDC72A58BAE), SPH_C64(0x0B2C0B0B53162758), + SPH_C64(0x9D4E9D9D0127D39C), SPH_C64(0x6CAD6C6C2BD8C147), + SPH_C64(0x31C43131A462F595), SPH_C64(0x74CD7474F3E8B987), + SPH_C64(0xF6FFF6F615F109E3), SPH_C64(0x460546464C8C430A), + SPH_C64(0xAC8AACACA5452609), SPH_C64(0x891E8989B50F973C), + SPH_C64(0x14501414B42844A0), SPH_C64(0xE1A3E1E1BADF425B), + SPH_C64(0x16581616A62C4EB0), SPH_C64(0x3AE83A3AF774D2CD), + SPH_C64(0x69B9696906D2D06F), SPH_C64(0x0924090941122D48), + SPH_C64(0x70DD7070D7E0ADA7), SPH_C64(0xB6E2B6B66F7154D9), + SPH_C64(0xD067D0D01EBDB7CE), SPH_C64(0xED93EDEDD6C77E3B), + SPH_C64(0xCC17CCCCE285DB2E), SPH_C64(0x421542426884572A), + SPH_C64(0x985A98982C2DC2B4), SPH_C64(0xA4AAA4A4ED550E49), + SPH_C64(0x28A028287550885D), SPH_C64(0x5C6D5C5C86B831DA), + SPH_C64(0xF8C7F8F86BED3F93), SPH_C64(0x86228686C211A444) +}; + +static const sph_u64 plain_T5[256] = { + SPH_C64(0x601818D83078C018), SPH_C64(0x8C23232646AF0523), + SPH_C64(0x3FC6C6B891F97EC6), SPH_C64(0x87E8E8FBCD6F13E8), + SPH_C64(0x268787CB13A14C87), SPH_C64(0xDAB8B8116D62A9B8), + SPH_C64(0x0401010902050801), SPH_C64(0x214F4F0D9E6E424F), + SPH_C64(0xD836369B6CEEAD36), SPH_C64(0xA2A6A6FF510459A6), + SPH_C64(0x6FD2D20CB9BDDED2), SPH_C64(0xF3F5F50EF706FBF5), + SPH_C64(0xF9797996F280EF79), SPH_C64(0xA16F6F30DECE5F6F), + SPH_C64(0x7E91916D3FEFFC91), SPH_C64(0x555252F8A407AA52), + SPH_C64(0x9D606047C0FD2760), SPH_C64(0xCABCBC35657689BC), + SPH_C64(0x569B9B372BCDAC9B), SPH_C64(0x028E8E8A018C048E), + SPH_C64(0xB6A3A3D25B1571A3), SPH_C64(0x300C0C6C183C600C), + SPH_C64(0xF17B7B84F68AFF7B), SPH_C64(0xD43535806AE1B535), + SPH_C64(0x741D1DF53A69E81D), SPH_C64(0xA7E0E0B3DD4753E0), + SPH_C64(0x7BD7D721B3ACF6D7), SPH_C64(0x2FC2C29C99ED5EC2), + SPH_C64(0xB82E2E435C966D2E), SPH_C64(0x314B4B29967A624B), + SPH_C64(0xDFFEFE5DE121A3FE), SPH_C64(0x415757D5AE168257), + 
SPH_C64(0x541515BD2A41A815), SPH_C64(0xC17777E8EEB69F77), + SPH_C64(0xDC3737926EEBA537), SPH_C64(0xB3E5E59ED7567BE5), + SPH_C64(0x469F9F1323D98C9F), SPH_C64(0xE7F0F023FD17D3F0), + SPH_C64(0x354A4A20947F6A4A), SPH_C64(0x4FDADA44A9959EDA), + SPH_C64(0x7D5858A2B025FA58), SPH_C64(0x03C9C9CF8FCA06C9), + SPH_C64(0xA429297C528D5529), SPH_C64(0x280A0A5A1422500A), + SPH_C64(0xFEB1B1507F4FE1B1), SPH_C64(0xBAA0A0C95D1A69A0), + SPH_C64(0xB16B6B14D6DA7F6B), SPH_C64(0x2E8585D917AB5C85), + SPH_C64(0xCEBDBD3C677381BD), SPH_C64(0x695D5D8FBA34D25D), + SPH_C64(0x4010109020508010), SPH_C64(0xF7F4F407F503F3F4), + SPH_C64(0x0BCBCBDD8BC016CB), SPH_C64(0xF83E3ED37CC6ED3E), + SPH_C64(0x1405052D0A112805), SPH_C64(0x81676778CEE61F67), + SPH_C64(0xB7E4E497D55373E4), SPH_C64(0x9C2727024EBB2527), + SPH_C64(0x1941417382583241), SPH_C64(0x168B8BA70B9D2C8B), + SPH_C64(0xA6A7A7F6530151A7), SPH_C64(0xE97D7DB2FA94CF7D), + SPH_C64(0x6E95954937FBDC95), SPH_C64(0x47D8D856AD9F8ED8), + SPH_C64(0xCBFBFB70EB308BFB), SPH_C64(0x9FEEEECDC17123EE), + SPH_C64(0xED7C7CBBF891C77C), SPH_C64(0x85666671CCE31766), + SPH_C64(0x53DDDD7BA78EA6DD), SPH_C64(0x5C1717AF2E4BB817), + SPH_C64(0x014747458E460247), SPH_C64(0x429E9E1A21DC849E), + SPH_C64(0x0FCACAD489C51ECA), SPH_C64(0xB42D2D585A99752D), + SPH_C64(0xC6BFBF2E637991BF), SPH_C64(0x1C07073F0E1B3807), + SPH_C64(0x8EADADAC472301AD), SPH_C64(0x755A5AB0B42FEA5A), + SPH_C64(0x368383EF1BB56C83), SPH_C64(0xCC3333B666FF8533), + SPH_C64(0x9163635CC6F23F63), SPH_C64(0x08020212040A1002), + SPH_C64(0x92AAAA93493839AA), SPH_C64(0xD97171DEE2A8AF71), + SPH_C64(0x07C8C8C68DCF0EC8), SPH_C64(0x641919D1327DC819), + SPH_C64(0x3949493B92707249), SPH_C64(0x43D9D95FAF9A86D9), + SPH_C64(0xEFF2F231F91DC3F2), SPH_C64(0xABE3E3A8DB484BE3), + SPH_C64(0x715B5BB9B62AE25B), SPH_C64(0x1A8888BC0D923488), + SPH_C64(0x529A9A3E29C8A49A), SPH_C64(0x9826260B4CBE2D26), + SPH_C64(0xC83232BF64FA8D32), SPH_C64(0xFAB0B0597D4AE9B0), + SPH_C64(0x83E9E9F2CF6A1BE9), SPH_C64(0x3C0F0F771E33780F), + 
SPH_C64(0x73D5D533B7A6E6D5), SPH_C64(0x3A8080F41DBA7480), + SPH_C64(0xC2BEBE27617C99BE), SPH_C64(0x13CDCDEB87DE26CD), + SPH_C64(0xD034348968E4BD34), SPH_C64(0x3D48483290757A48), + SPH_C64(0xDBFFFF54E324ABFF), SPH_C64(0xF57A7A8DF48FF77A), + SPH_C64(0x7A9090643DEAF490), SPH_C64(0x615F5F9DBE3EC25F), + SPH_C64(0x8020203D40A01D20), SPH_C64(0xBD68680FD0D56768), + SPH_C64(0x681A1ACA3472D01A), SPH_C64(0x82AEAEB7412C19AE), + SPH_C64(0xEAB4B47D755EC9B4), SPH_C64(0x4D5454CEA8199A54), + SPH_C64(0x7693937F3BE5EC93), SPH_C64(0x8822222F44AA0D22), + SPH_C64(0x8D646463C8E90764), SPH_C64(0xE3F1F12AFF12DBF1), + SPH_C64(0xD17373CCE6A2BF73), SPH_C64(0x48121282245A9012), + SPH_C64(0x1D40407A805D3A40), SPH_C64(0x2008084810284008), + SPH_C64(0x2BC3C3959BE856C3), SPH_C64(0x97ECECDFC57B33EC), + SPH_C64(0x4BDBDB4DAB9096DB), SPH_C64(0xBEA1A1C05F1F61A1), + SPH_C64(0x0E8D8D9107831C8D), SPH_C64(0xF43D3DC87AC9F53D), + SPH_C64(0x6697975B33F1CC97), SPH_C64(0x0000000000000000), + SPH_C64(0x1BCFCFF983D436CF), SPH_C64(0xAC2B2B6E5687452B), + SPH_C64(0xC57676E1ECB39776), SPH_C64(0x328282E619B06482), + SPH_C64(0x7FD6D628B1A9FED6), SPH_C64(0x6C1B1BC33677D81B), + SPH_C64(0xEEB5B574775BC1B5), SPH_C64(0x86AFAFBE432911AF), + SPH_C64(0xB56A6A1DD4DF776A), SPH_C64(0x5D5050EAA00DBA50), + SPH_C64(0x094545578A4C1245), SPH_C64(0xEBF3F338FB18CBF3), + SPH_C64(0xC03030AD60F09D30), SPH_C64(0x9BEFEFC4C3742BEF), + SPH_C64(0xFC3F3FDA7EC3E53F), SPH_C64(0x495555C7AA1C9255), + SPH_C64(0xB2A2A2DB591079A2), SPH_C64(0x8FEAEAE9C96503EA), + SPH_C64(0x8965656ACAEC0F65), SPH_C64(0xD2BABA036968B9BA), + SPH_C64(0xBC2F2F4A5E93652F), SPH_C64(0x27C0C08E9DE74EC0), + SPH_C64(0x5FDEDE60A181BEDE), SPH_C64(0x701C1CFC386CE01C), + SPH_C64(0xD3FDFD46E72EBBFD), SPH_C64(0x294D4D1F9A64524D), + SPH_C64(0x7292927639E0E492), SPH_C64(0xC97575FAEABC8F75), + SPH_C64(0x180606360C1E3006), SPH_C64(0x128A8AAE0998248A), + SPH_C64(0xF2B2B24B7940F9B2), SPH_C64(0xBFE6E685D15963E6), + SPH_C64(0x380E0E7E1C36700E), SPH_C64(0x7C1F1FE73E63F81F), + 
SPH_C64(0x95626255C4F73762), SPH_C64(0x77D4D43AB5A3EED4), + SPH_C64(0x9AA8A8814D3229A8), SPH_C64(0x6296965231F4C496), + SPH_C64(0xC3F9F962EF3A9BF9), SPH_C64(0x33C5C5A397F666C5), + SPH_C64(0x942525104AB13525), SPH_C64(0x795959ABB220F259), + SPH_C64(0x2A8484D015AE5484), SPH_C64(0xD57272C5E4A7B772), + SPH_C64(0xE43939EC72DDD539), SPH_C64(0x2D4C4C1698615A4C), + SPH_C64(0x655E5E94BC3BCA5E), SPH_C64(0xFD78789FF085E778), + SPH_C64(0xE03838E570D8DD38), SPH_C64(0x0A8C8C980586148C), + SPH_C64(0x63D1D117BFB2C6D1), SPH_C64(0xAEA5A5E4570B41A5), + SPH_C64(0xAFE2E2A1D94D43E2), SPH_C64(0x9961614EC2F82F61), + SPH_C64(0xF6B3B3427B45F1B3), SPH_C64(0x8421213442A51521), + SPH_C64(0x4A9C9C0825D6949C), SPH_C64(0x781E1EEE3C66F01E), + SPH_C64(0x1143436186522243), SPH_C64(0x3BC7C7B193FC76C7), + SPH_C64(0xD7FCFC4FE52BB3FC), SPH_C64(0x1004042408142004), + SPH_C64(0x595151E3A208B251), SPH_C64(0x5E9999252FC7BC99), + SPH_C64(0xA96D6D22DAC44F6D), SPH_C64(0x340D0D651A39680D), + SPH_C64(0xCFFAFA79E93583FA), SPH_C64(0x5BDFDF69A384B6DF), + SPH_C64(0xE57E7EA9FC9BD77E), SPH_C64(0x9024241948B43D24), + SPH_C64(0xEC3B3BFE76D7C53B), SPH_C64(0x96ABAB9A4B3D31AB), + SPH_C64(0x1FCECEF081D13ECE), SPH_C64(0x4411119922558811), + SPH_C64(0x068F8F8303890C8F), SPH_C64(0x254E4E049C6B4A4E), + SPH_C64(0xE6B7B7667351D1B7), SPH_C64(0x8BEBEBE0CB600BEB), + SPH_C64(0xF03C3CC178CCFD3C), SPH_C64(0x3E8181FD1FBF7C81), + SPH_C64(0x6A94944035FED494), SPH_C64(0xFBF7F71CF30CEBF7), + SPH_C64(0xDEB9B9186F67A1B9), SPH_C64(0x4C13138B265F9813), + SPH_C64(0xB02C2C51589C7D2C), SPH_C64(0x6BD3D305BBB8D6D3), + SPH_C64(0xBBE7E78CD35C6BE7), SPH_C64(0xA56E6E39DCCB576E), + SPH_C64(0x37C4C4AA95F36EC4), SPH_C64(0x0C03031B060F1803), + SPH_C64(0x455656DCAC138A56), SPH_C64(0x0D44445E88491A44), + SPH_C64(0xE17F7FA0FE9EDF7F), SPH_C64(0x9EA9A9884F3721A9), + SPH_C64(0xA82A2A6754824D2A), SPH_C64(0xD6BBBB0A6B6DB1BB), + SPH_C64(0x23C1C1879FE246C1), SPH_C64(0x515353F1A602A253), + SPH_C64(0x57DCDC72A58BAEDC), SPH_C64(0x2C0B0B531627580B), + 
SPH_C64(0x4E9D9D0127D39C9D), SPH_C64(0xAD6C6C2BD8C1476C), + SPH_C64(0xC43131A462F59531), SPH_C64(0xCD7474F3E8B98774), + SPH_C64(0xFFF6F615F109E3F6), SPH_C64(0x0546464C8C430A46), + SPH_C64(0x8AACACA5452609AC), SPH_C64(0x1E8989B50F973C89), + SPH_C64(0x501414B42844A014), SPH_C64(0xA3E1E1BADF425BE1), + SPH_C64(0x581616A62C4EB016), SPH_C64(0xE83A3AF774D2CD3A), + SPH_C64(0xB9696906D2D06F69), SPH_C64(0x24090941122D4809), + SPH_C64(0xDD7070D7E0ADA770), SPH_C64(0xE2B6B66F7154D9B6), + SPH_C64(0x67D0D01EBDB7CED0), SPH_C64(0x93EDEDD6C77E3BED), + SPH_C64(0x17CCCCE285DB2ECC), SPH_C64(0x1542426884572A42), + SPH_C64(0x5A98982C2DC2B498), SPH_C64(0xAAA4A4ED550E49A4), + SPH_C64(0xA028287550885D28), SPH_C64(0x6D5C5C86B831DA5C), + SPH_C64(0xC7F8F86BED3F93F8), SPH_C64(0x228686C211A44486) +}; + +static const sph_u64 plain_T6[256] = { + SPH_C64(0x1818D83078C01860), SPH_C64(0x23232646AF05238C), + SPH_C64(0xC6C6B891F97EC63F), SPH_C64(0xE8E8FBCD6F13E887), + SPH_C64(0x8787CB13A14C8726), SPH_C64(0xB8B8116D62A9B8DA), + SPH_C64(0x0101090205080104), SPH_C64(0x4F4F0D9E6E424F21), + SPH_C64(0x36369B6CEEAD36D8), SPH_C64(0xA6A6FF510459A6A2), + SPH_C64(0xD2D20CB9BDDED26F), SPH_C64(0xF5F50EF706FBF5F3), + SPH_C64(0x797996F280EF79F9), SPH_C64(0x6F6F30DECE5F6FA1), + SPH_C64(0x91916D3FEFFC917E), SPH_C64(0x5252F8A407AA5255), + SPH_C64(0x606047C0FD27609D), SPH_C64(0xBCBC35657689BCCA), + SPH_C64(0x9B9B372BCDAC9B56), SPH_C64(0x8E8E8A018C048E02), + SPH_C64(0xA3A3D25B1571A3B6), SPH_C64(0x0C0C6C183C600C30), + SPH_C64(0x7B7B84F68AFF7BF1), SPH_C64(0x3535806AE1B535D4), + SPH_C64(0x1D1DF53A69E81D74), SPH_C64(0xE0E0B3DD4753E0A7), + SPH_C64(0xD7D721B3ACF6D77B), SPH_C64(0xC2C29C99ED5EC22F), + SPH_C64(0x2E2E435C966D2EB8), SPH_C64(0x4B4B29967A624B31), + SPH_C64(0xFEFE5DE121A3FEDF), SPH_C64(0x5757D5AE16825741), + SPH_C64(0x1515BD2A41A81554), SPH_C64(0x7777E8EEB69F77C1), + SPH_C64(0x3737926EEBA537DC), SPH_C64(0xE5E59ED7567BE5B3), + SPH_C64(0x9F9F1323D98C9F46), SPH_C64(0xF0F023FD17D3F0E7), + SPH_C64(0x4A4A20947F6A4A35), 
SPH_C64(0xDADA44A9959EDA4F), + SPH_C64(0x5858A2B025FA587D), SPH_C64(0xC9C9CF8FCA06C903), + SPH_C64(0x29297C528D5529A4), SPH_C64(0x0A0A5A1422500A28), + SPH_C64(0xB1B1507F4FE1B1FE), SPH_C64(0xA0A0C95D1A69A0BA), + SPH_C64(0x6B6B14D6DA7F6BB1), SPH_C64(0x8585D917AB5C852E), + SPH_C64(0xBDBD3C677381BDCE), SPH_C64(0x5D5D8FBA34D25D69), + SPH_C64(0x1010902050801040), SPH_C64(0xF4F407F503F3F4F7), + SPH_C64(0xCBCBDD8BC016CB0B), SPH_C64(0x3E3ED37CC6ED3EF8), + SPH_C64(0x05052D0A11280514), SPH_C64(0x676778CEE61F6781), + SPH_C64(0xE4E497D55373E4B7), SPH_C64(0x2727024EBB25279C), + SPH_C64(0x4141738258324119), SPH_C64(0x8B8BA70B9D2C8B16), + SPH_C64(0xA7A7F6530151A7A6), SPH_C64(0x7D7DB2FA94CF7DE9), + SPH_C64(0x95954937FBDC956E), SPH_C64(0xD8D856AD9F8ED847), + SPH_C64(0xFBFB70EB308BFBCB), SPH_C64(0xEEEECDC17123EE9F), + SPH_C64(0x7C7CBBF891C77CED), SPH_C64(0x666671CCE3176685), + SPH_C64(0xDDDD7BA78EA6DD53), SPH_C64(0x1717AF2E4BB8175C), + SPH_C64(0x4747458E46024701), SPH_C64(0x9E9E1A21DC849E42), + SPH_C64(0xCACAD489C51ECA0F), SPH_C64(0x2D2D585A99752DB4), + SPH_C64(0xBFBF2E637991BFC6), SPH_C64(0x07073F0E1B38071C), + SPH_C64(0xADADAC472301AD8E), SPH_C64(0x5A5AB0B42FEA5A75), + SPH_C64(0x8383EF1BB56C8336), SPH_C64(0x3333B666FF8533CC), + SPH_C64(0x63635CC6F23F6391), SPH_C64(0x020212040A100208), + SPH_C64(0xAAAA93493839AA92), SPH_C64(0x7171DEE2A8AF71D9), + SPH_C64(0xC8C8C68DCF0EC807), SPH_C64(0x1919D1327DC81964), + SPH_C64(0x49493B9270724939), SPH_C64(0xD9D95FAF9A86D943), + SPH_C64(0xF2F231F91DC3F2EF), SPH_C64(0xE3E3A8DB484BE3AB), + SPH_C64(0x5B5BB9B62AE25B71), SPH_C64(0x8888BC0D9234881A), + SPH_C64(0x9A9A3E29C8A49A52), SPH_C64(0x26260B4CBE2D2698), + SPH_C64(0x3232BF64FA8D32C8), SPH_C64(0xB0B0597D4AE9B0FA), + SPH_C64(0xE9E9F2CF6A1BE983), SPH_C64(0x0F0F771E33780F3C), + SPH_C64(0xD5D533B7A6E6D573), SPH_C64(0x8080F41DBA74803A), + SPH_C64(0xBEBE27617C99BEC2), SPH_C64(0xCDCDEB87DE26CD13), + SPH_C64(0x34348968E4BD34D0), SPH_C64(0x48483290757A483D), + SPH_C64(0xFFFF54E324ABFFDB), 
SPH_C64(0x7A7A8DF48FF77AF5), + SPH_C64(0x9090643DEAF4907A), SPH_C64(0x5F5F9DBE3EC25F61), + SPH_C64(0x20203D40A01D2080), SPH_C64(0x68680FD0D56768BD), + SPH_C64(0x1A1ACA3472D01A68), SPH_C64(0xAEAEB7412C19AE82), + SPH_C64(0xB4B47D755EC9B4EA), SPH_C64(0x5454CEA8199A544D), + SPH_C64(0x93937F3BE5EC9376), SPH_C64(0x22222F44AA0D2288), + SPH_C64(0x646463C8E907648D), SPH_C64(0xF1F12AFF12DBF1E3), + SPH_C64(0x7373CCE6A2BF73D1), SPH_C64(0x121282245A901248), + SPH_C64(0x40407A805D3A401D), SPH_C64(0x0808481028400820), + SPH_C64(0xC3C3959BE856C32B), SPH_C64(0xECECDFC57B33EC97), + SPH_C64(0xDBDB4DAB9096DB4B), SPH_C64(0xA1A1C05F1F61A1BE), + SPH_C64(0x8D8D9107831C8D0E), SPH_C64(0x3D3DC87AC9F53DF4), + SPH_C64(0x97975B33F1CC9766), SPH_C64(0x0000000000000000), + SPH_C64(0xCFCFF983D436CF1B), SPH_C64(0x2B2B6E5687452BAC), + SPH_C64(0x7676E1ECB39776C5), SPH_C64(0x8282E619B0648232), + SPH_C64(0xD6D628B1A9FED67F), SPH_C64(0x1B1BC33677D81B6C), + SPH_C64(0xB5B574775BC1B5EE), SPH_C64(0xAFAFBE432911AF86), + SPH_C64(0x6A6A1DD4DF776AB5), SPH_C64(0x5050EAA00DBA505D), + SPH_C64(0x4545578A4C124509), SPH_C64(0xF3F338FB18CBF3EB), + SPH_C64(0x3030AD60F09D30C0), SPH_C64(0xEFEFC4C3742BEF9B), + SPH_C64(0x3F3FDA7EC3E53FFC), SPH_C64(0x5555C7AA1C925549), + SPH_C64(0xA2A2DB591079A2B2), SPH_C64(0xEAEAE9C96503EA8F), + SPH_C64(0x65656ACAEC0F6589), SPH_C64(0xBABA036968B9BAD2), + SPH_C64(0x2F2F4A5E93652FBC), SPH_C64(0xC0C08E9DE74EC027), + SPH_C64(0xDEDE60A181BEDE5F), SPH_C64(0x1C1CFC386CE01C70), + SPH_C64(0xFDFD46E72EBBFDD3), SPH_C64(0x4D4D1F9A64524D29), + SPH_C64(0x92927639E0E49272), SPH_C64(0x7575FAEABC8F75C9), + SPH_C64(0x0606360C1E300618), SPH_C64(0x8A8AAE0998248A12), + SPH_C64(0xB2B24B7940F9B2F2), SPH_C64(0xE6E685D15963E6BF), + SPH_C64(0x0E0E7E1C36700E38), SPH_C64(0x1F1FE73E63F81F7C), + SPH_C64(0x626255C4F7376295), SPH_C64(0xD4D43AB5A3EED477), + SPH_C64(0xA8A8814D3229A89A), SPH_C64(0x96965231F4C49662), + SPH_C64(0xF9F962EF3A9BF9C3), SPH_C64(0xC5C5A397F666C533), + SPH_C64(0x2525104AB1352594), 
SPH_C64(0x5959ABB220F25979), + SPH_C64(0x8484D015AE54842A), SPH_C64(0x7272C5E4A7B772D5), + SPH_C64(0x3939EC72DDD539E4), SPH_C64(0x4C4C1698615A4C2D), + SPH_C64(0x5E5E94BC3BCA5E65), SPH_C64(0x78789FF085E778FD), + SPH_C64(0x3838E570D8DD38E0), SPH_C64(0x8C8C980586148C0A), + SPH_C64(0xD1D117BFB2C6D163), SPH_C64(0xA5A5E4570B41A5AE), + SPH_C64(0xE2E2A1D94D43E2AF), SPH_C64(0x61614EC2F82F6199), + SPH_C64(0xB3B3427B45F1B3F6), SPH_C64(0x21213442A5152184), + SPH_C64(0x9C9C0825D6949C4A), SPH_C64(0x1E1EEE3C66F01E78), + SPH_C64(0x4343618652224311), SPH_C64(0xC7C7B193FC76C73B), + SPH_C64(0xFCFC4FE52BB3FCD7), SPH_C64(0x0404240814200410), + SPH_C64(0x5151E3A208B25159), SPH_C64(0x9999252FC7BC995E), + SPH_C64(0x6D6D22DAC44F6DA9), SPH_C64(0x0D0D651A39680D34), + SPH_C64(0xFAFA79E93583FACF), SPH_C64(0xDFDF69A384B6DF5B), + SPH_C64(0x7E7EA9FC9BD77EE5), SPH_C64(0x24241948B43D2490), + SPH_C64(0x3B3BFE76D7C53BEC), SPH_C64(0xABAB9A4B3D31AB96), + SPH_C64(0xCECEF081D13ECE1F), SPH_C64(0x1111992255881144), + SPH_C64(0x8F8F8303890C8F06), SPH_C64(0x4E4E049C6B4A4E25), + SPH_C64(0xB7B7667351D1B7E6), SPH_C64(0xEBEBE0CB600BEB8B), + SPH_C64(0x3C3CC178CCFD3CF0), SPH_C64(0x8181FD1FBF7C813E), + SPH_C64(0x94944035FED4946A), SPH_C64(0xF7F71CF30CEBF7FB), + SPH_C64(0xB9B9186F67A1B9DE), SPH_C64(0x13138B265F98134C), + SPH_C64(0x2C2C51589C7D2CB0), SPH_C64(0xD3D305BBB8D6D36B), + SPH_C64(0xE7E78CD35C6BE7BB), SPH_C64(0x6E6E39DCCB576EA5), + SPH_C64(0xC4C4AA95F36EC437), SPH_C64(0x03031B060F18030C), + SPH_C64(0x5656DCAC138A5645), SPH_C64(0x44445E88491A440D), + SPH_C64(0x7F7FA0FE9EDF7FE1), SPH_C64(0xA9A9884F3721A99E), + SPH_C64(0x2A2A6754824D2AA8), SPH_C64(0xBBBB0A6B6DB1BBD6), + SPH_C64(0xC1C1879FE246C123), SPH_C64(0x5353F1A602A25351), + SPH_C64(0xDCDC72A58BAEDC57), SPH_C64(0x0B0B531627580B2C), + SPH_C64(0x9D9D0127D39C9D4E), SPH_C64(0x6C6C2BD8C1476CAD), + SPH_C64(0x3131A462F59531C4), SPH_C64(0x7474F3E8B98774CD), + SPH_C64(0xF6F615F109E3F6FF), SPH_C64(0x46464C8C430A4605), + SPH_C64(0xACACA5452609AC8A), 
SPH_C64(0x8989B50F973C891E), + SPH_C64(0x1414B42844A01450), SPH_C64(0xE1E1BADF425BE1A3), + SPH_C64(0x1616A62C4EB01658), SPH_C64(0x3A3AF774D2CD3AE8), + SPH_C64(0x696906D2D06F69B9), SPH_C64(0x090941122D480924), + SPH_C64(0x7070D7E0ADA770DD), SPH_C64(0xB6B66F7154D9B6E2), + SPH_C64(0xD0D01EBDB7CED067), SPH_C64(0xEDEDD6C77E3BED93), + SPH_C64(0xCCCCE285DB2ECC17), SPH_C64(0x42426884572A4215), + SPH_C64(0x98982C2DC2B4985A), SPH_C64(0xA4A4ED550E49A4AA), + SPH_C64(0x28287550885D28A0), SPH_C64(0x5C5C86B831DA5C6D), + SPH_C64(0xF8F86BED3F93F8C7), SPH_C64(0x8686C211A4448622) +}; + +static const sph_u64 plain_T7[256] = { + SPH_C64(0x18D83078C0186018), SPH_C64(0x232646AF05238C23), + SPH_C64(0xC6B891F97EC63FC6), SPH_C64(0xE8FBCD6F13E887E8), + SPH_C64(0x87CB13A14C872687), SPH_C64(0xB8116D62A9B8DAB8), + SPH_C64(0x0109020508010401), SPH_C64(0x4F0D9E6E424F214F), + SPH_C64(0x369B6CEEAD36D836), SPH_C64(0xA6FF510459A6A2A6), + SPH_C64(0xD20CB9BDDED26FD2), SPH_C64(0xF50EF706FBF5F3F5), + SPH_C64(0x7996F280EF79F979), SPH_C64(0x6F30DECE5F6FA16F), + SPH_C64(0x916D3FEFFC917E91), SPH_C64(0x52F8A407AA525552), + SPH_C64(0x6047C0FD27609D60), SPH_C64(0xBC35657689BCCABC), + SPH_C64(0x9B372BCDAC9B569B), SPH_C64(0x8E8A018C048E028E), + SPH_C64(0xA3D25B1571A3B6A3), SPH_C64(0x0C6C183C600C300C), + SPH_C64(0x7B84F68AFF7BF17B), SPH_C64(0x35806AE1B535D435), + SPH_C64(0x1DF53A69E81D741D), SPH_C64(0xE0B3DD4753E0A7E0), + SPH_C64(0xD721B3ACF6D77BD7), SPH_C64(0xC29C99ED5EC22FC2), + SPH_C64(0x2E435C966D2EB82E), SPH_C64(0x4B29967A624B314B), + SPH_C64(0xFE5DE121A3FEDFFE), SPH_C64(0x57D5AE1682574157), + SPH_C64(0x15BD2A41A8155415), SPH_C64(0x77E8EEB69F77C177), + SPH_C64(0x37926EEBA537DC37), SPH_C64(0xE59ED7567BE5B3E5), + SPH_C64(0x9F1323D98C9F469F), SPH_C64(0xF023FD17D3F0E7F0), + SPH_C64(0x4A20947F6A4A354A), SPH_C64(0xDA44A9959EDA4FDA), + SPH_C64(0x58A2B025FA587D58), SPH_C64(0xC9CF8FCA06C903C9), + SPH_C64(0x297C528D5529A429), SPH_C64(0x0A5A1422500A280A), + SPH_C64(0xB1507F4FE1B1FEB1), SPH_C64(0xA0C95D1A69A0BAA0), + 
SPH_C64(0x6B14D6DA7F6BB16B), SPH_C64(0x85D917AB5C852E85), + SPH_C64(0xBD3C677381BDCEBD), SPH_C64(0x5D8FBA34D25D695D), + SPH_C64(0x1090205080104010), SPH_C64(0xF407F503F3F4F7F4), + SPH_C64(0xCBDD8BC016CB0BCB), SPH_C64(0x3ED37CC6ED3EF83E), + SPH_C64(0x052D0A1128051405), SPH_C64(0x6778CEE61F678167), + SPH_C64(0xE497D55373E4B7E4), SPH_C64(0x27024EBB25279C27), + SPH_C64(0x4173825832411941), SPH_C64(0x8BA70B9D2C8B168B), + SPH_C64(0xA7F6530151A7A6A7), SPH_C64(0x7DB2FA94CF7DE97D), + SPH_C64(0x954937FBDC956E95), SPH_C64(0xD856AD9F8ED847D8), + SPH_C64(0xFB70EB308BFBCBFB), SPH_C64(0xEECDC17123EE9FEE), + SPH_C64(0x7CBBF891C77CED7C), SPH_C64(0x6671CCE317668566), + SPH_C64(0xDD7BA78EA6DD53DD), SPH_C64(0x17AF2E4BB8175C17), + SPH_C64(0x47458E4602470147), SPH_C64(0x9E1A21DC849E429E), + SPH_C64(0xCAD489C51ECA0FCA), SPH_C64(0x2D585A99752DB42D), + SPH_C64(0xBF2E637991BFC6BF), SPH_C64(0x073F0E1B38071C07), + SPH_C64(0xADAC472301AD8EAD), SPH_C64(0x5AB0B42FEA5A755A), + SPH_C64(0x83EF1BB56C833683), SPH_C64(0x33B666FF8533CC33), + SPH_C64(0x635CC6F23F639163), SPH_C64(0x0212040A10020802), + SPH_C64(0xAA93493839AA92AA), SPH_C64(0x71DEE2A8AF71D971), + SPH_C64(0xC8C68DCF0EC807C8), SPH_C64(0x19D1327DC8196419), + SPH_C64(0x493B927072493949), SPH_C64(0xD95FAF9A86D943D9), + SPH_C64(0xF231F91DC3F2EFF2), SPH_C64(0xE3A8DB484BE3ABE3), + SPH_C64(0x5BB9B62AE25B715B), SPH_C64(0x88BC0D9234881A88), + SPH_C64(0x9A3E29C8A49A529A), SPH_C64(0x260B4CBE2D269826), + SPH_C64(0x32BF64FA8D32C832), SPH_C64(0xB0597D4AE9B0FAB0), + SPH_C64(0xE9F2CF6A1BE983E9), SPH_C64(0x0F771E33780F3C0F), + SPH_C64(0xD533B7A6E6D573D5), SPH_C64(0x80F41DBA74803A80), + SPH_C64(0xBE27617C99BEC2BE), SPH_C64(0xCDEB87DE26CD13CD), + SPH_C64(0x348968E4BD34D034), SPH_C64(0x483290757A483D48), + SPH_C64(0xFF54E324ABFFDBFF), SPH_C64(0x7A8DF48FF77AF57A), + SPH_C64(0x90643DEAF4907A90), SPH_C64(0x5F9DBE3EC25F615F), + SPH_C64(0x203D40A01D208020), SPH_C64(0x680FD0D56768BD68), + SPH_C64(0x1ACA3472D01A681A), SPH_C64(0xAEB7412C19AE82AE), + 
SPH_C64(0xB47D755EC9B4EAB4), SPH_C64(0x54CEA8199A544D54), + SPH_C64(0x937F3BE5EC937693), SPH_C64(0x222F44AA0D228822), + SPH_C64(0x6463C8E907648D64), SPH_C64(0xF12AFF12DBF1E3F1), + SPH_C64(0x73CCE6A2BF73D173), SPH_C64(0x1282245A90124812), + SPH_C64(0x407A805D3A401D40), SPH_C64(0x0848102840082008), + SPH_C64(0xC3959BE856C32BC3), SPH_C64(0xECDFC57B33EC97EC), + SPH_C64(0xDB4DAB9096DB4BDB), SPH_C64(0xA1C05F1F61A1BEA1), + SPH_C64(0x8D9107831C8D0E8D), SPH_C64(0x3DC87AC9F53DF43D), + SPH_C64(0x975B33F1CC976697), SPH_C64(0x0000000000000000), + SPH_C64(0xCFF983D436CF1BCF), SPH_C64(0x2B6E5687452BAC2B), + SPH_C64(0x76E1ECB39776C576), SPH_C64(0x82E619B064823282), + SPH_C64(0xD628B1A9FED67FD6), SPH_C64(0x1BC33677D81B6C1B), + SPH_C64(0xB574775BC1B5EEB5), SPH_C64(0xAFBE432911AF86AF), + SPH_C64(0x6A1DD4DF776AB56A), SPH_C64(0x50EAA00DBA505D50), + SPH_C64(0x45578A4C12450945), SPH_C64(0xF338FB18CBF3EBF3), + SPH_C64(0x30AD60F09D30C030), SPH_C64(0xEFC4C3742BEF9BEF), + SPH_C64(0x3FDA7EC3E53FFC3F), SPH_C64(0x55C7AA1C92554955), + SPH_C64(0xA2DB591079A2B2A2), SPH_C64(0xEAE9C96503EA8FEA), + SPH_C64(0x656ACAEC0F658965), SPH_C64(0xBA036968B9BAD2BA), + SPH_C64(0x2F4A5E93652FBC2F), SPH_C64(0xC08E9DE74EC027C0), + SPH_C64(0xDE60A181BEDE5FDE), SPH_C64(0x1CFC386CE01C701C), + SPH_C64(0xFD46E72EBBFDD3FD), SPH_C64(0x4D1F9A64524D294D), + SPH_C64(0x927639E0E4927292), SPH_C64(0x75FAEABC8F75C975), + SPH_C64(0x06360C1E30061806), SPH_C64(0x8AAE0998248A128A), + SPH_C64(0xB24B7940F9B2F2B2), SPH_C64(0xE685D15963E6BFE6), + SPH_C64(0x0E7E1C36700E380E), SPH_C64(0x1FE73E63F81F7C1F), + SPH_C64(0x6255C4F737629562), SPH_C64(0xD43AB5A3EED477D4), + SPH_C64(0xA8814D3229A89AA8), SPH_C64(0x965231F4C4966296), + SPH_C64(0xF962EF3A9BF9C3F9), SPH_C64(0xC5A397F666C533C5), + SPH_C64(0x25104AB135259425), SPH_C64(0x59ABB220F2597959), + SPH_C64(0x84D015AE54842A84), SPH_C64(0x72C5E4A7B772D572), + SPH_C64(0x39EC72DDD539E439), SPH_C64(0x4C1698615A4C2D4C), + SPH_C64(0x5E94BC3BCA5E655E), SPH_C64(0x789FF085E778FD78), + 
SPH_C64(0x38E570D8DD38E038), SPH_C64(0x8C980586148C0A8C), + SPH_C64(0xD117BFB2C6D163D1), SPH_C64(0xA5E4570B41A5AEA5), + SPH_C64(0xE2A1D94D43E2AFE2), SPH_C64(0x614EC2F82F619961), + SPH_C64(0xB3427B45F1B3F6B3), SPH_C64(0x213442A515218421), + SPH_C64(0x9C0825D6949C4A9C), SPH_C64(0x1EEE3C66F01E781E), + SPH_C64(0x4361865222431143), SPH_C64(0xC7B193FC76C73BC7), + SPH_C64(0xFC4FE52BB3FCD7FC), SPH_C64(0x0424081420041004), + SPH_C64(0x51E3A208B2515951), SPH_C64(0x99252FC7BC995E99), + SPH_C64(0x6D22DAC44F6DA96D), SPH_C64(0x0D651A39680D340D), + SPH_C64(0xFA79E93583FACFFA), SPH_C64(0xDF69A384B6DF5BDF), + SPH_C64(0x7EA9FC9BD77EE57E), SPH_C64(0x241948B43D249024), + SPH_C64(0x3BFE76D7C53BEC3B), SPH_C64(0xAB9A4B3D31AB96AB), + SPH_C64(0xCEF081D13ECE1FCE), SPH_C64(0x1199225588114411), + SPH_C64(0x8F8303890C8F068F), SPH_C64(0x4E049C6B4A4E254E), + SPH_C64(0xB7667351D1B7E6B7), SPH_C64(0xEBE0CB600BEB8BEB), + SPH_C64(0x3CC178CCFD3CF03C), SPH_C64(0x81FD1FBF7C813E81), + SPH_C64(0x944035FED4946A94), SPH_C64(0xF71CF30CEBF7FBF7), + SPH_C64(0xB9186F67A1B9DEB9), SPH_C64(0x138B265F98134C13), + SPH_C64(0x2C51589C7D2CB02C), SPH_C64(0xD305BBB8D6D36BD3), + SPH_C64(0xE78CD35C6BE7BBE7), SPH_C64(0x6E39DCCB576EA56E), + SPH_C64(0xC4AA95F36EC437C4), SPH_C64(0x031B060F18030C03), + SPH_C64(0x56DCAC138A564556), SPH_C64(0x445E88491A440D44), + SPH_C64(0x7FA0FE9EDF7FE17F), SPH_C64(0xA9884F3721A99EA9), + SPH_C64(0x2A6754824D2AA82A), SPH_C64(0xBB0A6B6DB1BBD6BB), + SPH_C64(0xC1879FE246C123C1), SPH_C64(0x53F1A602A2535153), + SPH_C64(0xDC72A58BAEDC57DC), SPH_C64(0x0B531627580B2C0B), + SPH_C64(0x9D0127D39C9D4E9D), SPH_C64(0x6C2BD8C1476CAD6C), + SPH_C64(0x31A462F59531C431), SPH_C64(0x74F3E8B98774CD74), + SPH_C64(0xF615F109E3F6FFF6), SPH_C64(0x464C8C430A460546), + SPH_C64(0xACA5452609AC8AAC), SPH_C64(0x89B50F973C891E89), + SPH_C64(0x14B42844A0145014), SPH_C64(0xE1BADF425BE1A3E1), + SPH_C64(0x16A62C4EB0165816), SPH_C64(0x3AF774D2CD3AE83A), + SPH_C64(0x6906D2D06F69B969), SPH_C64(0x0941122D48092409), + 
SPH_C64(0x70D7E0ADA770DD70), SPH_C64(0xB66F7154D9B6E2B6), + SPH_C64(0xD01EBDB7CED067D0), SPH_C64(0xEDD6C77E3BED93ED), + SPH_C64(0xCCE285DB2ECC17CC), SPH_C64(0x426884572A421542), + SPH_C64(0x982C2DC2B4985A98), SPH_C64(0xA4ED550E49A4AAA4), + SPH_C64(0x287550885D28A028), SPH_C64(0x5C86B831DA5C6D5C), + SPH_C64(0xF86BED3F93F8C7F8), SPH_C64(0x86C211A444862286) +}; + +#endif + +/* + * Round constants. + */ +static const sph_u64 plain_RC[10] = { + SPH_C64(0x4F01B887E8C62318), + SPH_C64(0x52916F79F5D2A636), + SPH_C64(0x357B0CA38E9BBC60), + SPH_C64(0x57FE4B2EC2D7E01D), + SPH_C64(0xDA4AF09FE5377715), + SPH_C64(0x856BA0B10A29C958), + SPH_C64(0x67053ECBF4105DBD), + SPH_C64(0xD8957DA78B4127E4), + SPH_C64(0x9E4717DD667CEEFB), + SPH_C64(0x33835AAD07BF2DCA) +}; + +/* ====================================================================== */ +/* + * Constants for plain WHIRLPOOL-0 (first version). + */ + +static const sph_u64 old0_T0[256] = { + SPH_C64(0xD50F67D568B86868), SPH_C64(0xB71ECEB7D06DD0D0), + SPH_C64(0x60E00B60EB20EBEB), SPH_C64(0x876E45872B7D2B2B), + SPH_C64(0x75327A7548D84848), SPH_C64(0xD3019CD39DBA9D9D), + SPH_C64(0xDF1D77DF6ABE6A6A), SPH_C64(0x53977353E431E4E4), + SPH_C64(0x48A84B48E338E3E3), SPH_C64(0x15D27115A3F8A3A3), + SPH_C64(0x13DC8A1356FA5656), SPH_C64(0xBFFD7CBF819E8181), + SPH_C64(0x94B2CF947D877D7D), SPH_C64(0x122ADB12F10EF1F1), + SPH_C64(0xABD95CAB85928585), SPH_C64(0xDC1A84DC9EBF9E9E), + SPH_C64(0x9C517D9C2C742C2C), SPH_C64(0x8C8A048C8E8F8E8E), + SPH_C64(0x859FE78578887878), SPH_C64(0xC5D41EC5CA43CACA), + SPH_C64(0x4BAFB84B17391717), SPH_C64(0x37882137A9E6A9A9), + SPH_C64(0xF84E2FF861A36161), SPH_C64(0xA633E6A6D562D5D5), + SPH_C64(0x348FD2345DE75D5D), SPH_C64(0x275358270B1D0B0B), + SPH_C64(0x869814868C898C8C), SPH_C64(0xCCC1FDCC3C443C3C), + SPH_C64(0xB6E89FB677997777), SPH_C64(0x08E3B20851F35151), + SPH_C64(0xAA2F0DAA22662222), SPH_C64(0x57682A5742C64242), + SPH_C64(0xC3DAE5C33F413F3F), SPH_C64(0x19CE9A1954FC5454), + SPH_C64(0x5873325841C34141), 
SPH_C64(0xBAF474BA809D8080), + SPH_C64(0xDBE22EDBCC49CCCC), SPH_C64(0xA4C244A486978686), + SPH_C64(0x4542F145B3C8B3B3), SPH_C64(0x78D8C07818281818), + SPH_C64(0x96436D962E722E2E), SPH_C64(0x16D5821657F95757), + SPH_C64(0x1E36301E060A0606), SPH_C64(0xF75537F762A66262), + SPH_C64(0x0307F303F401F4F4), SPH_C64(0xEE9BADEE365A3636), + SPH_C64(0xB217C6B2D16ED1D1), SPH_C64(0xDA147FDA6BBD6B6B), + SPH_C64(0x77C3D8771B2D1B1B), SPH_C64(0xEC6A0FEC65AF6565), + SPH_C64(0xBCFA8FBC759F7575), SPH_C64(0x5090805010301010), + SPH_C64(0x95449E95DA73DADA), SPH_C64(0x703B727049DB4949), + SPH_C64(0xBE0B2DBE266A2626), SPH_C64(0x3A629B3AF916F9F9), + SPH_C64(0xC0DD16C0CB40CBCB), SPH_C64(0xE37117E366AA6666), + SPH_C64(0x5C8C6B5CE734E7E7), SPH_C64(0x6803B968BAD3BABA), + SPH_C64(0x2CB7192CAEEFAEAE), SPH_C64(0x0DEABA0D50F05050), + SPH_C64(0x07F8AA0752F65252), SPH_C64(0x3D9A313DABE0ABAB), + SPH_C64(0x112D2811050F0505), SPH_C64(0x1723D317F00DF0F0), + SPH_C64(0x396568390D170D0D), SPH_C64(0xA2CCBFA273957373), + SPH_C64(0xD7FEC5D73B4D3B3B), SPH_C64(0x14242014040C0404), + SPH_C64(0xA03D1DA020602020), SPH_C64(0x215DA321FE1FFEFE), + SPH_C64(0x8E7BA68EDD7ADDDD), SPH_C64(0x060EFB06F502F5F5), + SPH_C64(0x5E7DC95EB4C1B4B4), SPH_C64(0x3E9DC23E5FE15F5F), + SPH_C64(0x225A50220A1E0A0A), SPH_C64(0x5B74C15BB5C2B5B5), + SPH_C64(0xE78E4EE7C05DC0C0), SPH_C64(0x1AC9691AA0FDA0A0), + SPH_C64(0xA8DEAFA871937171), SPH_C64(0x0BE4410BA5F2A5A5), + SPH_C64(0x995875992D772D2D), SPH_C64(0xFD4727FD60A06060), + SPH_C64(0xA7C5B7A772967272), SPH_C64(0xE57FECE593A89393), + SPH_C64(0xDDECD5DD394B3939), SPH_C64(0x2848402808180808), + SPH_C64(0xB5EF6CB583988383), SPH_C64(0xA53415A521632121), + SPH_C64(0x3186DA315CE45C5C), SPH_C64(0xA1CB4CA187948787), + SPH_C64(0x4F50E14FB1CEB1B1), SPH_C64(0x47B35347E03DE0E0), + SPH_C64(0x0000000000000000), SPH_C64(0xE89556E8C358C3C3), + SPH_C64(0x5A82905A12361212), SPH_C64(0xEF6DFCEF91AE9191), + SPH_C64(0x98AE24988A838A8A), SPH_C64(0x0A12100A02060202), + SPH_C64(0x6CFCE06C1C241C1C), 
SPH_C64(0x59856359E637E6E6), + SPH_C64(0x4C57124C45CF4545), SPH_C64(0xED9C5EEDC25BC2C2), + SPH_C64(0xF3AA6EF3C451C4C4), SPH_C64(0x2E46BB2EFD1AFDFD), + SPH_C64(0x792E9179BFDCBFBF), SPH_C64(0x495E1A4944CC4444), + SPH_C64(0x1FC0611FA1FEA1A1), SPH_C64(0x61165A614CD44C4C), + SPH_C64(0xFFB685FF33553333), SPH_C64(0xF6A366F6C552C5C5), + SPH_C64(0xAED054AE84918484), SPH_C64(0xAF2605AF23652323), + SPH_C64(0x91BBC7917C847C7C), SPH_C64(0x4A59E94AB0CDB0B0), + SPH_C64(0xB11035B1256F2525), SPH_C64(0x41BDA841153F1515), + SPH_C64(0xE180B5E1355F3535), SPH_C64(0xD0066FD069BB6969), + SPH_C64(0x2454AB24FF1CFFFF), SPH_C64(0xFE40D4FE94A19494), + SPH_C64(0x641F52644DD74D4D), SPH_C64(0xADD7A7AD70907070), + SPH_C64(0x10DB7910A2FBA2A2), SPH_C64(0x29BE1129AFECAFAF), + SPH_C64(0xDEEB26DECD4ACDCD), SPH_C64(0xA928FEA9D667D6D6), + SPH_C64(0xC12B47C16CB46C6C), SPH_C64(0x5166D151B7C4B7B7), + SPH_C64(0x3F6B933FF815F8F8), SPH_C64(0x2D41482D091B0909), + SPH_C64(0x1838CB18F308F3F3), SPH_C64(0xE6781FE667A96767), + SPH_C64(0x0EED490EA4F1A4A4), SPH_C64(0x65E90365EA23EAEA), + SPH_C64(0x7BDF337BEC29ECEC), SPH_C64(0x546FD954B6C7B6B6), + SPH_C64(0xA33AEEA3D461D4D4), SPH_C64(0xBD0CDEBDD26BD2D2), + SPH_C64(0x44B4A044143C1414), SPH_C64(0x66EEF0661E221E1E), + SPH_C64(0x42BA5B42E13EE1E1), SPH_C64(0xB4193DB4246C2424), + SPH_C64(0xD8E5DDD838483838), SPH_C64(0xF9B87EF9C657C6C6), + SPH_C64(0x904D9690DB70DBDB), SPH_C64(0x7A29627A4BDD4B4B), + SPH_C64(0x8F8DF78F7A8E7A7A), SPH_C64(0xD2F7CDD23A4E3A3A), + SPH_C64(0x8160BE81DE7FDEDE), SPH_C64(0x3B94CA3B5EE25E5E), + SPH_C64(0x8469B684DF7CDFDF), SPH_C64(0xFB49DCFB95A29595), + SPH_C64(0x2B4FB32BFC19FCFC), SPH_C64(0x38933938AAE3AAAA), + SPH_C64(0xAC21F6ACD764D7D7), SPH_C64(0xD1F03ED1CE4FCECE), + SPH_C64(0x1B3F381B07090707), SPH_C64(0x337778330F110F0F), + SPH_C64(0xC9C8F5C93D473D3D), SPH_C64(0x25A2FA2558E85858), + SPH_C64(0xC83EA4C89AB39A9A), SPH_C64(0xC22CB4C298B59898), + SPH_C64(0xD60894D69CB99C9C), SPH_C64(0x1D31C31DF20BF2F2), + SPH_C64(0x01F65101A7F4A7A7), 
SPH_C64(0x5599885511331111), + SPH_C64(0x9BA9D79B7E827E7E), SPH_C64(0x9DA72C9D8B808B8B), + SPH_C64(0x5261225243C54343), SPH_C64(0x0F1B180F03050303), + SPH_C64(0x4DA1434DE23BE2E2), SPH_C64(0x8B72AE8BDC79DCDC), + SPH_C64(0x569E7B56E532E5E5), SPH_C64(0x404BF940B2CBB2B2), + SPH_C64(0x6B044A6B4ED24E4E), SPH_C64(0xFCB176FCC754C7C7), + SPH_C64(0xC4224FC46DB76D6D), SPH_C64(0x6AF21B6AE926E9E9), + SPH_C64(0xBB0225BB27692727), SPH_C64(0x5D7A3A5D40C04040), + SPH_C64(0x9F568E9FD875D8D8), SPH_C64(0xEB92A5EB37593737), + SPH_C64(0xE076E4E092AB9292), SPH_C64(0x89830C898F8C8F8F), + SPH_C64(0x0509080501030101), SPH_C64(0x69F5E8691D271D1D), + SPH_C64(0x02F1A20253F55353), SPH_C64(0xC6D3EDC63E423E3E), + SPH_C64(0x20ABF22059EB5959), SPH_C64(0xE28746E2C15EC1C1), + SPH_C64(0x6E0D426E4FD14F4F), SPH_C64(0xFABF8DFA32563232), + SPH_C64(0x4EA6B04E163A1616), SPH_C64(0x35798335FA13FAFA), + SPH_C64(0xB9F387B9749C7474), SPH_C64(0x30708B30FB10FBFB), + SPH_C64(0xF25C3FF263A56363), SPH_C64(0xD9138CD99FBC9F9F), + SPH_C64(0xE489BDE4345C3434), SPH_C64(0x72CAD0721A2E1A1A), + SPH_C64(0x82674D822A7E2A2A), SPH_C64(0x2FB0EA2F5AEE5A5A), + SPH_C64(0x83911C838D8A8D8D), SPH_C64(0xCACF06CAC946C9C9), + SPH_C64(0xD4F936D4CF4CCFCF), SPH_C64(0x0915E309F607F6F6), + SPH_C64(0xEA64F4EA90AD9090), SPH_C64(0x88755D8828782828), + SPH_C64(0x92BC349288858888), SPH_C64(0xCD37ACCD9BB09B9B), + SPH_C64(0xF5A495F531533131), SPH_C64(0x367E70360E120E0E), + SPH_C64(0x733C8173BDDABDBD), SPH_C64(0x7F206A7F4ADE4A4A), + SPH_C64(0x6FFB136FE825E8E8), SPH_C64(0xF452C4F496A79696), + SPH_C64(0x04FF5904A6F7A6A6), SPH_C64(0x3C6C603C0C140C0C), + SPH_C64(0xCFC60ECFC845C8C8), SPH_C64(0x8096EF80798B7979), + SPH_C64(0x76358976BCD9BCBC), SPH_C64(0x7C27997CBEDFBEBE), + SPH_C64(0x74C42B74EF2CEFEF), SPH_C64(0xCB3957CB6EB26E6E), + SPH_C64(0x434C0A4346CA4646), SPH_C64(0xF15BCCF197A49797), + SPH_C64(0x2AB9E22A5BED5B5B), SPH_C64(0x7ED63B7EED2AEDED), + SPH_C64(0x7DD1C87D192B1919), SPH_C64(0x9A5F869AD976D9D9), + SPH_C64(0x26A50926ACE9ACAC), 
SPH_C64(0xC725BCC799B69999), + SPH_C64(0x32812932A8E5A8A8), SPH_C64(0x8D7C558D297B2929), + SPH_C64(0xE96307E964AC6464), SPH_C64(0x63E7F8631F211F1F), + SPH_C64(0x23AC0123ADEAADAD), SPH_C64(0x1CC7921C55FF5555), + SPH_C64(0x5F8B985F13351313), SPH_C64(0x6D0AB16DBBD0BBBB), + SPH_C64(0x0C1CEB0CF704F7F7), SPH_C64(0xCE305FCE6FB16F6F), + SPH_C64(0x6718A167B9D6B9B9), SPH_C64(0x4645024647C94747), + SPH_C64(0x934A65932F712F2F), SPH_C64(0x71CD2371EE2FEEEE), + SPH_C64(0x6211A962B8D5B8B8), SPH_C64(0x8A84FF8A7B8D7B7B), + SPH_C64(0x97B53C9789868989), SPH_C64(0xF0AD9DF030503030), + SPH_C64(0xB805D6B8D368D3D3), SPH_C64(0x9EA0DF9E7F817F7F), + SPH_C64(0xB3E197B3769A7676), SPH_C64(0xB0E664B0829B8282) +}; + +#if !SPH_SMALL_FOOTPRINT_WHIRLPOOL + +static const sph_u64 old0_T1[256] = { + SPH_C64(0x0F67D568B86868D5), SPH_C64(0x1ECEB7D06DD0D0B7), + SPH_C64(0xE00B60EB20EBEB60), SPH_C64(0x6E45872B7D2B2B87), + SPH_C64(0x327A7548D8484875), SPH_C64(0x019CD39DBA9D9DD3), + SPH_C64(0x1D77DF6ABE6A6ADF), SPH_C64(0x977353E431E4E453), + SPH_C64(0xA84B48E338E3E348), SPH_C64(0xD27115A3F8A3A315), + SPH_C64(0xDC8A1356FA565613), SPH_C64(0xFD7CBF819E8181BF), + SPH_C64(0xB2CF947D877D7D94), SPH_C64(0x2ADB12F10EF1F112), + SPH_C64(0xD95CAB85928585AB), SPH_C64(0x1A84DC9EBF9E9EDC), + SPH_C64(0x517D9C2C742C2C9C), SPH_C64(0x8A048C8E8F8E8E8C), + SPH_C64(0x9FE7857888787885), SPH_C64(0xD41EC5CA43CACAC5), + SPH_C64(0xAFB84B173917174B), SPH_C64(0x882137A9E6A9A937), + SPH_C64(0x4E2FF861A36161F8), SPH_C64(0x33E6A6D562D5D5A6), + SPH_C64(0x8FD2345DE75D5D34), SPH_C64(0x5358270B1D0B0B27), + SPH_C64(0x9814868C898C8C86), SPH_C64(0xC1FDCC3C443C3CCC), + SPH_C64(0xE89FB677997777B6), SPH_C64(0xE3B20851F3515108), + SPH_C64(0x2F0DAA22662222AA), SPH_C64(0x682A5742C6424257), + SPH_C64(0xDAE5C33F413F3FC3), SPH_C64(0xCE9A1954FC545419), + SPH_C64(0x73325841C3414158), SPH_C64(0xF474BA809D8080BA), + SPH_C64(0xE22EDBCC49CCCCDB), SPH_C64(0xC244A486978686A4), + SPH_C64(0x42F145B3C8B3B345), SPH_C64(0xD8C0781828181878), + 
SPH_C64(0x436D962E722E2E96), SPH_C64(0xD5821657F9575716), + SPH_C64(0x36301E060A06061E), SPH_C64(0x5537F762A66262F7), + SPH_C64(0x07F303F401F4F403), SPH_C64(0x9BADEE365A3636EE), + SPH_C64(0x17C6B2D16ED1D1B2), SPH_C64(0x147FDA6BBD6B6BDA), + SPH_C64(0xC3D8771B2D1B1B77), SPH_C64(0x6A0FEC65AF6565EC), + SPH_C64(0xFA8FBC759F7575BC), SPH_C64(0x9080501030101050), + SPH_C64(0x449E95DA73DADA95), SPH_C64(0x3B727049DB494970), + SPH_C64(0x0B2DBE266A2626BE), SPH_C64(0x629B3AF916F9F93A), + SPH_C64(0xDD16C0CB40CBCBC0), SPH_C64(0x7117E366AA6666E3), + SPH_C64(0x8C6B5CE734E7E75C), SPH_C64(0x03B968BAD3BABA68), + SPH_C64(0xB7192CAEEFAEAE2C), SPH_C64(0xEABA0D50F050500D), + SPH_C64(0xF8AA0752F6525207), SPH_C64(0x9A313DABE0ABAB3D), + SPH_C64(0x2D2811050F050511), SPH_C64(0x23D317F00DF0F017), + SPH_C64(0x6568390D170D0D39), SPH_C64(0xCCBFA273957373A2), + SPH_C64(0xFEC5D73B4D3B3BD7), SPH_C64(0x242014040C040414), + SPH_C64(0x3D1DA020602020A0), SPH_C64(0x5DA321FE1FFEFE21), + SPH_C64(0x7BA68EDD7ADDDD8E), SPH_C64(0x0EFB06F502F5F506), + SPH_C64(0x7DC95EB4C1B4B45E), SPH_C64(0x9DC23E5FE15F5F3E), + SPH_C64(0x5A50220A1E0A0A22), SPH_C64(0x74C15BB5C2B5B55B), + SPH_C64(0x8E4EE7C05DC0C0E7), SPH_C64(0xC9691AA0FDA0A01A), + SPH_C64(0xDEAFA871937171A8), SPH_C64(0xE4410BA5F2A5A50B), + SPH_C64(0x5875992D772D2D99), SPH_C64(0x4727FD60A06060FD), + SPH_C64(0xC5B7A772967272A7), SPH_C64(0x7FECE593A89393E5), + SPH_C64(0xECD5DD394B3939DD), SPH_C64(0x4840280818080828), + SPH_C64(0xEF6CB583988383B5), SPH_C64(0x3415A521632121A5), + SPH_C64(0x86DA315CE45C5C31), SPH_C64(0xCB4CA187948787A1), + SPH_C64(0x50E14FB1CEB1B14F), SPH_C64(0xB35347E03DE0E047), + SPH_C64(0x0000000000000000), SPH_C64(0x9556E8C358C3C3E8), + SPH_C64(0x82905A123612125A), SPH_C64(0x6DFCEF91AE9191EF), + SPH_C64(0xAE24988A838A8A98), SPH_C64(0x12100A020602020A), + SPH_C64(0xFCE06C1C241C1C6C), SPH_C64(0x856359E637E6E659), + SPH_C64(0x57124C45CF45454C), SPH_C64(0x9C5EEDC25BC2C2ED), + SPH_C64(0xAA6EF3C451C4C4F3), SPH_C64(0x46BB2EFD1AFDFD2E), + 
SPH_C64(0x2E9179BFDCBFBF79), SPH_C64(0x5E1A4944CC444449), + SPH_C64(0xC0611FA1FEA1A11F), SPH_C64(0x165A614CD44C4C61), + SPH_C64(0xB685FF33553333FF), SPH_C64(0xA366F6C552C5C5F6), + SPH_C64(0xD054AE84918484AE), SPH_C64(0x2605AF23652323AF), + SPH_C64(0xBBC7917C847C7C91), SPH_C64(0x59E94AB0CDB0B04A), + SPH_C64(0x1035B1256F2525B1), SPH_C64(0xBDA841153F151541), + SPH_C64(0x80B5E1355F3535E1), SPH_C64(0x066FD069BB6969D0), + SPH_C64(0x54AB24FF1CFFFF24), SPH_C64(0x40D4FE94A19494FE), + SPH_C64(0x1F52644DD74D4D64), SPH_C64(0xD7A7AD70907070AD), + SPH_C64(0xDB7910A2FBA2A210), SPH_C64(0xBE1129AFECAFAF29), + SPH_C64(0xEB26DECD4ACDCDDE), SPH_C64(0x28FEA9D667D6D6A9), + SPH_C64(0x2B47C16CB46C6CC1), SPH_C64(0x66D151B7C4B7B751), + SPH_C64(0x6B933FF815F8F83F), SPH_C64(0x41482D091B09092D), + SPH_C64(0x38CB18F308F3F318), SPH_C64(0x781FE667A96767E6), + SPH_C64(0xED490EA4F1A4A40E), SPH_C64(0xE90365EA23EAEA65), + SPH_C64(0xDF337BEC29ECEC7B), SPH_C64(0x6FD954B6C7B6B654), + SPH_C64(0x3AEEA3D461D4D4A3), SPH_C64(0x0CDEBDD26BD2D2BD), + SPH_C64(0xB4A044143C141444), SPH_C64(0xEEF0661E221E1E66), + SPH_C64(0xBA5B42E13EE1E142), SPH_C64(0x193DB4246C2424B4), + SPH_C64(0xE5DDD838483838D8), SPH_C64(0xB87EF9C657C6C6F9), + SPH_C64(0x4D9690DB70DBDB90), SPH_C64(0x29627A4BDD4B4B7A), + SPH_C64(0x8DF78F7A8E7A7A8F), SPH_C64(0xF7CDD23A4E3A3AD2), + SPH_C64(0x60BE81DE7FDEDE81), SPH_C64(0x94CA3B5EE25E5E3B), + SPH_C64(0x69B684DF7CDFDF84), SPH_C64(0x49DCFB95A29595FB), + SPH_C64(0x4FB32BFC19FCFC2B), SPH_C64(0x933938AAE3AAAA38), + SPH_C64(0x21F6ACD764D7D7AC), SPH_C64(0xF03ED1CE4FCECED1), + SPH_C64(0x3F381B070907071B), SPH_C64(0x7778330F110F0F33), + SPH_C64(0xC8F5C93D473D3DC9), SPH_C64(0xA2FA2558E8585825), + SPH_C64(0x3EA4C89AB39A9AC8), SPH_C64(0x2CB4C298B59898C2), + SPH_C64(0x0894D69CB99C9CD6), SPH_C64(0x31C31DF20BF2F21D), + SPH_C64(0xF65101A7F4A7A701), SPH_C64(0x9988551133111155), + SPH_C64(0xA9D79B7E827E7E9B), SPH_C64(0xA72C9D8B808B8B9D), + SPH_C64(0x61225243C5434352), SPH_C64(0x1B180F030503030F), + 
SPH_C64(0xA1434DE23BE2E24D), SPH_C64(0x72AE8BDC79DCDC8B), + SPH_C64(0x9E7B56E532E5E556), SPH_C64(0x4BF940B2CBB2B240), + SPH_C64(0x044A6B4ED24E4E6B), SPH_C64(0xB176FCC754C7C7FC), + SPH_C64(0x224FC46DB76D6DC4), SPH_C64(0xF21B6AE926E9E96A), + SPH_C64(0x0225BB27692727BB), SPH_C64(0x7A3A5D40C040405D), + SPH_C64(0x568E9FD875D8D89F), SPH_C64(0x92A5EB37593737EB), + SPH_C64(0x76E4E092AB9292E0), SPH_C64(0x830C898F8C8F8F89), + SPH_C64(0x0908050103010105), SPH_C64(0xF5E8691D271D1D69), + SPH_C64(0xF1A20253F5535302), SPH_C64(0xD3EDC63E423E3EC6), + SPH_C64(0xABF22059EB595920), SPH_C64(0x8746E2C15EC1C1E2), + SPH_C64(0x0D426E4FD14F4F6E), SPH_C64(0xBF8DFA32563232FA), + SPH_C64(0xA6B04E163A16164E), SPH_C64(0x798335FA13FAFA35), + SPH_C64(0xF387B9749C7474B9), SPH_C64(0x708B30FB10FBFB30), + SPH_C64(0x5C3FF263A56363F2), SPH_C64(0x138CD99FBC9F9FD9), + SPH_C64(0x89BDE4345C3434E4), SPH_C64(0xCAD0721A2E1A1A72), + SPH_C64(0x674D822A7E2A2A82), SPH_C64(0xB0EA2F5AEE5A5A2F), + SPH_C64(0x911C838D8A8D8D83), SPH_C64(0xCF06CAC946C9C9CA), + SPH_C64(0xF936D4CF4CCFCFD4), SPH_C64(0x15E309F607F6F609), + SPH_C64(0x64F4EA90AD9090EA), SPH_C64(0x755D882878282888), + SPH_C64(0xBC34928885888892), SPH_C64(0x37ACCD9BB09B9BCD), + SPH_C64(0xA495F531533131F5), SPH_C64(0x7E70360E120E0E36), + SPH_C64(0x3C8173BDDABDBD73), SPH_C64(0x206A7F4ADE4A4A7F), + SPH_C64(0xFB136FE825E8E86F), SPH_C64(0x52C4F496A79696F4), + SPH_C64(0xFF5904A6F7A6A604), SPH_C64(0x6C603C0C140C0C3C), + SPH_C64(0xC60ECFC845C8C8CF), SPH_C64(0x96EF80798B797980), + SPH_C64(0x358976BCD9BCBC76), SPH_C64(0x27997CBEDFBEBE7C), + SPH_C64(0xC42B74EF2CEFEF74), SPH_C64(0x3957CB6EB26E6ECB), + SPH_C64(0x4C0A4346CA464643), SPH_C64(0x5BCCF197A49797F1), + SPH_C64(0xB9E22A5BED5B5B2A), SPH_C64(0xD63B7EED2AEDED7E), + SPH_C64(0xD1C87D192B19197D), SPH_C64(0x5F869AD976D9D99A), + SPH_C64(0xA50926ACE9ACAC26), SPH_C64(0x25BCC799B69999C7), + SPH_C64(0x812932A8E5A8A832), SPH_C64(0x7C558D297B29298D), + SPH_C64(0x6307E964AC6464E9), SPH_C64(0xE7F8631F211F1F63), + 
SPH_C64(0xAC0123ADEAADAD23), SPH_C64(0xC7921C55FF55551C), + SPH_C64(0x8B985F133513135F), SPH_C64(0x0AB16DBBD0BBBB6D), + SPH_C64(0x1CEB0CF704F7F70C), SPH_C64(0x305FCE6FB16F6FCE), + SPH_C64(0x18A167B9D6B9B967), SPH_C64(0x45024647C9474746), + SPH_C64(0x4A65932F712F2F93), SPH_C64(0xCD2371EE2FEEEE71), + SPH_C64(0x11A962B8D5B8B862), SPH_C64(0x84FF8A7B8D7B7B8A), + SPH_C64(0xB53C978986898997), SPH_C64(0xAD9DF030503030F0), + SPH_C64(0x05D6B8D368D3D3B8), SPH_C64(0xA0DF9E7F817F7F9E), + SPH_C64(0xE197B3769A7676B3), SPH_C64(0xE664B0829B8282B0) +}; + +static const sph_u64 old0_T2[256] = { + SPH_C64(0x67D568B86868D50F), SPH_C64(0xCEB7D06DD0D0B71E), + SPH_C64(0x0B60EB20EBEB60E0), SPH_C64(0x45872B7D2B2B876E), + SPH_C64(0x7A7548D848487532), SPH_C64(0x9CD39DBA9D9DD301), + SPH_C64(0x77DF6ABE6A6ADF1D), SPH_C64(0x7353E431E4E45397), + SPH_C64(0x4B48E338E3E348A8), SPH_C64(0x7115A3F8A3A315D2), + SPH_C64(0x8A1356FA565613DC), SPH_C64(0x7CBF819E8181BFFD), + SPH_C64(0xCF947D877D7D94B2), SPH_C64(0xDB12F10EF1F1122A), + SPH_C64(0x5CAB85928585ABD9), SPH_C64(0x84DC9EBF9E9EDC1A), + SPH_C64(0x7D9C2C742C2C9C51), SPH_C64(0x048C8E8F8E8E8C8A), + SPH_C64(0xE78578887878859F), SPH_C64(0x1EC5CA43CACAC5D4), + SPH_C64(0xB84B173917174BAF), SPH_C64(0x2137A9E6A9A93788), + SPH_C64(0x2FF861A36161F84E), SPH_C64(0xE6A6D562D5D5A633), + SPH_C64(0xD2345DE75D5D348F), SPH_C64(0x58270B1D0B0B2753), + SPH_C64(0x14868C898C8C8698), SPH_C64(0xFDCC3C443C3CCCC1), + SPH_C64(0x9FB677997777B6E8), SPH_C64(0xB20851F3515108E3), + SPH_C64(0x0DAA22662222AA2F), SPH_C64(0x2A5742C642425768), + SPH_C64(0xE5C33F413F3FC3DA), SPH_C64(0x9A1954FC545419CE), + SPH_C64(0x325841C341415873), SPH_C64(0x74BA809D8080BAF4), + SPH_C64(0x2EDBCC49CCCCDBE2), SPH_C64(0x44A486978686A4C2), + SPH_C64(0xF145B3C8B3B34542), SPH_C64(0xC0781828181878D8), + SPH_C64(0x6D962E722E2E9643), SPH_C64(0x821657F9575716D5), + SPH_C64(0x301E060A06061E36), SPH_C64(0x37F762A66262F755), + SPH_C64(0xF303F401F4F40307), SPH_C64(0xADEE365A3636EE9B), + SPH_C64(0xC6B2D16ED1D1B217), 
SPH_C64(0x7FDA6BBD6B6BDA14), + SPH_C64(0xD8771B2D1B1B77C3), SPH_C64(0x0FEC65AF6565EC6A), + SPH_C64(0x8FBC759F7575BCFA), SPH_C64(0x8050103010105090), + SPH_C64(0x9E95DA73DADA9544), SPH_C64(0x727049DB4949703B), + SPH_C64(0x2DBE266A2626BE0B), SPH_C64(0x9B3AF916F9F93A62), + SPH_C64(0x16C0CB40CBCBC0DD), SPH_C64(0x17E366AA6666E371), + SPH_C64(0x6B5CE734E7E75C8C), SPH_C64(0xB968BAD3BABA6803), + SPH_C64(0x192CAEEFAEAE2CB7), SPH_C64(0xBA0D50F050500DEA), + SPH_C64(0xAA0752F6525207F8), SPH_C64(0x313DABE0ABAB3D9A), + SPH_C64(0x2811050F0505112D), SPH_C64(0xD317F00DF0F01723), + SPH_C64(0x68390D170D0D3965), SPH_C64(0xBFA273957373A2CC), + SPH_C64(0xC5D73B4D3B3BD7FE), SPH_C64(0x2014040C04041424), + SPH_C64(0x1DA020602020A03D), SPH_C64(0xA321FE1FFEFE215D), + SPH_C64(0xA68EDD7ADDDD8E7B), SPH_C64(0xFB06F502F5F5060E), + SPH_C64(0xC95EB4C1B4B45E7D), SPH_C64(0xC23E5FE15F5F3E9D), + SPH_C64(0x50220A1E0A0A225A), SPH_C64(0xC15BB5C2B5B55B74), + SPH_C64(0x4EE7C05DC0C0E78E), SPH_C64(0x691AA0FDA0A01AC9), + SPH_C64(0xAFA871937171A8DE), SPH_C64(0x410BA5F2A5A50BE4), + SPH_C64(0x75992D772D2D9958), SPH_C64(0x27FD60A06060FD47), + SPH_C64(0xB7A772967272A7C5), SPH_C64(0xECE593A89393E57F), + SPH_C64(0xD5DD394B3939DDEC), SPH_C64(0x4028081808082848), + SPH_C64(0x6CB583988383B5EF), SPH_C64(0x15A521632121A534), + SPH_C64(0xDA315CE45C5C3186), SPH_C64(0x4CA187948787A1CB), + SPH_C64(0xE14FB1CEB1B14F50), SPH_C64(0x5347E03DE0E047B3), + SPH_C64(0x0000000000000000), SPH_C64(0x56E8C358C3C3E895), + SPH_C64(0x905A123612125A82), SPH_C64(0xFCEF91AE9191EF6D), + SPH_C64(0x24988A838A8A98AE), SPH_C64(0x100A020602020A12), + SPH_C64(0xE06C1C241C1C6CFC), SPH_C64(0x6359E637E6E65985), + SPH_C64(0x124C45CF45454C57), SPH_C64(0x5EEDC25BC2C2ED9C), + SPH_C64(0x6EF3C451C4C4F3AA), SPH_C64(0xBB2EFD1AFDFD2E46), + SPH_C64(0x9179BFDCBFBF792E), SPH_C64(0x1A4944CC4444495E), + SPH_C64(0x611FA1FEA1A11FC0), SPH_C64(0x5A614CD44C4C6116), + SPH_C64(0x85FF33553333FFB6), SPH_C64(0x66F6C552C5C5F6A3), + SPH_C64(0x54AE84918484AED0), 
SPH_C64(0x05AF23652323AF26), + SPH_C64(0xC7917C847C7C91BB), SPH_C64(0xE94AB0CDB0B04A59), + SPH_C64(0x35B1256F2525B110), SPH_C64(0xA841153F151541BD), + SPH_C64(0xB5E1355F3535E180), SPH_C64(0x6FD069BB6969D006), + SPH_C64(0xAB24FF1CFFFF2454), SPH_C64(0xD4FE94A19494FE40), + SPH_C64(0x52644DD74D4D641F), SPH_C64(0xA7AD70907070ADD7), + SPH_C64(0x7910A2FBA2A210DB), SPH_C64(0x1129AFECAFAF29BE), + SPH_C64(0x26DECD4ACDCDDEEB), SPH_C64(0xFEA9D667D6D6A928), + SPH_C64(0x47C16CB46C6CC12B), SPH_C64(0xD151B7C4B7B75166), + SPH_C64(0x933FF815F8F83F6B), SPH_C64(0x482D091B09092D41), + SPH_C64(0xCB18F308F3F31838), SPH_C64(0x1FE667A96767E678), + SPH_C64(0x490EA4F1A4A40EED), SPH_C64(0x0365EA23EAEA65E9), + SPH_C64(0x337BEC29ECEC7BDF), SPH_C64(0xD954B6C7B6B6546F), + SPH_C64(0xEEA3D461D4D4A33A), SPH_C64(0xDEBDD26BD2D2BD0C), + SPH_C64(0xA044143C141444B4), SPH_C64(0xF0661E221E1E66EE), + SPH_C64(0x5B42E13EE1E142BA), SPH_C64(0x3DB4246C2424B419), + SPH_C64(0xDDD838483838D8E5), SPH_C64(0x7EF9C657C6C6F9B8), + SPH_C64(0x9690DB70DBDB904D), SPH_C64(0x627A4BDD4B4B7A29), + SPH_C64(0xF78F7A8E7A7A8F8D), SPH_C64(0xCDD23A4E3A3AD2F7), + SPH_C64(0xBE81DE7FDEDE8160), SPH_C64(0xCA3B5EE25E5E3B94), + SPH_C64(0xB684DF7CDFDF8469), SPH_C64(0xDCFB95A29595FB49), + SPH_C64(0xB32BFC19FCFC2B4F), SPH_C64(0x3938AAE3AAAA3893), + SPH_C64(0xF6ACD764D7D7AC21), SPH_C64(0x3ED1CE4FCECED1F0), + SPH_C64(0x381B070907071B3F), SPH_C64(0x78330F110F0F3377), + SPH_C64(0xF5C93D473D3DC9C8), SPH_C64(0xFA2558E8585825A2), + SPH_C64(0xA4C89AB39A9AC83E), SPH_C64(0xB4C298B59898C22C), + SPH_C64(0x94D69CB99C9CD608), SPH_C64(0xC31DF20BF2F21D31), + SPH_C64(0x5101A7F4A7A701F6), SPH_C64(0x8855113311115599), + SPH_C64(0xD79B7E827E7E9BA9), SPH_C64(0x2C9D8B808B8B9DA7), + SPH_C64(0x225243C543435261), SPH_C64(0x180F030503030F1B), + SPH_C64(0x434DE23BE2E24DA1), SPH_C64(0xAE8BDC79DCDC8B72), + SPH_C64(0x7B56E532E5E5569E), SPH_C64(0xF940B2CBB2B2404B), + SPH_C64(0x4A6B4ED24E4E6B04), SPH_C64(0x76FCC754C7C7FCB1), + SPH_C64(0x4FC46DB76D6DC422), 
SPH_C64(0x1B6AE926E9E96AF2), + SPH_C64(0x25BB27692727BB02), SPH_C64(0x3A5D40C040405D7A), + SPH_C64(0x8E9FD875D8D89F56), SPH_C64(0xA5EB37593737EB92), + SPH_C64(0xE4E092AB9292E076), SPH_C64(0x0C898F8C8F8F8983), + SPH_C64(0x0805010301010509), SPH_C64(0xE8691D271D1D69F5), + SPH_C64(0xA20253F5535302F1), SPH_C64(0xEDC63E423E3EC6D3), + SPH_C64(0xF22059EB595920AB), SPH_C64(0x46E2C15EC1C1E287), + SPH_C64(0x426E4FD14F4F6E0D), SPH_C64(0x8DFA32563232FABF), + SPH_C64(0xB04E163A16164EA6), SPH_C64(0x8335FA13FAFA3579), + SPH_C64(0x87B9749C7474B9F3), SPH_C64(0x8B30FB10FBFB3070), + SPH_C64(0x3FF263A56363F25C), SPH_C64(0x8CD99FBC9F9FD913), + SPH_C64(0xBDE4345C3434E489), SPH_C64(0xD0721A2E1A1A72CA), + SPH_C64(0x4D822A7E2A2A8267), SPH_C64(0xEA2F5AEE5A5A2FB0), + SPH_C64(0x1C838D8A8D8D8391), SPH_C64(0x06CAC946C9C9CACF), + SPH_C64(0x36D4CF4CCFCFD4F9), SPH_C64(0xE309F607F6F60915), + SPH_C64(0xF4EA90AD9090EA64), SPH_C64(0x5D88287828288875), + SPH_C64(0x34928885888892BC), SPH_C64(0xACCD9BB09B9BCD37), + SPH_C64(0x95F531533131F5A4), SPH_C64(0x70360E120E0E367E), + SPH_C64(0x8173BDDABDBD733C), SPH_C64(0x6A7F4ADE4A4A7F20), + SPH_C64(0x136FE825E8E86FFB), SPH_C64(0xC4F496A79696F452), + SPH_C64(0x5904A6F7A6A604FF), SPH_C64(0x603C0C140C0C3C6C), + SPH_C64(0x0ECFC845C8C8CFC6), SPH_C64(0xEF80798B79798096), + SPH_C64(0x8976BCD9BCBC7635), SPH_C64(0x997CBEDFBEBE7C27), + SPH_C64(0x2B74EF2CEFEF74C4), SPH_C64(0x57CB6EB26E6ECB39), + SPH_C64(0x0A4346CA4646434C), SPH_C64(0xCCF197A49797F15B), + SPH_C64(0xE22A5BED5B5B2AB9), SPH_C64(0x3B7EED2AEDED7ED6), + SPH_C64(0xC87D192B19197DD1), SPH_C64(0x869AD976D9D99A5F), + SPH_C64(0x0926ACE9ACAC26A5), SPH_C64(0xBCC799B69999C725), + SPH_C64(0x2932A8E5A8A83281), SPH_C64(0x558D297B29298D7C), + SPH_C64(0x07E964AC6464E963), SPH_C64(0xF8631F211F1F63E7), + SPH_C64(0x0123ADEAADAD23AC), SPH_C64(0x921C55FF55551CC7), + SPH_C64(0x985F133513135F8B), SPH_C64(0xB16DBBD0BBBB6D0A), + SPH_C64(0xEB0CF704F7F70C1C), SPH_C64(0x5FCE6FB16F6FCE30), + SPH_C64(0xA167B9D6B9B96718), 
SPH_C64(0x024647C947474645), + SPH_C64(0x65932F712F2F934A), SPH_C64(0x2371EE2FEEEE71CD), + SPH_C64(0xA962B8D5B8B86211), SPH_C64(0xFF8A7B8D7B7B8A84), + SPH_C64(0x3C978986898997B5), SPH_C64(0x9DF030503030F0AD), + SPH_C64(0xD6B8D368D3D3B805), SPH_C64(0xDF9E7F817F7F9EA0), + SPH_C64(0x97B3769A7676B3E1), SPH_C64(0x64B0829B8282B0E6) +}; + +static const sph_u64 old0_T3[256] = { + SPH_C64(0xD568B86868D50F67), SPH_C64(0xB7D06DD0D0B71ECE), + SPH_C64(0x60EB20EBEB60E00B), SPH_C64(0x872B7D2B2B876E45), + SPH_C64(0x7548D8484875327A), SPH_C64(0xD39DBA9D9DD3019C), + SPH_C64(0xDF6ABE6A6ADF1D77), SPH_C64(0x53E431E4E4539773), + SPH_C64(0x48E338E3E348A84B), SPH_C64(0x15A3F8A3A315D271), + SPH_C64(0x1356FA565613DC8A), SPH_C64(0xBF819E8181BFFD7C), + SPH_C64(0x947D877D7D94B2CF), SPH_C64(0x12F10EF1F1122ADB), + SPH_C64(0xAB85928585ABD95C), SPH_C64(0xDC9EBF9E9EDC1A84), + SPH_C64(0x9C2C742C2C9C517D), SPH_C64(0x8C8E8F8E8E8C8A04), + SPH_C64(0x8578887878859FE7), SPH_C64(0xC5CA43CACAC5D41E), + SPH_C64(0x4B173917174BAFB8), SPH_C64(0x37A9E6A9A9378821), + SPH_C64(0xF861A36161F84E2F), SPH_C64(0xA6D562D5D5A633E6), + SPH_C64(0x345DE75D5D348FD2), SPH_C64(0x270B1D0B0B275358), + SPH_C64(0x868C898C8C869814), SPH_C64(0xCC3C443C3CCCC1FD), + SPH_C64(0xB677997777B6E89F), SPH_C64(0x0851F3515108E3B2), + SPH_C64(0xAA22662222AA2F0D), SPH_C64(0x5742C6424257682A), + SPH_C64(0xC33F413F3FC3DAE5), SPH_C64(0x1954FC545419CE9A), + SPH_C64(0x5841C34141587332), SPH_C64(0xBA809D8080BAF474), + SPH_C64(0xDBCC49CCCCDBE22E), SPH_C64(0xA486978686A4C244), + SPH_C64(0x45B3C8B3B34542F1), SPH_C64(0x781828181878D8C0), + SPH_C64(0x962E722E2E96436D), SPH_C64(0x1657F9575716D582), + SPH_C64(0x1E060A06061E3630), SPH_C64(0xF762A66262F75537), + SPH_C64(0x03F401F4F40307F3), SPH_C64(0xEE365A3636EE9BAD), + SPH_C64(0xB2D16ED1D1B217C6), SPH_C64(0xDA6BBD6B6BDA147F), + SPH_C64(0x771B2D1B1B77C3D8), SPH_C64(0xEC65AF6565EC6A0F), + SPH_C64(0xBC759F7575BCFA8F), SPH_C64(0x5010301010509080), + SPH_C64(0x95DA73DADA95449E), SPH_C64(0x7049DB4949703B72), + 
SPH_C64(0xBE266A2626BE0B2D), SPH_C64(0x3AF916F9F93A629B), + SPH_C64(0xC0CB40CBCBC0DD16), SPH_C64(0xE366AA6666E37117), + SPH_C64(0x5CE734E7E75C8C6B), SPH_C64(0x68BAD3BABA6803B9), + SPH_C64(0x2CAEEFAEAE2CB719), SPH_C64(0x0D50F050500DEABA), + SPH_C64(0x0752F6525207F8AA), SPH_C64(0x3DABE0ABAB3D9A31), + SPH_C64(0x11050F0505112D28), SPH_C64(0x17F00DF0F01723D3), + SPH_C64(0x390D170D0D396568), SPH_C64(0xA273957373A2CCBF), + SPH_C64(0xD73B4D3B3BD7FEC5), SPH_C64(0x14040C0404142420), + SPH_C64(0xA020602020A03D1D), SPH_C64(0x21FE1FFEFE215DA3), + SPH_C64(0x8EDD7ADDDD8E7BA6), SPH_C64(0x06F502F5F5060EFB), + SPH_C64(0x5EB4C1B4B45E7DC9), SPH_C64(0x3E5FE15F5F3E9DC2), + SPH_C64(0x220A1E0A0A225A50), SPH_C64(0x5BB5C2B5B55B74C1), + SPH_C64(0xE7C05DC0C0E78E4E), SPH_C64(0x1AA0FDA0A01AC969), + SPH_C64(0xA871937171A8DEAF), SPH_C64(0x0BA5F2A5A50BE441), + SPH_C64(0x992D772D2D995875), SPH_C64(0xFD60A06060FD4727), + SPH_C64(0xA772967272A7C5B7), SPH_C64(0xE593A89393E57FEC), + SPH_C64(0xDD394B3939DDECD5), SPH_C64(0x2808180808284840), + SPH_C64(0xB583988383B5EF6C), SPH_C64(0xA521632121A53415), + SPH_C64(0x315CE45C5C3186DA), SPH_C64(0xA187948787A1CB4C), + SPH_C64(0x4FB1CEB1B14F50E1), SPH_C64(0x47E03DE0E047B353), + SPH_C64(0x0000000000000000), SPH_C64(0xE8C358C3C3E89556), + SPH_C64(0x5A123612125A8290), SPH_C64(0xEF91AE9191EF6DFC), + SPH_C64(0x988A838A8A98AE24), SPH_C64(0x0A020602020A1210), + SPH_C64(0x6C1C241C1C6CFCE0), SPH_C64(0x59E637E6E6598563), + SPH_C64(0x4C45CF45454C5712), SPH_C64(0xEDC25BC2C2ED9C5E), + SPH_C64(0xF3C451C4C4F3AA6E), SPH_C64(0x2EFD1AFDFD2E46BB), + SPH_C64(0x79BFDCBFBF792E91), SPH_C64(0x4944CC4444495E1A), + SPH_C64(0x1FA1FEA1A11FC061), SPH_C64(0x614CD44C4C61165A), + SPH_C64(0xFF33553333FFB685), SPH_C64(0xF6C552C5C5F6A366), + SPH_C64(0xAE84918484AED054), SPH_C64(0xAF23652323AF2605), + SPH_C64(0x917C847C7C91BBC7), SPH_C64(0x4AB0CDB0B04A59E9), + SPH_C64(0xB1256F2525B11035), SPH_C64(0x41153F151541BDA8), + SPH_C64(0xE1355F3535E180B5), SPH_C64(0xD069BB6969D0066F), + 
SPH_C64(0x24FF1CFFFF2454AB), SPH_C64(0xFE94A19494FE40D4), + SPH_C64(0x644DD74D4D641F52), SPH_C64(0xAD70907070ADD7A7), + SPH_C64(0x10A2FBA2A210DB79), SPH_C64(0x29AFECAFAF29BE11), + SPH_C64(0xDECD4ACDCDDEEB26), SPH_C64(0xA9D667D6D6A928FE), + SPH_C64(0xC16CB46C6CC12B47), SPH_C64(0x51B7C4B7B75166D1), + SPH_C64(0x3FF815F8F83F6B93), SPH_C64(0x2D091B09092D4148), + SPH_C64(0x18F308F3F31838CB), SPH_C64(0xE667A96767E6781F), + SPH_C64(0x0EA4F1A4A40EED49), SPH_C64(0x65EA23EAEA65E903), + SPH_C64(0x7BEC29ECEC7BDF33), SPH_C64(0x54B6C7B6B6546FD9), + SPH_C64(0xA3D461D4D4A33AEE), SPH_C64(0xBDD26BD2D2BD0CDE), + SPH_C64(0x44143C141444B4A0), SPH_C64(0x661E221E1E66EEF0), + SPH_C64(0x42E13EE1E142BA5B), SPH_C64(0xB4246C2424B4193D), + SPH_C64(0xD838483838D8E5DD), SPH_C64(0xF9C657C6C6F9B87E), + SPH_C64(0x90DB70DBDB904D96), SPH_C64(0x7A4BDD4B4B7A2962), + SPH_C64(0x8F7A8E7A7A8F8DF7), SPH_C64(0xD23A4E3A3AD2F7CD), + SPH_C64(0x81DE7FDEDE8160BE), SPH_C64(0x3B5EE25E5E3B94CA), + SPH_C64(0x84DF7CDFDF8469B6), SPH_C64(0xFB95A29595FB49DC), + SPH_C64(0x2BFC19FCFC2B4FB3), SPH_C64(0x38AAE3AAAA389339), + SPH_C64(0xACD764D7D7AC21F6), SPH_C64(0xD1CE4FCECED1F03E), + SPH_C64(0x1B070907071B3F38), SPH_C64(0x330F110F0F337778), + SPH_C64(0xC93D473D3DC9C8F5), SPH_C64(0x2558E8585825A2FA), + SPH_C64(0xC89AB39A9AC83EA4), SPH_C64(0xC298B59898C22CB4), + SPH_C64(0xD69CB99C9CD60894), SPH_C64(0x1DF20BF2F21D31C3), + SPH_C64(0x01A7F4A7A701F651), SPH_C64(0x5511331111559988), + SPH_C64(0x9B7E827E7E9BA9D7), SPH_C64(0x9D8B808B8B9DA72C), + SPH_C64(0x5243C54343526122), SPH_C64(0x0F030503030F1B18), + SPH_C64(0x4DE23BE2E24DA143), SPH_C64(0x8BDC79DCDC8B72AE), + SPH_C64(0x56E532E5E5569E7B), SPH_C64(0x40B2CBB2B2404BF9), + SPH_C64(0x6B4ED24E4E6B044A), SPH_C64(0xFCC754C7C7FCB176), + SPH_C64(0xC46DB76D6DC4224F), SPH_C64(0x6AE926E9E96AF21B), + SPH_C64(0xBB27692727BB0225), SPH_C64(0x5D40C040405D7A3A), + SPH_C64(0x9FD875D8D89F568E), SPH_C64(0xEB37593737EB92A5), + SPH_C64(0xE092AB9292E076E4), SPH_C64(0x898F8C8F8F89830C), + 
SPH_C64(0x0501030101050908), SPH_C64(0x691D271D1D69F5E8), + SPH_C64(0x0253F5535302F1A2), SPH_C64(0xC63E423E3EC6D3ED), + SPH_C64(0x2059EB595920ABF2), SPH_C64(0xE2C15EC1C1E28746), + SPH_C64(0x6E4FD14F4F6E0D42), SPH_C64(0xFA32563232FABF8D), + SPH_C64(0x4E163A16164EA6B0), SPH_C64(0x35FA13FAFA357983), + SPH_C64(0xB9749C7474B9F387), SPH_C64(0x30FB10FBFB30708B), + SPH_C64(0xF263A56363F25C3F), SPH_C64(0xD99FBC9F9FD9138C), + SPH_C64(0xE4345C3434E489BD), SPH_C64(0x721A2E1A1A72CAD0), + SPH_C64(0x822A7E2A2A82674D), SPH_C64(0x2F5AEE5A5A2FB0EA), + SPH_C64(0x838D8A8D8D83911C), SPH_C64(0xCAC946C9C9CACF06), + SPH_C64(0xD4CF4CCFCFD4F936), SPH_C64(0x09F607F6F60915E3), + SPH_C64(0xEA90AD9090EA64F4), SPH_C64(0x882878282888755D), + SPH_C64(0x928885888892BC34), SPH_C64(0xCD9BB09B9BCD37AC), + SPH_C64(0xF531533131F5A495), SPH_C64(0x360E120E0E367E70), + SPH_C64(0x73BDDABDBD733C81), SPH_C64(0x7F4ADE4A4A7F206A), + SPH_C64(0x6FE825E8E86FFB13), SPH_C64(0xF496A79696F452C4), + SPH_C64(0x04A6F7A6A604FF59), SPH_C64(0x3C0C140C0C3C6C60), + SPH_C64(0xCFC845C8C8CFC60E), SPH_C64(0x80798B79798096EF), + SPH_C64(0x76BCD9BCBC763589), SPH_C64(0x7CBEDFBEBE7C2799), + SPH_C64(0x74EF2CEFEF74C42B), SPH_C64(0xCB6EB26E6ECB3957), + SPH_C64(0x4346CA4646434C0A), SPH_C64(0xF197A49797F15BCC), + SPH_C64(0x2A5BED5B5B2AB9E2), SPH_C64(0x7EED2AEDED7ED63B), + SPH_C64(0x7D192B19197DD1C8), SPH_C64(0x9AD976D9D99A5F86), + SPH_C64(0x26ACE9ACAC26A509), SPH_C64(0xC799B69999C725BC), + SPH_C64(0x32A8E5A8A8328129), SPH_C64(0x8D297B29298D7C55), + SPH_C64(0xE964AC6464E96307), SPH_C64(0x631F211F1F63E7F8), + SPH_C64(0x23ADEAADAD23AC01), SPH_C64(0x1C55FF55551CC792), + SPH_C64(0x5F133513135F8B98), SPH_C64(0x6DBBD0BBBB6D0AB1), + SPH_C64(0x0CF704F7F70C1CEB), SPH_C64(0xCE6FB16F6FCE305F), + SPH_C64(0x67B9D6B9B96718A1), SPH_C64(0x4647C94747464502), + SPH_C64(0x932F712F2F934A65), SPH_C64(0x71EE2FEEEE71CD23), + SPH_C64(0x62B8D5B8B86211A9), SPH_C64(0x8A7B8D7B7B8A84FF), + SPH_C64(0x978986898997B53C), SPH_C64(0xF030503030F0AD9D), + 
SPH_C64(0xB8D368D3D3B805D6), SPH_C64(0x9E7F817F7F9EA0DF), + SPH_C64(0xB3769A7676B3E197), SPH_C64(0xB0829B8282B0E664) +}; + +static const sph_u64 old0_T4[256] = { + SPH_C64(0x68B86868D50F67D5), SPH_C64(0xD06DD0D0B71ECEB7), + SPH_C64(0xEB20EBEB60E00B60), SPH_C64(0x2B7D2B2B876E4587), + SPH_C64(0x48D8484875327A75), SPH_C64(0x9DBA9D9DD3019CD3), + SPH_C64(0x6ABE6A6ADF1D77DF), SPH_C64(0xE431E4E453977353), + SPH_C64(0xE338E3E348A84B48), SPH_C64(0xA3F8A3A315D27115), + SPH_C64(0x56FA565613DC8A13), SPH_C64(0x819E8181BFFD7CBF), + SPH_C64(0x7D877D7D94B2CF94), SPH_C64(0xF10EF1F1122ADB12), + SPH_C64(0x85928585ABD95CAB), SPH_C64(0x9EBF9E9EDC1A84DC), + SPH_C64(0x2C742C2C9C517D9C), SPH_C64(0x8E8F8E8E8C8A048C), + SPH_C64(0x78887878859FE785), SPH_C64(0xCA43CACAC5D41EC5), + SPH_C64(0x173917174BAFB84B), SPH_C64(0xA9E6A9A937882137), + SPH_C64(0x61A36161F84E2FF8), SPH_C64(0xD562D5D5A633E6A6), + SPH_C64(0x5DE75D5D348FD234), SPH_C64(0x0B1D0B0B27535827), + SPH_C64(0x8C898C8C86981486), SPH_C64(0x3C443C3CCCC1FDCC), + SPH_C64(0x77997777B6E89FB6), SPH_C64(0x51F3515108E3B208), + SPH_C64(0x22662222AA2F0DAA), SPH_C64(0x42C6424257682A57), + SPH_C64(0x3F413F3FC3DAE5C3), SPH_C64(0x54FC545419CE9A19), + SPH_C64(0x41C3414158733258), SPH_C64(0x809D8080BAF474BA), + SPH_C64(0xCC49CCCCDBE22EDB), SPH_C64(0x86978686A4C244A4), + SPH_C64(0xB3C8B3B34542F145), SPH_C64(0x1828181878D8C078), + SPH_C64(0x2E722E2E96436D96), SPH_C64(0x57F9575716D58216), + SPH_C64(0x060A06061E36301E), SPH_C64(0x62A66262F75537F7), + SPH_C64(0xF401F4F40307F303), SPH_C64(0x365A3636EE9BADEE), + SPH_C64(0xD16ED1D1B217C6B2), SPH_C64(0x6BBD6B6BDA147FDA), + SPH_C64(0x1B2D1B1B77C3D877), SPH_C64(0x65AF6565EC6A0FEC), + SPH_C64(0x759F7575BCFA8FBC), SPH_C64(0x1030101050908050), + SPH_C64(0xDA73DADA95449E95), SPH_C64(0x49DB4949703B7270), + SPH_C64(0x266A2626BE0B2DBE), SPH_C64(0xF916F9F93A629B3A), + SPH_C64(0xCB40CBCBC0DD16C0), SPH_C64(0x66AA6666E37117E3), + SPH_C64(0xE734E7E75C8C6B5C), SPH_C64(0xBAD3BABA6803B968), + SPH_C64(0xAEEFAEAE2CB7192C), 
SPH_C64(0x50F050500DEABA0D), + SPH_C64(0x52F6525207F8AA07), SPH_C64(0xABE0ABAB3D9A313D), + SPH_C64(0x050F0505112D2811), SPH_C64(0xF00DF0F01723D317), + SPH_C64(0x0D170D0D39656839), SPH_C64(0x73957373A2CCBFA2), + SPH_C64(0x3B4D3B3BD7FEC5D7), SPH_C64(0x040C040414242014), + SPH_C64(0x20602020A03D1DA0), SPH_C64(0xFE1FFEFE215DA321), + SPH_C64(0xDD7ADDDD8E7BA68E), SPH_C64(0xF502F5F5060EFB06), + SPH_C64(0xB4C1B4B45E7DC95E), SPH_C64(0x5FE15F5F3E9DC23E), + SPH_C64(0x0A1E0A0A225A5022), SPH_C64(0xB5C2B5B55B74C15B), + SPH_C64(0xC05DC0C0E78E4EE7), SPH_C64(0xA0FDA0A01AC9691A), + SPH_C64(0x71937171A8DEAFA8), SPH_C64(0xA5F2A5A50BE4410B), + SPH_C64(0x2D772D2D99587599), SPH_C64(0x60A06060FD4727FD), + SPH_C64(0x72967272A7C5B7A7), SPH_C64(0x93A89393E57FECE5), + SPH_C64(0x394B3939DDECD5DD), SPH_C64(0x0818080828484028), + SPH_C64(0x83988383B5EF6CB5), SPH_C64(0x21632121A53415A5), + SPH_C64(0x5CE45C5C3186DA31), SPH_C64(0x87948787A1CB4CA1), + SPH_C64(0xB1CEB1B14F50E14F), SPH_C64(0xE03DE0E047B35347), + SPH_C64(0x0000000000000000), SPH_C64(0xC358C3C3E89556E8), + SPH_C64(0x123612125A82905A), SPH_C64(0x91AE9191EF6DFCEF), + SPH_C64(0x8A838A8A98AE2498), SPH_C64(0x020602020A12100A), + SPH_C64(0x1C241C1C6CFCE06C), SPH_C64(0xE637E6E659856359), + SPH_C64(0x45CF45454C57124C), SPH_C64(0xC25BC2C2ED9C5EED), + SPH_C64(0xC451C4C4F3AA6EF3), SPH_C64(0xFD1AFDFD2E46BB2E), + SPH_C64(0xBFDCBFBF792E9179), SPH_C64(0x44CC4444495E1A49), + SPH_C64(0xA1FEA1A11FC0611F), SPH_C64(0x4CD44C4C61165A61), + SPH_C64(0x33553333FFB685FF), SPH_C64(0xC552C5C5F6A366F6), + SPH_C64(0x84918484AED054AE), SPH_C64(0x23652323AF2605AF), + SPH_C64(0x7C847C7C91BBC791), SPH_C64(0xB0CDB0B04A59E94A), + SPH_C64(0x256F2525B11035B1), SPH_C64(0x153F151541BDA841), + SPH_C64(0x355F3535E180B5E1), SPH_C64(0x69BB6969D0066FD0), + SPH_C64(0xFF1CFFFF2454AB24), SPH_C64(0x94A19494FE40D4FE), + SPH_C64(0x4DD74D4D641F5264), SPH_C64(0x70907070ADD7A7AD), + SPH_C64(0xA2FBA2A210DB7910), SPH_C64(0xAFECAFAF29BE1129), + SPH_C64(0xCD4ACDCDDEEB26DE), 
SPH_C64(0xD667D6D6A928FEA9), + SPH_C64(0x6CB46C6CC12B47C1), SPH_C64(0xB7C4B7B75166D151), + SPH_C64(0xF815F8F83F6B933F), SPH_C64(0x091B09092D41482D), + SPH_C64(0xF308F3F31838CB18), SPH_C64(0x67A96767E6781FE6), + SPH_C64(0xA4F1A4A40EED490E), SPH_C64(0xEA23EAEA65E90365), + SPH_C64(0xEC29ECEC7BDF337B), SPH_C64(0xB6C7B6B6546FD954), + SPH_C64(0xD461D4D4A33AEEA3), SPH_C64(0xD26BD2D2BD0CDEBD), + SPH_C64(0x143C141444B4A044), SPH_C64(0x1E221E1E66EEF066), + SPH_C64(0xE13EE1E142BA5B42), SPH_C64(0x246C2424B4193DB4), + SPH_C64(0x38483838D8E5DDD8), SPH_C64(0xC657C6C6F9B87EF9), + SPH_C64(0xDB70DBDB904D9690), SPH_C64(0x4BDD4B4B7A29627A), + SPH_C64(0x7A8E7A7A8F8DF78F), SPH_C64(0x3A4E3A3AD2F7CDD2), + SPH_C64(0xDE7FDEDE8160BE81), SPH_C64(0x5EE25E5E3B94CA3B), + SPH_C64(0xDF7CDFDF8469B684), SPH_C64(0x95A29595FB49DCFB), + SPH_C64(0xFC19FCFC2B4FB32B), SPH_C64(0xAAE3AAAA38933938), + SPH_C64(0xD764D7D7AC21F6AC), SPH_C64(0xCE4FCECED1F03ED1), + SPH_C64(0x070907071B3F381B), SPH_C64(0x0F110F0F33777833), + SPH_C64(0x3D473D3DC9C8F5C9), SPH_C64(0x58E8585825A2FA25), + SPH_C64(0x9AB39A9AC83EA4C8), SPH_C64(0x98B59898C22CB4C2), + SPH_C64(0x9CB99C9CD60894D6), SPH_C64(0xF20BF2F21D31C31D), + SPH_C64(0xA7F4A7A701F65101), SPH_C64(0x1133111155998855), + SPH_C64(0x7E827E7E9BA9D79B), SPH_C64(0x8B808B8B9DA72C9D), + SPH_C64(0x43C5434352612252), SPH_C64(0x030503030F1B180F), + SPH_C64(0xE23BE2E24DA1434D), SPH_C64(0xDC79DCDC8B72AE8B), + SPH_C64(0xE532E5E5569E7B56), SPH_C64(0xB2CBB2B2404BF940), + SPH_C64(0x4ED24E4E6B044A6B), SPH_C64(0xC754C7C7FCB176FC), + SPH_C64(0x6DB76D6DC4224FC4), SPH_C64(0xE926E9E96AF21B6A), + SPH_C64(0x27692727BB0225BB), SPH_C64(0x40C040405D7A3A5D), + SPH_C64(0xD875D8D89F568E9F), SPH_C64(0x37593737EB92A5EB), + SPH_C64(0x92AB9292E076E4E0), SPH_C64(0x8F8C8F8F89830C89), + SPH_C64(0x0103010105090805), SPH_C64(0x1D271D1D69F5E869), + SPH_C64(0x53F5535302F1A202), SPH_C64(0x3E423E3EC6D3EDC6), + SPH_C64(0x59EB595920ABF220), SPH_C64(0xC15EC1C1E28746E2), + SPH_C64(0x4FD14F4F6E0D426E), 
SPH_C64(0x32563232FABF8DFA), + SPH_C64(0x163A16164EA6B04E), SPH_C64(0xFA13FAFA35798335), + SPH_C64(0x749C7474B9F387B9), SPH_C64(0xFB10FBFB30708B30), + SPH_C64(0x63A56363F25C3FF2), SPH_C64(0x9FBC9F9FD9138CD9), + SPH_C64(0x345C3434E489BDE4), SPH_C64(0x1A2E1A1A72CAD072), + SPH_C64(0x2A7E2A2A82674D82), SPH_C64(0x5AEE5A5A2FB0EA2F), + SPH_C64(0x8D8A8D8D83911C83), SPH_C64(0xC946C9C9CACF06CA), + SPH_C64(0xCF4CCFCFD4F936D4), SPH_C64(0xF607F6F60915E309), + SPH_C64(0x90AD9090EA64F4EA), SPH_C64(0x2878282888755D88), + SPH_C64(0x8885888892BC3492), SPH_C64(0x9BB09B9BCD37ACCD), + SPH_C64(0x31533131F5A495F5), SPH_C64(0x0E120E0E367E7036), + SPH_C64(0xBDDABDBD733C8173), SPH_C64(0x4ADE4A4A7F206A7F), + SPH_C64(0xE825E8E86FFB136F), SPH_C64(0x96A79696F452C4F4), + SPH_C64(0xA6F7A6A604FF5904), SPH_C64(0x0C140C0C3C6C603C), + SPH_C64(0xC845C8C8CFC60ECF), SPH_C64(0x798B79798096EF80), + SPH_C64(0xBCD9BCBC76358976), SPH_C64(0xBEDFBEBE7C27997C), + SPH_C64(0xEF2CEFEF74C42B74), SPH_C64(0x6EB26E6ECB3957CB), + SPH_C64(0x46CA4646434C0A43), SPH_C64(0x97A49797F15BCCF1), + SPH_C64(0x5BED5B5B2AB9E22A), SPH_C64(0xED2AEDED7ED63B7E), + SPH_C64(0x192B19197DD1C87D), SPH_C64(0xD976D9D99A5F869A), + SPH_C64(0xACE9ACAC26A50926), SPH_C64(0x99B69999C725BCC7), + SPH_C64(0xA8E5A8A832812932), SPH_C64(0x297B29298D7C558D), + SPH_C64(0x64AC6464E96307E9), SPH_C64(0x1F211F1F63E7F863), + SPH_C64(0xADEAADAD23AC0123), SPH_C64(0x55FF55551CC7921C), + SPH_C64(0x133513135F8B985F), SPH_C64(0xBBD0BBBB6D0AB16D), + SPH_C64(0xF704F7F70C1CEB0C), SPH_C64(0x6FB16F6FCE305FCE), + SPH_C64(0xB9D6B9B96718A167), SPH_C64(0x47C9474746450246), + SPH_C64(0x2F712F2F934A6593), SPH_C64(0xEE2FEEEE71CD2371), + SPH_C64(0xB8D5B8B86211A962), SPH_C64(0x7B8D7B7B8A84FF8A), + SPH_C64(0x8986898997B53C97), SPH_C64(0x30503030F0AD9DF0), + SPH_C64(0xD368D3D3B805D6B8), SPH_C64(0x7F817F7F9EA0DF9E), + SPH_C64(0x769A7676B3E197B3), SPH_C64(0x829B8282B0E664B0) +}; + +static const sph_u64 old0_T5[256] = { + SPH_C64(0xB86868D50F67D568), SPH_C64(0x6DD0D0B71ECEB7D0), + 
SPH_C64(0x20EBEB60E00B60EB), SPH_C64(0x7D2B2B876E45872B), + SPH_C64(0xD8484875327A7548), SPH_C64(0xBA9D9DD3019CD39D), + SPH_C64(0xBE6A6ADF1D77DF6A), SPH_C64(0x31E4E453977353E4), + SPH_C64(0x38E3E348A84B48E3), SPH_C64(0xF8A3A315D27115A3), + SPH_C64(0xFA565613DC8A1356), SPH_C64(0x9E8181BFFD7CBF81), + SPH_C64(0x877D7D94B2CF947D), SPH_C64(0x0EF1F1122ADB12F1), + SPH_C64(0x928585ABD95CAB85), SPH_C64(0xBF9E9EDC1A84DC9E), + SPH_C64(0x742C2C9C517D9C2C), SPH_C64(0x8F8E8E8C8A048C8E), + SPH_C64(0x887878859FE78578), SPH_C64(0x43CACAC5D41EC5CA), + SPH_C64(0x3917174BAFB84B17), SPH_C64(0xE6A9A937882137A9), + SPH_C64(0xA36161F84E2FF861), SPH_C64(0x62D5D5A633E6A6D5), + SPH_C64(0xE75D5D348FD2345D), SPH_C64(0x1D0B0B275358270B), + SPH_C64(0x898C8C869814868C), SPH_C64(0x443C3CCCC1FDCC3C), + SPH_C64(0x997777B6E89FB677), SPH_C64(0xF3515108E3B20851), + SPH_C64(0x662222AA2F0DAA22), SPH_C64(0xC6424257682A5742), + SPH_C64(0x413F3FC3DAE5C33F), SPH_C64(0xFC545419CE9A1954), + SPH_C64(0xC341415873325841), SPH_C64(0x9D8080BAF474BA80), + SPH_C64(0x49CCCCDBE22EDBCC), SPH_C64(0x978686A4C244A486), + SPH_C64(0xC8B3B34542F145B3), SPH_C64(0x28181878D8C07818), + SPH_C64(0x722E2E96436D962E), SPH_C64(0xF9575716D5821657), + SPH_C64(0x0A06061E36301E06), SPH_C64(0xA66262F75537F762), + SPH_C64(0x01F4F40307F303F4), SPH_C64(0x5A3636EE9BADEE36), + SPH_C64(0x6ED1D1B217C6B2D1), SPH_C64(0xBD6B6BDA147FDA6B), + SPH_C64(0x2D1B1B77C3D8771B), SPH_C64(0xAF6565EC6A0FEC65), + SPH_C64(0x9F7575BCFA8FBC75), SPH_C64(0x3010105090805010), + SPH_C64(0x73DADA95449E95DA), SPH_C64(0xDB4949703B727049), + SPH_C64(0x6A2626BE0B2DBE26), SPH_C64(0x16F9F93A629B3AF9), + SPH_C64(0x40CBCBC0DD16C0CB), SPH_C64(0xAA6666E37117E366), + SPH_C64(0x34E7E75C8C6B5CE7), SPH_C64(0xD3BABA6803B968BA), + SPH_C64(0xEFAEAE2CB7192CAE), SPH_C64(0xF050500DEABA0D50), + SPH_C64(0xF6525207F8AA0752), SPH_C64(0xE0ABAB3D9A313DAB), + SPH_C64(0x0F0505112D281105), SPH_C64(0x0DF0F01723D317F0), + SPH_C64(0x170D0D396568390D), SPH_C64(0x957373A2CCBFA273), + 
SPH_C64(0x4D3B3BD7FEC5D73B), SPH_C64(0x0C04041424201404), + SPH_C64(0x602020A03D1DA020), SPH_C64(0x1FFEFE215DA321FE), + SPH_C64(0x7ADDDD8E7BA68EDD), SPH_C64(0x02F5F5060EFB06F5), + SPH_C64(0xC1B4B45E7DC95EB4), SPH_C64(0xE15F5F3E9DC23E5F), + SPH_C64(0x1E0A0A225A50220A), SPH_C64(0xC2B5B55B74C15BB5), + SPH_C64(0x5DC0C0E78E4EE7C0), SPH_C64(0xFDA0A01AC9691AA0), + SPH_C64(0x937171A8DEAFA871), SPH_C64(0xF2A5A50BE4410BA5), + SPH_C64(0x772D2D995875992D), SPH_C64(0xA06060FD4727FD60), + SPH_C64(0x967272A7C5B7A772), SPH_C64(0xA89393E57FECE593), + SPH_C64(0x4B3939DDECD5DD39), SPH_C64(0x1808082848402808), + SPH_C64(0x988383B5EF6CB583), SPH_C64(0x632121A53415A521), + SPH_C64(0xE45C5C3186DA315C), SPH_C64(0x948787A1CB4CA187), + SPH_C64(0xCEB1B14F50E14FB1), SPH_C64(0x3DE0E047B35347E0), + SPH_C64(0x0000000000000000), SPH_C64(0x58C3C3E89556E8C3), + SPH_C64(0x3612125A82905A12), SPH_C64(0xAE9191EF6DFCEF91), + SPH_C64(0x838A8A98AE24988A), SPH_C64(0x0602020A12100A02), + SPH_C64(0x241C1C6CFCE06C1C), SPH_C64(0x37E6E659856359E6), + SPH_C64(0xCF45454C57124C45), SPH_C64(0x5BC2C2ED9C5EEDC2), + SPH_C64(0x51C4C4F3AA6EF3C4), SPH_C64(0x1AFDFD2E46BB2EFD), + SPH_C64(0xDCBFBF792E9179BF), SPH_C64(0xCC4444495E1A4944), + SPH_C64(0xFEA1A11FC0611FA1), SPH_C64(0xD44C4C61165A614C), + SPH_C64(0x553333FFB685FF33), SPH_C64(0x52C5C5F6A366F6C5), + SPH_C64(0x918484AED054AE84), SPH_C64(0x652323AF2605AF23), + SPH_C64(0x847C7C91BBC7917C), SPH_C64(0xCDB0B04A59E94AB0), + SPH_C64(0x6F2525B11035B125), SPH_C64(0x3F151541BDA84115), + SPH_C64(0x5F3535E180B5E135), SPH_C64(0xBB6969D0066FD069), + SPH_C64(0x1CFFFF2454AB24FF), SPH_C64(0xA19494FE40D4FE94), + SPH_C64(0xD74D4D641F52644D), SPH_C64(0x907070ADD7A7AD70), + SPH_C64(0xFBA2A210DB7910A2), SPH_C64(0xECAFAF29BE1129AF), + SPH_C64(0x4ACDCDDEEB26DECD), SPH_C64(0x67D6D6A928FEA9D6), + SPH_C64(0xB46C6CC12B47C16C), SPH_C64(0xC4B7B75166D151B7), + SPH_C64(0x15F8F83F6B933FF8), SPH_C64(0x1B09092D41482D09), + SPH_C64(0x08F3F31838CB18F3), SPH_C64(0xA96767E6781FE667), + 
SPH_C64(0xF1A4A40EED490EA4), SPH_C64(0x23EAEA65E90365EA), + SPH_C64(0x29ECEC7BDF337BEC), SPH_C64(0xC7B6B6546FD954B6), + SPH_C64(0x61D4D4A33AEEA3D4), SPH_C64(0x6BD2D2BD0CDEBDD2), + SPH_C64(0x3C141444B4A04414), SPH_C64(0x221E1E66EEF0661E), + SPH_C64(0x3EE1E142BA5B42E1), SPH_C64(0x6C2424B4193DB424), + SPH_C64(0x483838D8E5DDD838), SPH_C64(0x57C6C6F9B87EF9C6), + SPH_C64(0x70DBDB904D9690DB), SPH_C64(0xDD4B4B7A29627A4B), + SPH_C64(0x8E7A7A8F8DF78F7A), SPH_C64(0x4E3A3AD2F7CDD23A), + SPH_C64(0x7FDEDE8160BE81DE), SPH_C64(0xE25E5E3B94CA3B5E), + SPH_C64(0x7CDFDF8469B684DF), SPH_C64(0xA29595FB49DCFB95), + SPH_C64(0x19FCFC2B4FB32BFC), SPH_C64(0xE3AAAA38933938AA), + SPH_C64(0x64D7D7AC21F6ACD7), SPH_C64(0x4FCECED1F03ED1CE), + SPH_C64(0x0907071B3F381B07), SPH_C64(0x110F0F337778330F), + SPH_C64(0x473D3DC9C8F5C93D), SPH_C64(0xE8585825A2FA2558), + SPH_C64(0xB39A9AC83EA4C89A), SPH_C64(0xB59898C22CB4C298), + SPH_C64(0xB99C9CD60894D69C), SPH_C64(0x0BF2F21D31C31DF2), + SPH_C64(0xF4A7A701F65101A7), SPH_C64(0x3311115599885511), + SPH_C64(0x827E7E9BA9D79B7E), SPH_C64(0x808B8B9DA72C9D8B), + SPH_C64(0xC543435261225243), SPH_C64(0x0503030F1B180F03), + SPH_C64(0x3BE2E24DA1434DE2), SPH_C64(0x79DCDC8B72AE8BDC), + SPH_C64(0x32E5E5569E7B56E5), SPH_C64(0xCBB2B2404BF940B2), + SPH_C64(0xD24E4E6B044A6B4E), SPH_C64(0x54C7C7FCB176FCC7), + SPH_C64(0xB76D6DC4224FC46D), SPH_C64(0x26E9E96AF21B6AE9), + SPH_C64(0x692727BB0225BB27), SPH_C64(0xC040405D7A3A5D40), + SPH_C64(0x75D8D89F568E9FD8), SPH_C64(0x593737EB92A5EB37), + SPH_C64(0xAB9292E076E4E092), SPH_C64(0x8C8F8F89830C898F), + SPH_C64(0x0301010509080501), SPH_C64(0x271D1D69F5E8691D), + SPH_C64(0xF5535302F1A20253), SPH_C64(0x423E3EC6D3EDC63E), + SPH_C64(0xEB595920ABF22059), SPH_C64(0x5EC1C1E28746E2C1), + SPH_C64(0xD14F4F6E0D426E4F), SPH_C64(0x563232FABF8DFA32), + SPH_C64(0x3A16164EA6B04E16), SPH_C64(0x13FAFA35798335FA), + SPH_C64(0x9C7474B9F387B974), SPH_C64(0x10FBFB30708B30FB), + SPH_C64(0xA56363F25C3FF263), SPH_C64(0xBC9F9FD9138CD99F), + 
SPH_C64(0x5C3434E489BDE434), SPH_C64(0x2E1A1A72CAD0721A), + SPH_C64(0x7E2A2A82674D822A), SPH_C64(0xEE5A5A2FB0EA2F5A), + SPH_C64(0x8A8D8D83911C838D), SPH_C64(0x46C9C9CACF06CAC9), + SPH_C64(0x4CCFCFD4F936D4CF), SPH_C64(0x07F6F60915E309F6), + SPH_C64(0xAD9090EA64F4EA90), SPH_C64(0x78282888755D8828), + SPH_C64(0x85888892BC349288), SPH_C64(0xB09B9BCD37ACCD9B), + SPH_C64(0x533131F5A495F531), SPH_C64(0x120E0E367E70360E), + SPH_C64(0xDABDBD733C8173BD), SPH_C64(0xDE4A4A7F206A7F4A), + SPH_C64(0x25E8E86FFB136FE8), SPH_C64(0xA79696F452C4F496), + SPH_C64(0xF7A6A604FF5904A6), SPH_C64(0x140C0C3C6C603C0C), + SPH_C64(0x45C8C8CFC60ECFC8), SPH_C64(0x8B79798096EF8079), + SPH_C64(0xD9BCBC76358976BC), SPH_C64(0xDFBEBE7C27997CBE), + SPH_C64(0x2CEFEF74C42B74EF), SPH_C64(0xB26E6ECB3957CB6E), + SPH_C64(0xCA4646434C0A4346), SPH_C64(0xA49797F15BCCF197), + SPH_C64(0xED5B5B2AB9E22A5B), SPH_C64(0x2AEDED7ED63B7EED), + SPH_C64(0x2B19197DD1C87D19), SPH_C64(0x76D9D99A5F869AD9), + SPH_C64(0xE9ACAC26A50926AC), SPH_C64(0xB69999C725BCC799), + SPH_C64(0xE5A8A832812932A8), SPH_C64(0x7B29298D7C558D29), + SPH_C64(0xAC6464E96307E964), SPH_C64(0x211F1F63E7F8631F), + SPH_C64(0xEAADAD23AC0123AD), SPH_C64(0xFF55551CC7921C55), + SPH_C64(0x3513135F8B985F13), SPH_C64(0xD0BBBB6D0AB16DBB), + SPH_C64(0x04F7F70C1CEB0CF7), SPH_C64(0xB16F6FCE305FCE6F), + SPH_C64(0xD6B9B96718A167B9), SPH_C64(0xC947474645024647), + SPH_C64(0x712F2F934A65932F), SPH_C64(0x2FEEEE71CD2371EE), + SPH_C64(0xD5B8B86211A962B8), SPH_C64(0x8D7B7B8A84FF8A7B), + SPH_C64(0x86898997B53C9789), SPH_C64(0x503030F0AD9DF030), + SPH_C64(0x68D3D3B805D6B8D3), SPH_C64(0x817F7F9EA0DF9E7F), + SPH_C64(0x9A7676B3E197B376), SPH_C64(0x9B8282B0E664B082) +}; + +static const sph_u64 old0_T6[256] = { + SPH_C64(0x6868D50F67D568B8), SPH_C64(0xD0D0B71ECEB7D06D), + SPH_C64(0xEBEB60E00B60EB20), SPH_C64(0x2B2B876E45872B7D), + SPH_C64(0x484875327A7548D8), SPH_C64(0x9D9DD3019CD39DBA), + SPH_C64(0x6A6ADF1D77DF6ABE), SPH_C64(0xE4E453977353E431), + SPH_C64(0xE3E348A84B48E338), 
SPH_C64(0xA3A315D27115A3F8), + SPH_C64(0x565613DC8A1356FA), SPH_C64(0x8181BFFD7CBF819E), + SPH_C64(0x7D7D94B2CF947D87), SPH_C64(0xF1F1122ADB12F10E), + SPH_C64(0x8585ABD95CAB8592), SPH_C64(0x9E9EDC1A84DC9EBF), + SPH_C64(0x2C2C9C517D9C2C74), SPH_C64(0x8E8E8C8A048C8E8F), + SPH_C64(0x7878859FE7857888), SPH_C64(0xCACAC5D41EC5CA43), + SPH_C64(0x17174BAFB84B1739), SPH_C64(0xA9A937882137A9E6), + SPH_C64(0x6161F84E2FF861A3), SPH_C64(0xD5D5A633E6A6D562), + SPH_C64(0x5D5D348FD2345DE7), SPH_C64(0x0B0B275358270B1D), + SPH_C64(0x8C8C869814868C89), SPH_C64(0x3C3CCCC1FDCC3C44), + SPH_C64(0x7777B6E89FB67799), SPH_C64(0x515108E3B20851F3), + SPH_C64(0x2222AA2F0DAA2266), SPH_C64(0x424257682A5742C6), + SPH_C64(0x3F3FC3DAE5C33F41), SPH_C64(0x545419CE9A1954FC), + SPH_C64(0x41415873325841C3), SPH_C64(0x8080BAF474BA809D), + SPH_C64(0xCCCCDBE22EDBCC49), SPH_C64(0x8686A4C244A48697), + SPH_C64(0xB3B34542F145B3C8), SPH_C64(0x181878D8C0781828), + SPH_C64(0x2E2E96436D962E72), SPH_C64(0x575716D5821657F9), + SPH_C64(0x06061E36301E060A), SPH_C64(0x6262F75537F762A6), + SPH_C64(0xF4F40307F303F401), SPH_C64(0x3636EE9BADEE365A), + SPH_C64(0xD1D1B217C6B2D16E), SPH_C64(0x6B6BDA147FDA6BBD), + SPH_C64(0x1B1B77C3D8771B2D), SPH_C64(0x6565EC6A0FEC65AF), + SPH_C64(0x7575BCFA8FBC759F), SPH_C64(0x1010509080501030), + SPH_C64(0xDADA95449E95DA73), SPH_C64(0x4949703B727049DB), + SPH_C64(0x2626BE0B2DBE266A), SPH_C64(0xF9F93A629B3AF916), + SPH_C64(0xCBCBC0DD16C0CB40), SPH_C64(0x6666E37117E366AA), + SPH_C64(0xE7E75C8C6B5CE734), SPH_C64(0xBABA6803B968BAD3), + SPH_C64(0xAEAE2CB7192CAEEF), SPH_C64(0x50500DEABA0D50F0), + SPH_C64(0x525207F8AA0752F6), SPH_C64(0xABAB3D9A313DABE0), + SPH_C64(0x0505112D2811050F), SPH_C64(0xF0F01723D317F00D), + SPH_C64(0x0D0D396568390D17), SPH_C64(0x7373A2CCBFA27395), + SPH_C64(0x3B3BD7FEC5D73B4D), SPH_C64(0x040414242014040C), + SPH_C64(0x2020A03D1DA02060), SPH_C64(0xFEFE215DA321FE1F), + SPH_C64(0xDDDD8E7BA68EDD7A), SPH_C64(0xF5F5060EFB06F502), + SPH_C64(0xB4B45E7DC95EB4C1), 
SPH_C64(0x5F5F3E9DC23E5FE1), + SPH_C64(0x0A0A225A50220A1E), SPH_C64(0xB5B55B74C15BB5C2), + SPH_C64(0xC0C0E78E4EE7C05D), SPH_C64(0xA0A01AC9691AA0FD), + SPH_C64(0x7171A8DEAFA87193), SPH_C64(0xA5A50BE4410BA5F2), + SPH_C64(0x2D2D995875992D77), SPH_C64(0x6060FD4727FD60A0), + SPH_C64(0x7272A7C5B7A77296), SPH_C64(0x9393E57FECE593A8), + SPH_C64(0x3939DDECD5DD394B), SPH_C64(0x0808284840280818), + SPH_C64(0x8383B5EF6CB58398), SPH_C64(0x2121A53415A52163), + SPH_C64(0x5C5C3186DA315CE4), SPH_C64(0x8787A1CB4CA18794), + SPH_C64(0xB1B14F50E14FB1CE), SPH_C64(0xE0E047B35347E03D), + SPH_C64(0x0000000000000000), SPH_C64(0xC3C3E89556E8C358), + SPH_C64(0x12125A82905A1236), SPH_C64(0x9191EF6DFCEF91AE), + SPH_C64(0x8A8A98AE24988A83), SPH_C64(0x02020A12100A0206), + SPH_C64(0x1C1C6CFCE06C1C24), SPH_C64(0xE6E659856359E637), + SPH_C64(0x45454C57124C45CF), SPH_C64(0xC2C2ED9C5EEDC25B), + SPH_C64(0xC4C4F3AA6EF3C451), SPH_C64(0xFDFD2E46BB2EFD1A), + SPH_C64(0xBFBF792E9179BFDC), SPH_C64(0x4444495E1A4944CC), + SPH_C64(0xA1A11FC0611FA1FE), SPH_C64(0x4C4C61165A614CD4), + SPH_C64(0x3333FFB685FF3355), SPH_C64(0xC5C5F6A366F6C552), + SPH_C64(0x8484AED054AE8491), SPH_C64(0x2323AF2605AF2365), + SPH_C64(0x7C7C91BBC7917C84), SPH_C64(0xB0B04A59E94AB0CD), + SPH_C64(0x2525B11035B1256F), SPH_C64(0x151541BDA841153F), + SPH_C64(0x3535E180B5E1355F), SPH_C64(0x6969D0066FD069BB), + SPH_C64(0xFFFF2454AB24FF1C), SPH_C64(0x9494FE40D4FE94A1), + SPH_C64(0x4D4D641F52644DD7), SPH_C64(0x7070ADD7A7AD7090), + SPH_C64(0xA2A210DB7910A2FB), SPH_C64(0xAFAF29BE1129AFEC), + SPH_C64(0xCDCDDEEB26DECD4A), SPH_C64(0xD6D6A928FEA9D667), + SPH_C64(0x6C6CC12B47C16CB4), SPH_C64(0xB7B75166D151B7C4), + SPH_C64(0xF8F83F6B933FF815), SPH_C64(0x09092D41482D091B), + SPH_C64(0xF3F31838CB18F308), SPH_C64(0x6767E6781FE667A9), + SPH_C64(0xA4A40EED490EA4F1), SPH_C64(0xEAEA65E90365EA23), + SPH_C64(0xECEC7BDF337BEC29), SPH_C64(0xB6B6546FD954B6C7), + SPH_C64(0xD4D4A33AEEA3D461), SPH_C64(0xD2D2BD0CDEBDD26B), + SPH_C64(0x141444B4A044143C), 
SPH_C64(0x1E1E66EEF0661E22), + SPH_C64(0xE1E142BA5B42E13E), SPH_C64(0x2424B4193DB4246C), + SPH_C64(0x3838D8E5DDD83848), SPH_C64(0xC6C6F9B87EF9C657), + SPH_C64(0xDBDB904D9690DB70), SPH_C64(0x4B4B7A29627A4BDD), + SPH_C64(0x7A7A8F8DF78F7A8E), SPH_C64(0x3A3AD2F7CDD23A4E), + SPH_C64(0xDEDE8160BE81DE7F), SPH_C64(0x5E5E3B94CA3B5EE2), + SPH_C64(0xDFDF8469B684DF7C), SPH_C64(0x9595FB49DCFB95A2), + SPH_C64(0xFCFC2B4FB32BFC19), SPH_C64(0xAAAA38933938AAE3), + SPH_C64(0xD7D7AC21F6ACD764), SPH_C64(0xCECED1F03ED1CE4F), + SPH_C64(0x07071B3F381B0709), SPH_C64(0x0F0F337778330F11), + SPH_C64(0x3D3DC9C8F5C93D47), SPH_C64(0x585825A2FA2558E8), + SPH_C64(0x9A9AC83EA4C89AB3), SPH_C64(0x9898C22CB4C298B5), + SPH_C64(0x9C9CD60894D69CB9), SPH_C64(0xF2F21D31C31DF20B), + SPH_C64(0xA7A701F65101A7F4), SPH_C64(0x1111559988551133), + SPH_C64(0x7E7E9BA9D79B7E82), SPH_C64(0x8B8B9DA72C9D8B80), + SPH_C64(0x43435261225243C5), SPH_C64(0x03030F1B180F0305), + SPH_C64(0xE2E24DA1434DE23B), SPH_C64(0xDCDC8B72AE8BDC79), + SPH_C64(0xE5E5569E7B56E532), SPH_C64(0xB2B2404BF940B2CB), + SPH_C64(0x4E4E6B044A6B4ED2), SPH_C64(0xC7C7FCB176FCC754), + SPH_C64(0x6D6DC4224FC46DB7), SPH_C64(0xE9E96AF21B6AE926), + SPH_C64(0x2727BB0225BB2769), SPH_C64(0x40405D7A3A5D40C0), + SPH_C64(0xD8D89F568E9FD875), SPH_C64(0x3737EB92A5EB3759), + SPH_C64(0x9292E076E4E092AB), SPH_C64(0x8F8F89830C898F8C), + SPH_C64(0x0101050908050103), SPH_C64(0x1D1D69F5E8691D27), + SPH_C64(0x535302F1A20253F5), SPH_C64(0x3E3EC6D3EDC63E42), + SPH_C64(0x595920ABF22059EB), SPH_C64(0xC1C1E28746E2C15E), + SPH_C64(0x4F4F6E0D426E4FD1), SPH_C64(0x3232FABF8DFA3256), + SPH_C64(0x16164EA6B04E163A), SPH_C64(0xFAFA35798335FA13), + SPH_C64(0x7474B9F387B9749C), SPH_C64(0xFBFB30708B30FB10), + SPH_C64(0x6363F25C3FF263A5), SPH_C64(0x9F9FD9138CD99FBC), + SPH_C64(0x3434E489BDE4345C), SPH_C64(0x1A1A72CAD0721A2E), + SPH_C64(0x2A2A82674D822A7E), SPH_C64(0x5A5A2FB0EA2F5AEE), + SPH_C64(0x8D8D83911C838D8A), SPH_C64(0xC9C9CACF06CAC946), + SPH_C64(0xCFCFD4F936D4CF4C), 
SPH_C64(0xF6F60915E309F607), + SPH_C64(0x9090EA64F4EA90AD), SPH_C64(0x282888755D882878), + SPH_C64(0x888892BC34928885), SPH_C64(0x9B9BCD37ACCD9BB0), + SPH_C64(0x3131F5A495F53153), SPH_C64(0x0E0E367E70360E12), + SPH_C64(0xBDBD733C8173BDDA), SPH_C64(0x4A4A7F206A7F4ADE), + SPH_C64(0xE8E86FFB136FE825), SPH_C64(0x9696F452C4F496A7), + SPH_C64(0xA6A604FF5904A6F7), SPH_C64(0x0C0C3C6C603C0C14), + SPH_C64(0xC8C8CFC60ECFC845), SPH_C64(0x79798096EF80798B), + SPH_C64(0xBCBC76358976BCD9), SPH_C64(0xBEBE7C27997CBEDF), + SPH_C64(0xEFEF74C42B74EF2C), SPH_C64(0x6E6ECB3957CB6EB2), + SPH_C64(0x4646434C0A4346CA), SPH_C64(0x9797F15BCCF197A4), + SPH_C64(0x5B5B2AB9E22A5BED), SPH_C64(0xEDED7ED63B7EED2A), + SPH_C64(0x19197DD1C87D192B), SPH_C64(0xD9D99A5F869AD976), + SPH_C64(0xACAC26A50926ACE9), SPH_C64(0x9999C725BCC799B6), + SPH_C64(0xA8A832812932A8E5), SPH_C64(0x29298D7C558D297B), + SPH_C64(0x6464E96307E964AC), SPH_C64(0x1F1F63E7F8631F21), + SPH_C64(0xADAD23AC0123ADEA), SPH_C64(0x55551CC7921C55FF), + SPH_C64(0x13135F8B985F1335), SPH_C64(0xBBBB6D0AB16DBBD0), + SPH_C64(0xF7F70C1CEB0CF704), SPH_C64(0x6F6FCE305FCE6FB1), + SPH_C64(0xB9B96718A167B9D6), SPH_C64(0x47474645024647C9), + SPH_C64(0x2F2F934A65932F71), SPH_C64(0xEEEE71CD2371EE2F), + SPH_C64(0xB8B86211A962B8D5), SPH_C64(0x7B7B8A84FF8A7B8D), + SPH_C64(0x898997B53C978986), SPH_C64(0x3030F0AD9DF03050), + SPH_C64(0xD3D3B805D6B8D368), SPH_C64(0x7F7F9EA0DF9E7F81), + SPH_C64(0x7676B3E197B3769A), SPH_C64(0x8282B0E664B0829B) +}; + +static const sph_u64 old0_T7[256] = { + SPH_C64(0x68D50F67D568B868), SPH_C64(0xD0B71ECEB7D06DD0), + SPH_C64(0xEB60E00B60EB20EB), SPH_C64(0x2B876E45872B7D2B), + SPH_C64(0x4875327A7548D848), SPH_C64(0x9DD3019CD39DBA9D), + SPH_C64(0x6ADF1D77DF6ABE6A), SPH_C64(0xE453977353E431E4), + SPH_C64(0xE348A84B48E338E3), SPH_C64(0xA315D27115A3F8A3), + SPH_C64(0x5613DC8A1356FA56), SPH_C64(0x81BFFD7CBF819E81), + SPH_C64(0x7D94B2CF947D877D), SPH_C64(0xF1122ADB12F10EF1), + SPH_C64(0x85ABD95CAB859285), SPH_C64(0x9EDC1A84DC9EBF9E), + 
SPH_C64(0x2C9C517D9C2C742C), SPH_C64(0x8E8C8A048C8E8F8E), + SPH_C64(0x78859FE785788878), SPH_C64(0xCAC5D41EC5CA43CA), + SPH_C64(0x174BAFB84B173917), SPH_C64(0xA937882137A9E6A9), + SPH_C64(0x61F84E2FF861A361), SPH_C64(0xD5A633E6A6D562D5), + SPH_C64(0x5D348FD2345DE75D), SPH_C64(0x0B275358270B1D0B), + SPH_C64(0x8C869814868C898C), SPH_C64(0x3CCCC1FDCC3C443C), + SPH_C64(0x77B6E89FB6779977), SPH_C64(0x5108E3B20851F351), + SPH_C64(0x22AA2F0DAA226622), SPH_C64(0x4257682A5742C642), + SPH_C64(0x3FC3DAE5C33F413F), SPH_C64(0x5419CE9A1954FC54), + SPH_C64(0x415873325841C341), SPH_C64(0x80BAF474BA809D80), + SPH_C64(0xCCDBE22EDBCC49CC), SPH_C64(0x86A4C244A4869786), + SPH_C64(0xB34542F145B3C8B3), SPH_C64(0x1878D8C078182818), + SPH_C64(0x2E96436D962E722E), SPH_C64(0x5716D5821657F957), + SPH_C64(0x061E36301E060A06), SPH_C64(0x62F75537F762A662), + SPH_C64(0xF40307F303F401F4), SPH_C64(0x36EE9BADEE365A36), + SPH_C64(0xD1B217C6B2D16ED1), SPH_C64(0x6BDA147FDA6BBD6B), + SPH_C64(0x1B77C3D8771B2D1B), SPH_C64(0x65EC6A0FEC65AF65), + SPH_C64(0x75BCFA8FBC759F75), SPH_C64(0x1050908050103010), + SPH_C64(0xDA95449E95DA73DA), SPH_C64(0x49703B727049DB49), + SPH_C64(0x26BE0B2DBE266A26), SPH_C64(0xF93A629B3AF916F9), + SPH_C64(0xCBC0DD16C0CB40CB), SPH_C64(0x66E37117E366AA66), + SPH_C64(0xE75C8C6B5CE734E7), SPH_C64(0xBA6803B968BAD3BA), + SPH_C64(0xAE2CB7192CAEEFAE), SPH_C64(0x500DEABA0D50F050), + SPH_C64(0x5207F8AA0752F652), SPH_C64(0xAB3D9A313DABE0AB), + SPH_C64(0x05112D2811050F05), SPH_C64(0xF01723D317F00DF0), + SPH_C64(0x0D396568390D170D), SPH_C64(0x73A2CCBFA2739573), + SPH_C64(0x3BD7FEC5D73B4D3B), SPH_C64(0x0414242014040C04), + SPH_C64(0x20A03D1DA0206020), SPH_C64(0xFE215DA321FE1FFE), + SPH_C64(0xDD8E7BA68EDD7ADD), SPH_C64(0xF5060EFB06F502F5), + SPH_C64(0xB45E7DC95EB4C1B4), SPH_C64(0x5F3E9DC23E5FE15F), + SPH_C64(0x0A225A50220A1E0A), SPH_C64(0xB55B74C15BB5C2B5), + SPH_C64(0xC0E78E4EE7C05DC0), SPH_C64(0xA01AC9691AA0FDA0), + SPH_C64(0x71A8DEAFA8719371), SPH_C64(0xA50BE4410BA5F2A5), + 
SPH_C64(0x2D995875992D772D), SPH_C64(0x60FD4727FD60A060), + SPH_C64(0x72A7C5B7A7729672), SPH_C64(0x93E57FECE593A893), + SPH_C64(0x39DDECD5DD394B39), SPH_C64(0x0828484028081808), + SPH_C64(0x83B5EF6CB5839883), SPH_C64(0x21A53415A5216321), + SPH_C64(0x5C3186DA315CE45C), SPH_C64(0x87A1CB4CA1879487), + SPH_C64(0xB14F50E14FB1CEB1), SPH_C64(0xE047B35347E03DE0), + SPH_C64(0x0000000000000000), SPH_C64(0xC3E89556E8C358C3), + SPH_C64(0x125A82905A123612), SPH_C64(0x91EF6DFCEF91AE91), + SPH_C64(0x8A98AE24988A838A), SPH_C64(0x020A12100A020602), + SPH_C64(0x1C6CFCE06C1C241C), SPH_C64(0xE659856359E637E6), + SPH_C64(0x454C57124C45CF45), SPH_C64(0xC2ED9C5EEDC25BC2), + SPH_C64(0xC4F3AA6EF3C451C4), SPH_C64(0xFD2E46BB2EFD1AFD), + SPH_C64(0xBF792E9179BFDCBF), SPH_C64(0x44495E1A4944CC44), + SPH_C64(0xA11FC0611FA1FEA1), SPH_C64(0x4C61165A614CD44C), + SPH_C64(0x33FFB685FF335533), SPH_C64(0xC5F6A366F6C552C5), + SPH_C64(0x84AED054AE849184), SPH_C64(0x23AF2605AF236523), + SPH_C64(0x7C91BBC7917C847C), SPH_C64(0xB04A59E94AB0CDB0), + SPH_C64(0x25B11035B1256F25), SPH_C64(0x1541BDA841153F15), + SPH_C64(0x35E180B5E1355F35), SPH_C64(0x69D0066FD069BB69), + SPH_C64(0xFF2454AB24FF1CFF), SPH_C64(0x94FE40D4FE94A194), + SPH_C64(0x4D641F52644DD74D), SPH_C64(0x70ADD7A7AD709070), + SPH_C64(0xA210DB7910A2FBA2), SPH_C64(0xAF29BE1129AFECAF), + SPH_C64(0xCDDEEB26DECD4ACD), SPH_C64(0xD6A928FEA9D667D6), + SPH_C64(0x6CC12B47C16CB46C), SPH_C64(0xB75166D151B7C4B7), + SPH_C64(0xF83F6B933FF815F8), SPH_C64(0x092D41482D091B09), + SPH_C64(0xF31838CB18F308F3), SPH_C64(0x67E6781FE667A967), + SPH_C64(0xA40EED490EA4F1A4), SPH_C64(0xEA65E90365EA23EA), + SPH_C64(0xEC7BDF337BEC29EC), SPH_C64(0xB6546FD954B6C7B6), + SPH_C64(0xD4A33AEEA3D461D4), SPH_C64(0xD2BD0CDEBDD26BD2), + SPH_C64(0x1444B4A044143C14), SPH_C64(0x1E66EEF0661E221E), + SPH_C64(0xE142BA5B42E13EE1), SPH_C64(0x24B4193DB4246C24), + SPH_C64(0x38D8E5DDD8384838), SPH_C64(0xC6F9B87EF9C657C6), + SPH_C64(0xDB904D9690DB70DB), SPH_C64(0x4B7A29627A4BDD4B), + 
SPH_C64(0x7A8F8DF78F7A8E7A), SPH_C64(0x3AD2F7CDD23A4E3A), + SPH_C64(0xDE8160BE81DE7FDE), SPH_C64(0x5E3B94CA3B5EE25E), + SPH_C64(0xDF8469B684DF7CDF), SPH_C64(0x95FB49DCFB95A295), + SPH_C64(0xFC2B4FB32BFC19FC), SPH_C64(0xAA38933938AAE3AA), + SPH_C64(0xD7AC21F6ACD764D7), SPH_C64(0xCED1F03ED1CE4FCE), + SPH_C64(0x071B3F381B070907), SPH_C64(0x0F337778330F110F), + SPH_C64(0x3DC9C8F5C93D473D), SPH_C64(0x5825A2FA2558E858), + SPH_C64(0x9AC83EA4C89AB39A), SPH_C64(0x98C22CB4C298B598), + SPH_C64(0x9CD60894D69CB99C), SPH_C64(0xF21D31C31DF20BF2), + SPH_C64(0xA701F65101A7F4A7), SPH_C64(0x1155998855113311), + SPH_C64(0x7E9BA9D79B7E827E), SPH_C64(0x8B9DA72C9D8B808B), + SPH_C64(0x435261225243C543), SPH_C64(0x030F1B180F030503), + SPH_C64(0xE24DA1434DE23BE2), SPH_C64(0xDC8B72AE8BDC79DC), + SPH_C64(0xE5569E7B56E532E5), SPH_C64(0xB2404BF940B2CBB2), + SPH_C64(0x4E6B044A6B4ED24E), SPH_C64(0xC7FCB176FCC754C7), + SPH_C64(0x6DC4224FC46DB76D), SPH_C64(0xE96AF21B6AE926E9), + SPH_C64(0x27BB0225BB276927), SPH_C64(0x405D7A3A5D40C040), + SPH_C64(0xD89F568E9FD875D8), SPH_C64(0x37EB92A5EB375937), + SPH_C64(0x92E076E4E092AB92), SPH_C64(0x8F89830C898F8C8F), + SPH_C64(0x0105090805010301), SPH_C64(0x1D69F5E8691D271D), + SPH_C64(0x5302F1A20253F553), SPH_C64(0x3EC6D3EDC63E423E), + SPH_C64(0x5920ABF22059EB59), SPH_C64(0xC1E28746E2C15EC1), + SPH_C64(0x4F6E0D426E4FD14F), SPH_C64(0x32FABF8DFA325632), + SPH_C64(0x164EA6B04E163A16), SPH_C64(0xFA35798335FA13FA), + SPH_C64(0x74B9F387B9749C74), SPH_C64(0xFB30708B30FB10FB), + SPH_C64(0x63F25C3FF263A563), SPH_C64(0x9FD9138CD99FBC9F), + SPH_C64(0x34E489BDE4345C34), SPH_C64(0x1A72CAD0721A2E1A), + SPH_C64(0x2A82674D822A7E2A), SPH_C64(0x5A2FB0EA2F5AEE5A), + SPH_C64(0x8D83911C838D8A8D), SPH_C64(0xC9CACF06CAC946C9), + SPH_C64(0xCFD4F936D4CF4CCF), SPH_C64(0xF60915E309F607F6), + SPH_C64(0x90EA64F4EA90AD90), SPH_C64(0x2888755D88287828), + SPH_C64(0x8892BC3492888588), SPH_C64(0x9BCD37ACCD9BB09B), + SPH_C64(0x31F5A495F5315331), SPH_C64(0x0E367E70360E120E), + 
SPH_C64(0xBD733C8173BDDABD), SPH_C64(0x4A7F206A7F4ADE4A), + SPH_C64(0xE86FFB136FE825E8), SPH_C64(0x96F452C4F496A796), + SPH_C64(0xA604FF5904A6F7A6), SPH_C64(0x0C3C6C603C0C140C), + SPH_C64(0xC8CFC60ECFC845C8), SPH_C64(0x798096EF80798B79), + SPH_C64(0xBC76358976BCD9BC), SPH_C64(0xBE7C27997CBEDFBE), + SPH_C64(0xEF74C42B74EF2CEF), SPH_C64(0x6ECB3957CB6EB26E), + SPH_C64(0x46434C0A4346CA46), SPH_C64(0x97F15BCCF197A497), + SPH_C64(0x5B2AB9E22A5BED5B), SPH_C64(0xED7ED63B7EED2AED), + SPH_C64(0x197DD1C87D192B19), SPH_C64(0xD99A5F869AD976D9), + SPH_C64(0xAC26A50926ACE9AC), SPH_C64(0x99C725BCC799B699), + SPH_C64(0xA832812932A8E5A8), SPH_C64(0x298D7C558D297B29), + SPH_C64(0x64E96307E964AC64), SPH_C64(0x1F63E7F8631F211F), + SPH_C64(0xAD23AC0123ADEAAD), SPH_C64(0x551CC7921C55FF55), + SPH_C64(0x135F8B985F133513), SPH_C64(0xBB6D0AB16DBBD0BB), + SPH_C64(0xF70C1CEB0CF704F7), SPH_C64(0x6FCE305FCE6FB16F), + SPH_C64(0xB96718A167B9D6B9), SPH_C64(0x474645024647C947), + SPH_C64(0x2F934A65932F712F), SPH_C64(0xEE71CD2371EE2FEE), + SPH_C64(0xB86211A962B8D5B8), SPH_C64(0x7B8A84FF8A7B8D7B), + SPH_C64(0x8997B53C97898689), SPH_C64(0x30F0AD9DF0305030), + SPH_C64(0xD3B805D6B8D368D3), SPH_C64(0x7F9EA0DF9E7F817F), + SPH_C64(0x76B3E197B3769A76), SPH_C64(0x82B0E664B0829B82) +}; + +#endif + +static const sph_u64 old0_RC[10] = { + SPH_C64(0xE46A9D482BEBD068), + SPH_C64(0x9E85F17D8156A3E3), + SPH_C64(0xD561A917CA788E2C), + SPH_C64(0x422251773C8C0B5D), + SPH_C64(0x18B386CC8041543F), + SPH_C64(0x6BD136F46206572E), + SPH_C64(0xF92649DA1075651B), + SPH_C64(0xAB5250AEBAE766CB), + SPH_C64(0xFE20043B730DF005), + SPH_C64(0xA0C0B50A5FB4F5DD) +}; + +/* ====================================================================== */ +/* + * Constants for plain WHIRLPOOL-1 (second version). 
+ */ + +static const sph_u64 old1_T0[256] = { + SPH_C64(0x78D8C07818281818), SPH_C64(0xAF2605AF23652323), + SPH_C64(0xF9B87EF9C657C6C6), SPH_C64(0x6FFB136FE825E8E8), + SPH_C64(0xA1CB4CA187948787), SPH_C64(0x6211A962B8D5B8B8), + SPH_C64(0x0509080501030101), SPH_C64(0x6E0D426E4FD14F4F), + SPH_C64(0xEE9BADEE365A3636), SPH_C64(0x04FF5904A6F7A6A6), + SPH_C64(0xBD0CDEBDD26BD2D2), SPH_C64(0x060EFB06F502F5F5), + SPH_C64(0x8096EF80798B7979), SPH_C64(0xCE305FCE6FB16F6F), + SPH_C64(0xEF6DFCEF91AE9191), SPH_C64(0x07F8AA0752F65252), + SPH_C64(0xFD4727FD60A06060), SPH_C64(0x76358976BCD9BCBC), + SPH_C64(0xCD37ACCD9BB09B9B), SPH_C64(0x8C8A048C8E8F8E8E), + SPH_C64(0x15D27115A3F8A3A3), SPH_C64(0x3C6C603C0C140C0C), + SPH_C64(0x8A84FF8A7B8D7B7B), SPH_C64(0xE180B5E1355F3535), + SPH_C64(0x69F5E8691D271D1D), SPH_C64(0x47B35347E03DE0E0), + SPH_C64(0xAC21F6ACD764D7D7), SPH_C64(0xED9C5EEDC25BC2C2), + SPH_C64(0x96436D962E722E2E), SPH_C64(0x7A29627A4BDD4B4B), + SPH_C64(0x215DA321FE1FFEFE), SPH_C64(0x16D5821657F95757), + SPH_C64(0x41BDA841153F1515), SPH_C64(0xB6E89FB677997777), + SPH_C64(0xEB92A5EB37593737), SPH_C64(0x569E7B56E532E5E5), + SPH_C64(0xD9138CD99FBC9F9F), SPH_C64(0x1723D317F00DF0F0), + SPH_C64(0x7F206A7F4ADE4A4A), SPH_C64(0x95449E95DA73DADA), + SPH_C64(0x25A2FA2558E85858), SPH_C64(0xCACF06CAC946C9C9), + SPH_C64(0x8D7C558D297B2929), SPH_C64(0x225A50220A1E0A0A), + SPH_C64(0x4F50E14FB1CEB1B1), SPH_C64(0x1AC9691AA0FDA0A0), + SPH_C64(0xDA147FDA6BBD6B6B), SPH_C64(0xABD95CAB85928585), + SPH_C64(0x733C8173BDDABDBD), SPH_C64(0x348FD2345DE75D5D), + SPH_C64(0x5090805010301010), SPH_C64(0x0307F303F401F4F4), + SPH_C64(0xC0DD16C0CB40CBCB), SPH_C64(0xC6D3EDC63E423E3E), + SPH_C64(0x112D2811050F0505), SPH_C64(0xE6781FE667A96767), + SPH_C64(0x53977353E431E4E4), SPH_C64(0xBB0225BB27692727), + SPH_C64(0x5873325841C34141), SPH_C64(0x9DA72C9D8B808B8B), + SPH_C64(0x01F65101A7F4A7A7), SPH_C64(0x94B2CF947D877D7D), + SPH_C64(0xFB49DCFB95A29595), SPH_C64(0x9F568E9FD875D8D8), + SPH_C64(0x30708B30FB10FBFB), 
SPH_C64(0x71CD2371EE2FEEEE), + SPH_C64(0x91BBC7917C847C7C), SPH_C64(0xE37117E366AA6666), + SPH_C64(0x8E7BA68EDD7ADDDD), SPH_C64(0x4BAFB84B17391717), + SPH_C64(0x4645024647C94747), SPH_C64(0xDC1A84DC9EBF9E9E), + SPH_C64(0xC5D41EC5CA43CACA), SPH_C64(0x995875992D772D2D), + SPH_C64(0x792E9179BFDCBFBF), SPH_C64(0x1B3F381B07090707), + SPH_C64(0x23AC0123ADEAADAD), SPH_C64(0x2FB0EA2F5AEE5A5A), + SPH_C64(0xB5EF6CB583988383), SPH_C64(0xFFB685FF33553333), + SPH_C64(0xF25C3FF263A56363), SPH_C64(0x0A12100A02060202), + SPH_C64(0x38933938AAE3AAAA), SPH_C64(0xA8DEAFA871937171), + SPH_C64(0xCFC60ECFC845C8C8), SPH_C64(0x7DD1C87D192B1919), + SPH_C64(0x703B727049DB4949), SPH_C64(0x9A5F869AD976D9D9), + SPH_C64(0x1D31C31DF20BF2F2), SPH_C64(0x48A84B48E338E3E3), + SPH_C64(0x2AB9E22A5BED5B5B), SPH_C64(0x92BC349288858888), + SPH_C64(0xC83EA4C89AB39A9A), SPH_C64(0xBE0B2DBE266A2626), + SPH_C64(0xFABF8DFA32563232), SPH_C64(0x4A59E94AB0CDB0B0), + SPH_C64(0x6AF21B6AE926E9E9), SPH_C64(0x337778330F110F0F), + SPH_C64(0xA633E6A6D562D5D5), SPH_C64(0xBAF474BA809D8080), + SPH_C64(0x7C27997CBEDFBEBE), SPH_C64(0xDEEB26DECD4ACDCD), + SPH_C64(0xE489BDE4345C3434), SPH_C64(0x75327A7548D84848), + SPH_C64(0x2454AB24FF1CFFFF), SPH_C64(0x8F8DF78F7A8E7A7A), + SPH_C64(0xEA64F4EA90AD9090), SPH_C64(0x3E9DC23E5FE15F5F), + SPH_C64(0xA03D1DA020602020), SPH_C64(0xD50F67D568B86868), + SPH_C64(0x72CAD0721A2E1A1A), SPH_C64(0x2CB7192CAEEFAEAE), + SPH_C64(0x5E7DC95EB4C1B4B4), SPH_C64(0x19CE9A1954FC5454), + SPH_C64(0xE57FECE593A89393), SPH_C64(0xAA2F0DAA22662222), + SPH_C64(0xE96307E964AC6464), SPH_C64(0x122ADB12F10EF1F1), + SPH_C64(0xA2CCBFA273957373), SPH_C64(0x5A82905A12361212), + SPH_C64(0x5D7A3A5D40C04040), SPH_C64(0x2848402808180808), + SPH_C64(0xE89556E8C358C3C3), SPH_C64(0x7BDF337BEC29ECEC), + SPH_C64(0x904D9690DB70DBDB), SPH_C64(0x1FC0611FA1FEA1A1), + SPH_C64(0x83911C838D8A8D8D), SPH_C64(0xC9C8F5C93D473D3D), + SPH_C64(0xF15BCCF197A49797), SPH_C64(0x0000000000000000), + SPH_C64(0xD4F936D4CF4CCFCF), 
SPH_C64(0x876E45872B7D2B2B), + SPH_C64(0xB3E197B3769A7676), SPH_C64(0xB0E664B0829B8282), + SPH_C64(0xA928FEA9D667D6D6), SPH_C64(0x77C3D8771B2D1B1B), + SPH_C64(0x5B74C15BB5C2B5B5), SPH_C64(0x29BE1129AFECAFAF), + SPH_C64(0xDF1D77DF6ABE6A6A), SPH_C64(0x0DEABA0D50F05050), + SPH_C64(0x4C57124C45CF4545), SPH_C64(0x1838CB18F308F3F3), + SPH_C64(0xF0AD9DF030503030), SPH_C64(0x74C42B74EF2CEFEF), + SPH_C64(0xC3DAE5C33F413F3F), SPH_C64(0x1CC7921C55FF5555), + SPH_C64(0x10DB7910A2FBA2A2), SPH_C64(0x65E90365EA23EAEA), + SPH_C64(0xEC6A0FEC65AF6565), SPH_C64(0x6803B968BAD3BABA), + SPH_C64(0x934A65932F712F2F), SPH_C64(0xE78E4EE7C05DC0C0), + SPH_C64(0x8160BE81DE7FDEDE), SPH_C64(0x6CFCE06C1C241C1C), + SPH_C64(0x2E46BB2EFD1AFDFD), SPH_C64(0x641F52644DD74D4D), + SPH_C64(0xE076E4E092AB9292), SPH_C64(0xBCFA8FBC759F7575), + SPH_C64(0x1E36301E060A0606), SPH_C64(0x98AE24988A838A8A), + SPH_C64(0x404BF940B2CBB2B2), SPH_C64(0x59856359E637E6E6), + SPH_C64(0x367E70360E120E0E), SPH_C64(0x63E7F8631F211F1F), + SPH_C64(0xF75537F762A66262), SPH_C64(0xA33AEEA3D461D4D4), + SPH_C64(0x32812932A8E5A8A8), SPH_C64(0xF452C4F496A79696), + SPH_C64(0x3A629B3AF916F9F9), SPH_C64(0xF6A366F6C552C5C5), + SPH_C64(0xB11035B1256F2525), SPH_C64(0x20ABF22059EB5959), + SPH_C64(0xAED054AE84918484), SPH_C64(0xA7C5B7A772967272), + SPH_C64(0xDDECD5DD394B3939), SPH_C64(0x61165A614CD44C4C), + SPH_C64(0x3B94CA3B5EE25E5E), SPH_C64(0x859FE78578887878), + SPH_C64(0xD8E5DDD838483838), SPH_C64(0x869814868C898C8C), + SPH_C64(0xB217C6B2D16ED1D1), SPH_C64(0x0BE4410BA5F2A5A5), + SPH_C64(0x4DA1434DE23BE2E2), SPH_C64(0xF84E2FF861A36161), + SPH_C64(0x4542F145B3C8B3B3), SPH_C64(0xA53415A521632121), + SPH_C64(0xD60894D69CB99C9C), SPH_C64(0x66EEF0661E221E1E), + SPH_C64(0x5261225243C54343), SPH_C64(0xFCB176FCC754C7C7), + SPH_C64(0x2B4FB32BFC19FCFC), SPH_C64(0x14242014040C0404), + SPH_C64(0x08E3B20851F35151), SPH_C64(0xC725BCC799B69999), + SPH_C64(0xC4224FC46DB76D6D), SPH_C64(0x396568390D170D0D), + SPH_C64(0x35798335FA13FAFA), 
SPH_C64(0x8469B684DF7CDFDF), + SPH_C64(0x9BA9D79B7E827E7E), SPH_C64(0xB4193DB4246C2424), + SPH_C64(0xD7FEC5D73B4D3B3B), SPH_C64(0x3D9A313DABE0ABAB), + SPH_C64(0xD1F03ED1CE4FCECE), SPH_C64(0x5599885511331111), + SPH_C64(0x89830C898F8C8F8F), SPH_C64(0x6B044A6B4ED24E4E), + SPH_C64(0x5166D151B7C4B7B7), SPH_C64(0x60E00B60EB20EBEB), + SPH_C64(0xCCC1FDCC3C443C3C), SPH_C64(0xBFFD7CBF819E8181), + SPH_C64(0xFE40D4FE94A19494), SPH_C64(0x0C1CEB0CF704F7F7), + SPH_C64(0x6718A167B9D6B9B9), SPH_C64(0x5F8B985F13351313), + SPH_C64(0x9C517D9C2C742C2C), SPH_C64(0xB805D6B8D368D3D3), + SPH_C64(0x5C8C6B5CE734E7E7), SPH_C64(0xCB3957CB6EB26E6E), + SPH_C64(0xF3AA6EF3C451C4C4), SPH_C64(0x0F1B180F03050303), + SPH_C64(0x13DC8A1356FA5656), SPH_C64(0x495E1A4944CC4444), + SPH_C64(0x9EA0DF9E7F817F7F), SPH_C64(0x37882137A9E6A9A9), + SPH_C64(0x82674D822A7E2A2A), SPH_C64(0x6D0AB16DBBD0BBBB), + SPH_C64(0xE28746E2C15EC1C1), SPH_C64(0x02F1A20253F55353), + SPH_C64(0x8B72AE8BDC79DCDC), SPH_C64(0x275358270B1D0B0B), + SPH_C64(0xD3019CD39DBA9D9D), SPH_C64(0xC12B47C16CB46C6C), + SPH_C64(0xF5A495F531533131), SPH_C64(0xB9F387B9749C7474), + SPH_C64(0x0915E309F607F6F6), SPH_C64(0x434C0A4346CA4646), + SPH_C64(0x26A50926ACE9ACAC), SPH_C64(0x97B53C9789868989), + SPH_C64(0x44B4A044143C1414), SPH_C64(0x42BA5B42E13EE1E1), + SPH_C64(0x4EA6B04E163A1616), SPH_C64(0xD2F7CDD23A4E3A3A), + SPH_C64(0xD0066FD069BB6969), SPH_C64(0x2D41482D091B0909), + SPH_C64(0xADD7A7AD70907070), SPH_C64(0x546FD954B6C7B6B6), + SPH_C64(0xB71ECEB7D06DD0D0), SPH_C64(0x7ED63B7EED2AEDED), + SPH_C64(0xDBE22EDBCC49CCCC), SPH_C64(0x57682A5742C64242), + SPH_C64(0xC22CB4C298B59898), SPH_C64(0x0EED490EA4F1A4A4), + SPH_C64(0x88755D8828782828), SPH_C64(0x3186DA315CE45C5C), + SPH_C64(0x3F6B933FF815F8F8), SPH_C64(0xA4C244A486978686) +}; + +#if !SPH_SMALL_FOOTPRINT_WHIRLPOOL + +static const sph_u64 old1_T1[256] = { + SPH_C64(0xD8C0781828181878), SPH_C64(0x2605AF23652323AF), + SPH_C64(0xB87EF9C657C6C6F9), SPH_C64(0xFB136FE825E8E86F), + 
SPH_C64(0xCB4CA187948787A1), SPH_C64(0x11A962B8D5B8B862), + SPH_C64(0x0908050103010105), SPH_C64(0x0D426E4FD14F4F6E), + SPH_C64(0x9BADEE365A3636EE), SPH_C64(0xFF5904A6F7A6A604), + SPH_C64(0x0CDEBDD26BD2D2BD), SPH_C64(0x0EFB06F502F5F506), + SPH_C64(0x96EF80798B797980), SPH_C64(0x305FCE6FB16F6FCE), + SPH_C64(0x6DFCEF91AE9191EF), SPH_C64(0xF8AA0752F6525207), + SPH_C64(0x4727FD60A06060FD), SPH_C64(0x358976BCD9BCBC76), + SPH_C64(0x37ACCD9BB09B9BCD), SPH_C64(0x8A048C8E8F8E8E8C), + SPH_C64(0xD27115A3F8A3A315), SPH_C64(0x6C603C0C140C0C3C), + SPH_C64(0x84FF8A7B8D7B7B8A), SPH_C64(0x80B5E1355F3535E1), + SPH_C64(0xF5E8691D271D1D69), SPH_C64(0xB35347E03DE0E047), + SPH_C64(0x21F6ACD764D7D7AC), SPH_C64(0x9C5EEDC25BC2C2ED), + SPH_C64(0x436D962E722E2E96), SPH_C64(0x29627A4BDD4B4B7A), + SPH_C64(0x5DA321FE1FFEFE21), SPH_C64(0xD5821657F9575716), + SPH_C64(0xBDA841153F151541), SPH_C64(0xE89FB677997777B6), + SPH_C64(0x92A5EB37593737EB), SPH_C64(0x9E7B56E532E5E556), + SPH_C64(0x138CD99FBC9F9FD9), SPH_C64(0x23D317F00DF0F017), + SPH_C64(0x206A7F4ADE4A4A7F), SPH_C64(0x449E95DA73DADA95), + SPH_C64(0xA2FA2558E8585825), SPH_C64(0xCF06CAC946C9C9CA), + SPH_C64(0x7C558D297B29298D), SPH_C64(0x5A50220A1E0A0A22), + SPH_C64(0x50E14FB1CEB1B14F), SPH_C64(0xC9691AA0FDA0A01A), + SPH_C64(0x147FDA6BBD6B6BDA), SPH_C64(0xD95CAB85928585AB), + SPH_C64(0x3C8173BDDABDBD73), SPH_C64(0x8FD2345DE75D5D34), + SPH_C64(0x9080501030101050), SPH_C64(0x07F303F401F4F403), + SPH_C64(0xDD16C0CB40CBCBC0), SPH_C64(0xD3EDC63E423E3EC6), + SPH_C64(0x2D2811050F050511), SPH_C64(0x781FE667A96767E6), + SPH_C64(0x977353E431E4E453), SPH_C64(0x0225BB27692727BB), + SPH_C64(0x73325841C3414158), SPH_C64(0xA72C9D8B808B8B9D), + SPH_C64(0xF65101A7F4A7A701), SPH_C64(0xB2CF947D877D7D94), + SPH_C64(0x49DCFB95A29595FB), SPH_C64(0x568E9FD875D8D89F), + SPH_C64(0x708B30FB10FBFB30), SPH_C64(0xCD2371EE2FEEEE71), + SPH_C64(0xBBC7917C847C7C91), SPH_C64(0x7117E366AA6666E3), + SPH_C64(0x7BA68EDD7ADDDD8E), SPH_C64(0xAFB84B173917174B), + 
SPH_C64(0x45024647C9474746), SPH_C64(0x1A84DC9EBF9E9EDC), + SPH_C64(0xD41EC5CA43CACAC5), SPH_C64(0x5875992D772D2D99), + SPH_C64(0x2E9179BFDCBFBF79), SPH_C64(0x3F381B070907071B), + SPH_C64(0xAC0123ADEAADAD23), SPH_C64(0xB0EA2F5AEE5A5A2F), + SPH_C64(0xEF6CB583988383B5), SPH_C64(0xB685FF33553333FF), + SPH_C64(0x5C3FF263A56363F2), SPH_C64(0x12100A020602020A), + SPH_C64(0x933938AAE3AAAA38), SPH_C64(0xDEAFA871937171A8), + SPH_C64(0xC60ECFC845C8C8CF), SPH_C64(0xD1C87D192B19197D), + SPH_C64(0x3B727049DB494970), SPH_C64(0x5F869AD976D9D99A), + SPH_C64(0x31C31DF20BF2F21D), SPH_C64(0xA84B48E338E3E348), + SPH_C64(0xB9E22A5BED5B5B2A), SPH_C64(0xBC34928885888892), + SPH_C64(0x3EA4C89AB39A9AC8), SPH_C64(0x0B2DBE266A2626BE), + SPH_C64(0xBF8DFA32563232FA), SPH_C64(0x59E94AB0CDB0B04A), + SPH_C64(0xF21B6AE926E9E96A), SPH_C64(0x7778330F110F0F33), + SPH_C64(0x33E6A6D562D5D5A6), SPH_C64(0xF474BA809D8080BA), + SPH_C64(0x27997CBEDFBEBE7C), SPH_C64(0xEB26DECD4ACDCDDE), + SPH_C64(0x89BDE4345C3434E4), SPH_C64(0x327A7548D8484875), + SPH_C64(0x54AB24FF1CFFFF24), SPH_C64(0x8DF78F7A8E7A7A8F), + SPH_C64(0x64F4EA90AD9090EA), SPH_C64(0x9DC23E5FE15F5F3E), + SPH_C64(0x3D1DA020602020A0), SPH_C64(0x0F67D568B86868D5), + SPH_C64(0xCAD0721A2E1A1A72), SPH_C64(0xB7192CAEEFAEAE2C), + SPH_C64(0x7DC95EB4C1B4B45E), SPH_C64(0xCE9A1954FC545419), + SPH_C64(0x7FECE593A89393E5), SPH_C64(0x2F0DAA22662222AA), + SPH_C64(0x6307E964AC6464E9), SPH_C64(0x2ADB12F10EF1F112), + SPH_C64(0xCCBFA273957373A2), SPH_C64(0x82905A123612125A), + SPH_C64(0x7A3A5D40C040405D), SPH_C64(0x4840280818080828), + SPH_C64(0x9556E8C358C3C3E8), SPH_C64(0xDF337BEC29ECEC7B), + SPH_C64(0x4D9690DB70DBDB90), SPH_C64(0xC0611FA1FEA1A11F), + SPH_C64(0x911C838D8A8D8D83), SPH_C64(0xC8F5C93D473D3DC9), + SPH_C64(0x5BCCF197A49797F1), SPH_C64(0x0000000000000000), + SPH_C64(0xF936D4CF4CCFCFD4), SPH_C64(0x6E45872B7D2B2B87), + SPH_C64(0xE197B3769A7676B3), SPH_C64(0xE664B0829B8282B0), + SPH_C64(0x28FEA9D667D6D6A9), SPH_C64(0xC3D8771B2D1B1B77), + 
SPH_C64(0x74C15BB5C2B5B55B), SPH_C64(0xBE1129AFECAFAF29), + SPH_C64(0x1D77DF6ABE6A6ADF), SPH_C64(0xEABA0D50F050500D), + SPH_C64(0x57124C45CF45454C), SPH_C64(0x38CB18F308F3F318), + SPH_C64(0xAD9DF030503030F0), SPH_C64(0xC42B74EF2CEFEF74), + SPH_C64(0xDAE5C33F413F3FC3), SPH_C64(0xC7921C55FF55551C), + SPH_C64(0xDB7910A2FBA2A210), SPH_C64(0xE90365EA23EAEA65), + SPH_C64(0x6A0FEC65AF6565EC), SPH_C64(0x03B968BAD3BABA68), + SPH_C64(0x4A65932F712F2F93), SPH_C64(0x8E4EE7C05DC0C0E7), + SPH_C64(0x60BE81DE7FDEDE81), SPH_C64(0xFCE06C1C241C1C6C), + SPH_C64(0x46BB2EFD1AFDFD2E), SPH_C64(0x1F52644DD74D4D64), + SPH_C64(0x76E4E092AB9292E0), SPH_C64(0xFA8FBC759F7575BC), + SPH_C64(0x36301E060A06061E), SPH_C64(0xAE24988A838A8A98), + SPH_C64(0x4BF940B2CBB2B240), SPH_C64(0x856359E637E6E659), + SPH_C64(0x7E70360E120E0E36), SPH_C64(0xE7F8631F211F1F63), + SPH_C64(0x5537F762A66262F7), SPH_C64(0x3AEEA3D461D4D4A3), + SPH_C64(0x812932A8E5A8A832), SPH_C64(0x52C4F496A79696F4), + SPH_C64(0x629B3AF916F9F93A), SPH_C64(0xA366F6C552C5C5F6), + SPH_C64(0x1035B1256F2525B1), SPH_C64(0xABF22059EB595920), + SPH_C64(0xD054AE84918484AE), SPH_C64(0xC5B7A772967272A7), + SPH_C64(0xECD5DD394B3939DD), SPH_C64(0x165A614CD44C4C61), + SPH_C64(0x94CA3B5EE25E5E3B), SPH_C64(0x9FE7857888787885), + SPH_C64(0xE5DDD838483838D8), SPH_C64(0x9814868C898C8C86), + SPH_C64(0x17C6B2D16ED1D1B2), SPH_C64(0xE4410BA5F2A5A50B), + SPH_C64(0xA1434DE23BE2E24D), SPH_C64(0x4E2FF861A36161F8), + SPH_C64(0x42F145B3C8B3B345), SPH_C64(0x3415A521632121A5), + SPH_C64(0x0894D69CB99C9CD6), SPH_C64(0xEEF0661E221E1E66), + SPH_C64(0x61225243C5434352), SPH_C64(0xB176FCC754C7C7FC), + SPH_C64(0x4FB32BFC19FCFC2B), SPH_C64(0x242014040C040414), + SPH_C64(0xE3B20851F3515108), SPH_C64(0x25BCC799B69999C7), + SPH_C64(0x224FC46DB76D6DC4), SPH_C64(0x6568390D170D0D39), + SPH_C64(0x798335FA13FAFA35), SPH_C64(0x69B684DF7CDFDF84), + SPH_C64(0xA9D79B7E827E7E9B), SPH_C64(0x193DB4246C2424B4), + SPH_C64(0xFEC5D73B4D3B3BD7), SPH_C64(0x9A313DABE0ABAB3D), + 
SPH_C64(0xF03ED1CE4FCECED1), SPH_C64(0x9988551133111155), + SPH_C64(0x830C898F8C8F8F89), SPH_C64(0x044A6B4ED24E4E6B), + SPH_C64(0x66D151B7C4B7B751), SPH_C64(0xE00B60EB20EBEB60), + SPH_C64(0xC1FDCC3C443C3CCC), SPH_C64(0xFD7CBF819E8181BF), + SPH_C64(0x40D4FE94A19494FE), SPH_C64(0x1CEB0CF704F7F70C), + SPH_C64(0x18A167B9D6B9B967), SPH_C64(0x8B985F133513135F), + SPH_C64(0x517D9C2C742C2C9C), SPH_C64(0x05D6B8D368D3D3B8), + SPH_C64(0x8C6B5CE734E7E75C), SPH_C64(0x3957CB6EB26E6ECB), + SPH_C64(0xAA6EF3C451C4C4F3), SPH_C64(0x1B180F030503030F), + SPH_C64(0xDC8A1356FA565613), SPH_C64(0x5E1A4944CC444449), + SPH_C64(0xA0DF9E7F817F7F9E), SPH_C64(0x882137A9E6A9A937), + SPH_C64(0x674D822A7E2A2A82), SPH_C64(0x0AB16DBBD0BBBB6D), + SPH_C64(0x8746E2C15EC1C1E2), SPH_C64(0xF1A20253F5535302), + SPH_C64(0x72AE8BDC79DCDC8B), SPH_C64(0x5358270B1D0B0B27), + SPH_C64(0x019CD39DBA9D9DD3), SPH_C64(0x2B47C16CB46C6CC1), + SPH_C64(0xA495F531533131F5), SPH_C64(0xF387B9749C7474B9), + SPH_C64(0x15E309F607F6F609), SPH_C64(0x4C0A4346CA464643), + SPH_C64(0xA50926ACE9ACAC26), SPH_C64(0xB53C978986898997), + SPH_C64(0xB4A044143C141444), SPH_C64(0xBA5B42E13EE1E142), + SPH_C64(0xA6B04E163A16164E), SPH_C64(0xF7CDD23A4E3A3AD2), + SPH_C64(0x066FD069BB6969D0), SPH_C64(0x41482D091B09092D), + SPH_C64(0xD7A7AD70907070AD), SPH_C64(0x6FD954B6C7B6B654), + SPH_C64(0x1ECEB7D06DD0D0B7), SPH_C64(0xD63B7EED2AEDED7E), + SPH_C64(0xE22EDBCC49CCCCDB), SPH_C64(0x682A5742C6424257), + SPH_C64(0x2CB4C298B59898C2), SPH_C64(0xED490EA4F1A4A40E), + SPH_C64(0x755D882878282888), SPH_C64(0x86DA315CE45C5C31), + SPH_C64(0x6B933FF815F8F83F), SPH_C64(0xC244A486978686A4) +}; + +static const sph_u64 old1_T2[256] = { + SPH_C64(0xC0781828181878D8), SPH_C64(0x05AF23652323AF26), + SPH_C64(0x7EF9C657C6C6F9B8), SPH_C64(0x136FE825E8E86FFB), + SPH_C64(0x4CA187948787A1CB), SPH_C64(0xA962B8D5B8B86211), + SPH_C64(0x0805010301010509), SPH_C64(0x426E4FD14F4F6E0D), + SPH_C64(0xADEE365A3636EE9B), SPH_C64(0x5904A6F7A6A604FF), + SPH_C64(0xDEBDD26BD2D2BD0C), 
SPH_C64(0xFB06F502F5F5060E), + SPH_C64(0xEF80798B79798096), SPH_C64(0x5FCE6FB16F6FCE30), + SPH_C64(0xFCEF91AE9191EF6D), SPH_C64(0xAA0752F6525207F8), + SPH_C64(0x27FD60A06060FD47), SPH_C64(0x8976BCD9BCBC7635), + SPH_C64(0xACCD9BB09B9BCD37), SPH_C64(0x048C8E8F8E8E8C8A), + SPH_C64(0x7115A3F8A3A315D2), SPH_C64(0x603C0C140C0C3C6C), + SPH_C64(0xFF8A7B8D7B7B8A84), SPH_C64(0xB5E1355F3535E180), + SPH_C64(0xE8691D271D1D69F5), SPH_C64(0x5347E03DE0E047B3), + SPH_C64(0xF6ACD764D7D7AC21), SPH_C64(0x5EEDC25BC2C2ED9C), + SPH_C64(0x6D962E722E2E9643), SPH_C64(0x627A4BDD4B4B7A29), + SPH_C64(0xA321FE1FFEFE215D), SPH_C64(0x821657F9575716D5), + SPH_C64(0xA841153F151541BD), SPH_C64(0x9FB677997777B6E8), + SPH_C64(0xA5EB37593737EB92), SPH_C64(0x7B56E532E5E5569E), + SPH_C64(0x8CD99FBC9F9FD913), SPH_C64(0xD317F00DF0F01723), + SPH_C64(0x6A7F4ADE4A4A7F20), SPH_C64(0x9E95DA73DADA9544), + SPH_C64(0xFA2558E8585825A2), SPH_C64(0x06CAC946C9C9CACF), + SPH_C64(0x558D297B29298D7C), SPH_C64(0x50220A1E0A0A225A), + SPH_C64(0xE14FB1CEB1B14F50), SPH_C64(0x691AA0FDA0A01AC9), + SPH_C64(0x7FDA6BBD6B6BDA14), SPH_C64(0x5CAB85928585ABD9), + SPH_C64(0x8173BDDABDBD733C), SPH_C64(0xD2345DE75D5D348F), + SPH_C64(0x8050103010105090), SPH_C64(0xF303F401F4F40307), + SPH_C64(0x16C0CB40CBCBC0DD), SPH_C64(0xEDC63E423E3EC6D3), + SPH_C64(0x2811050F0505112D), SPH_C64(0x1FE667A96767E678), + SPH_C64(0x7353E431E4E45397), SPH_C64(0x25BB27692727BB02), + SPH_C64(0x325841C341415873), SPH_C64(0x2C9D8B808B8B9DA7), + SPH_C64(0x5101A7F4A7A701F6), SPH_C64(0xCF947D877D7D94B2), + SPH_C64(0xDCFB95A29595FB49), SPH_C64(0x8E9FD875D8D89F56), + SPH_C64(0x8B30FB10FBFB3070), SPH_C64(0x2371EE2FEEEE71CD), + SPH_C64(0xC7917C847C7C91BB), SPH_C64(0x17E366AA6666E371), + SPH_C64(0xA68EDD7ADDDD8E7B), SPH_C64(0xB84B173917174BAF), + SPH_C64(0x024647C947474645), SPH_C64(0x84DC9EBF9E9EDC1A), + SPH_C64(0x1EC5CA43CACAC5D4), SPH_C64(0x75992D772D2D9958), + SPH_C64(0x9179BFDCBFBF792E), SPH_C64(0x381B070907071B3F), + SPH_C64(0x0123ADEAADAD23AC), 
SPH_C64(0xEA2F5AEE5A5A2FB0), + SPH_C64(0x6CB583988383B5EF), SPH_C64(0x85FF33553333FFB6), + SPH_C64(0x3FF263A56363F25C), SPH_C64(0x100A020602020A12), + SPH_C64(0x3938AAE3AAAA3893), SPH_C64(0xAFA871937171A8DE), + SPH_C64(0x0ECFC845C8C8CFC6), SPH_C64(0xC87D192B19197DD1), + SPH_C64(0x727049DB4949703B), SPH_C64(0x869AD976D9D99A5F), + SPH_C64(0xC31DF20BF2F21D31), SPH_C64(0x4B48E338E3E348A8), + SPH_C64(0xE22A5BED5B5B2AB9), SPH_C64(0x34928885888892BC), + SPH_C64(0xA4C89AB39A9AC83E), SPH_C64(0x2DBE266A2626BE0B), + SPH_C64(0x8DFA32563232FABF), SPH_C64(0xE94AB0CDB0B04A59), + SPH_C64(0x1B6AE926E9E96AF2), SPH_C64(0x78330F110F0F3377), + SPH_C64(0xE6A6D562D5D5A633), SPH_C64(0x74BA809D8080BAF4), + SPH_C64(0x997CBEDFBEBE7C27), SPH_C64(0x26DECD4ACDCDDEEB), + SPH_C64(0xBDE4345C3434E489), SPH_C64(0x7A7548D848487532), + SPH_C64(0xAB24FF1CFFFF2454), SPH_C64(0xF78F7A8E7A7A8F8D), + SPH_C64(0xF4EA90AD9090EA64), SPH_C64(0xC23E5FE15F5F3E9D), + SPH_C64(0x1DA020602020A03D), SPH_C64(0x67D568B86868D50F), + SPH_C64(0xD0721A2E1A1A72CA), SPH_C64(0x192CAEEFAEAE2CB7), + SPH_C64(0xC95EB4C1B4B45E7D), SPH_C64(0x9A1954FC545419CE), + SPH_C64(0xECE593A89393E57F), SPH_C64(0x0DAA22662222AA2F), + SPH_C64(0x07E964AC6464E963), SPH_C64(0xDB12F10EF1F1122A), + SPH_C64(0xBFA273957373A2CC), SPH_C64(0x905A123612125A82), + SPH_C64(0x3A5D40C040405D7A), SPH_C64(0x4028081808082848), + SPH_C64(0x56E8C358C3C3E895), SPH_C64(0x337BEC29ECEC7BDF), + SPH_C64(0x9690DB70DBDB904D), SPH_C64(0x611FA1FEA1A11FC0), + SPH_C64(0x1C838D8A8D8D8391), SPH_C64(0xF5C93D473D3DC9C8), + SPH_C64(0xCCF197A49797F15B), SPH_C64(0x0000000000000000), + SPH_C64(0x36D4CF4CCFCFD4F9), SPH_C64(0x45872B7D2B2B876E), + SPH_C64(0x97B3769A7676B3E1), SPH_C64(0x64B0829B8282B0E6), + SPH_C64(0xFEA9D667D6D6A928), SPH_C64(0xD8771B2D1B1B77C3), + SPH_C64(0xC15BB5C2B5B55B74), SPH_C64(0x1129AFECAFAF29BE), + SPH_C64(0x77DF6ABE6A6ADF1D), SPH_C64(0xBA0D50F050500DEA), + SPH_C64(0x124C45CF45454C57), SPH_C64(0xCB18F308F3F31838), + SPH_C64(0x9DF030503030F0AD), 
SPH_C64(0x2B74EF2CEFEF74C4), + SPH_C64(0xE5C33F413F3FC3DA), SPH_C64(0x921C55FF55551CC7), + SPH_C64(0x7910A2FBA2A210DB), SPH_C64(0x0365EA23EAEA65E9), + SPH_C64(0x0FEC65AF6565EC6A), SPH_C64(0xB968BAD3BABA6803), + SPH_C64(0x65932F712F2F934A), SPH_C64(0x4EE7C05DC0C0E78E), + SPH_C64(0xBE81DE7FDEDE8160), SPH_C64(0xE06C1C241C1C6CFC), + SPH_C64(0xBB2EFD1AFDFD2E46), SPH_C64(0x52644DD74D4D641F), + SPH_C64(0xE4E092AB9292E076), SPH_C64(0x8FBC759F7575BCFA), + SPH_C64(0x301E060A06061E36), SPH_C64(0x24988A838A8A98AE), + SPH_C64(0xF940B2CBB2B2404B), SPH_C64(0x6359E637E6E65985), + SPH_C64(0x70360E120E0E367E), SPH_C64(0xF8631F211F1F63E7), + SPH_C64(0x37F762A66262F755), SPH_C64(0xEEA3D461D4D4A33A), + SPH_C64(0x2932A8E5A8A83281), SPH_C64(0xC4F496A79696F452), + SPH_C64(0x9B3AF916F9F93A62), SPH_C64(0x66F6C552C5C5F6A3), + SPH_C64(0x35B1256F2525B110), SPH_C64(0xF22059EB595920AB), + SPH_C64(0x54AE84918484AED0), SPH_C64(0xB7A772967272A7C5), + SPH_C64(0xD5DD394B3939DDEC), SPH_C64(0x5A614CD44C4C6116), + SPH_C64(0xCA3B5EE25E5E3B94), SPH_C64(0xE78578887878859F), + SPH_C64(0xDDD838483838D8E5), SPH_C64(0x14868C898C8C8698), + SPH_C64(0xC6B2D16ED1D1B217), SPH_C64(0x410BA5F2A5A50BE4), + SPH_C64(0x434DE23BE2E24DA1), SPH_C64(0x2FF861A36161F84E), + SPH_C64(0xF145B3C8B3B34542), SPH_C64(0x15A521632121A534), + SPH_C64(0x94D69CB99C9CD608), SPH_C64(0xF0661E221E1E66EE), + SPH_C64(0x225243C543435261), SPH_C64(0x76FCC754C7C7FCB1), + SPH_C64(0xB32BFC19FCFC2B4F), SPH_C64(0x2014040C04041424), + SPH_C64(0xB20851F3515108E3), SPH_C64(0xBCC799B69999C725), + SPH_C64(0x4FC46DB76D6DC422), SPH_C64(0x68390D170D0D3965), + SPH_C64(0x8335FA13FAFA3579), SPH_C64(0xB684DF7CDFDF8469), + SPH_C64(0xD79B7E827E7E9BA9), SPH_C64(0x3DB4246C2424B419), + SPH_C64(0xC5D73B4D3B3BD7FE), SPH_C64(0x313DABE0ABAB3D9A), + SPH_C64(0x3ED1CE4FCECED1F0), SPH_C64(0x8855113311115599), + SPH_C64(0x0C898F8C8F8F8983), SPH_C64(0x4A6B4ED24E4E6B04), + SPH_C64(0xD151B7C4B7B75166), SPH_C64(0x0B60EB20EBEB60E0), + SPH_C64(0xFDCC3C443C3CCCC1), 
SPH_C64(0x7CBF819E8181BFFD), + SPH_C64(0xD4FE94A19494FE40), SPH_C64(0xEB0CF704F7F70C1C), + SPH_C64(0xA167B9D6B9B96718), SPH_C64(0x985F133513135F8B), + SPH_C64(0x7D9C2C742C2C9C51), SPH_C64(0xD6B8D368D3D3B805), + SPH_C64(0x6B5CE734E7E75C8C), SPH_C64(0x57CB6EB26E6ECB39), + SPH_C64(0x6EF3C451C4C4F3AA), SPH_C64(0x180F030503030F1B), + SPH_C64(0x8A1356FA565613DC), SPH_C64(0x1A4944CC4444495E), + SPH_C64(0xDF9E7F817F7F9EA0), SPH_C64(0x2137A9E6A9A93788), + SPH_C64(0x4D822A7E2A2A8267), SPH_C64(0xB16DBBD0BBBB6D0A), + SPH_C64(0x46E2C15EC1C1E287), SPH_C64(0xA20253F5535302F1), + SPH_C64(0xAE8BDC79DCDC8B72), SPH_C64(0x58270B1D0B0B2753), + SPH_C64(0x9CD39DBA9D9DD301), SPH_C64(0x47C16CB46C6CC12B), + SPH_C64(0x95F531533131F5A4), SPH_C64(0x87B9749C7474B9F3), + SPH_C64(0xE309F607F6F60915), SPH_C64(0x0A4346CA4646434C), + SPH_C64(0x0926ACE9ACAC26A5), SPH_C64(0x3C978986898997B5), + SPH_C64(0xA044143C141444B4), SPH_C64(0x5B42E13EE1E142BA), + SPH_C64(0xB04E163A16164EA6), SPH_C64(0xCDD23A4E3A3AD2F7), + SPH_C64(0x6FD069BB6969D006), SPH_C64(0x482D091B09092D41), + SPH_C64(0xA7AD70907070ADD7), SPH_C64(0xD954B6C7B6B6546F), + SPH_C64(0xCEB7D06DD0D0B71E), SPH_C64(0x3B7EED2AEDED7ED6), + SPH_C64(0x2EDBCC49CCCCDBE2), SPH_C64(0x2A5742C642425768), + SPH_C64(0xB4C298B59898C22C), SPH_C64(0x490EA4F1A4A40EED), + SPH_C64(0x5D88287828288875), SPH_C64(0xDA315CE45C5C3186), + SPH_C64(0x933FF815F8F83F6B), SPH_C64(0x44A486978686A4C2) +}; + +static const sph_u64 old1_T3[256] = { + SPH_C64(0x781828181878D8C0), SPH_C64(0xAF23652323AF2605), + SPH_C64(0xF9C657C6C6F9B87E), SPH_C64(0x6FE825E8E86FFB13), + SPH_C64(0xA187948787A1CB4C), SPH_C64(0x62B8D5B8B86211A9), + SPH_C64(0x0501030101050908), SPH_C64(0x6E4FD14F4F6E0D42), + SPH_C64(0xEE365A3636EE9BAD), SPH_C64(0x04A6F7A6A604FF59), + SPH_C64(0xBDD26BD2D2BD0CDE), SPH_C64(0x06F502F5F5060EFB), + SPH_C64(0x80798B79798096EF), SPH_C64(0xCE6FB16F6FCE305F), + SPH_C64(0xEF91AE9191EF6DFC), SPH_C64(0x0752F6525207F8AA), + SPH_C64(0xFD60A06060FD4727), SPH_C64(0x76BCD9BCBC763589), + 
SPH_C64(0xCD9BB09B9BCD37AC), SPH_C64(0x8C8E8F8E8E8C8A04), + SPH_C64(0x15A3F8A3A315D271), SPH_C64(0x3C0C140C0C3C6C60), + SPH_C64(0x8A7B8D7B7B8A84FF), SPH_C64(0xE1355F3535E180B5), + SPH_C64(0x691D271D1D69F5E8), SPH_C64(0x47E03DE0E047B353), + SPH_C64(0xACD764D7D7AC21F6), SPH_C64(0xEDC25BC2C2ED9C5E), + SPH_C64(0x962E722E2E96436D), SPH_C64(0x7A4BDD4B4B7A2962), + SPH_C64(0x21FE1FFEFE215DA3), SPH_C64(0x1657F9575716D582), + SPH_C64(0x41153F151541BDA8), SPH_C64(0xB677997777B6E89F), + SPH_C64(0xEB37593737EB92A5), SPH_C64(0x56E532E5E5569E7B), + SPH_C64(0xD99FBC9F9FD9138C), SPH_C64(0x17F00DF0F01723D3), + SPH_C64(0x7F4ADE4A4A7F206A), SPH_C64(0x95DA73DADA95449E), + SPH_C64(0x2558E8585825A2FA), SPH_C64(0xCAC946C9C9CACF06), + SPH_C64(0x8D297B29298D7C55), SPH_C64(0x220A1E0A0A225A50), + SPH_C64(0x4FB1CEB1B14F50E1), SPH_C64(0x1AA0FDA0A01AC969), + SPH_C64(0xDA6BBD6B6BDA147F), SPH_C64(0xAB85928585ABD95C), + SPH_C64(0x73BDDABDBD733C81), SPH_C64(0x345DE75D5D348FD2), + SPH_C64(0x5010301010509080), SPH_C64(0x03F401F4F40307F3), + SPH_C64(0xC0CB40CBCBC0DD16), SPH_C64(0xC63E423E3EC6D3ED), + SPH_C64(0x11050F0505112D28), SPH_C64(0xE667A96767E6781F), + SPH_C64(0x53E431E4E4539773), SPH_C64(0xBB27692727BB0225), + SPH_C64(0x5841C34141587332), SPH_C64(0x9D8B808B8B9DA72C), + SPH_C64(0x01A7F4A7A701F651), SPH_C64(0x947D877D7D94B2CF), + SPH_C64(0xFB95A29595FB49DC), SPH_C64(0x9FD875D8D89F568E), + SPH_C64(0x30FB10FBFB30708B), SPH_C64(0x71EE2FEEEE71CD23), + SPH_C64(0x917C847C7C91BBC7), SPH_C64(0xE366AA6666E37117), + SPH_C64(0x8EDD7ADDDD8E7BA6), SPH_C64(0x4B173917174BAFB8), + SPH_C64(0x4647C94747464502), SPH_C64(0xDC9EBF9E9EDC1A84), + SPH_C64(0xC5CA43CACAC5D41E), SPH_C64(0x992D772D2D995875), + SPH_C64(0x79BFDCBFBF792E91), SPH_C64(0x1B070907071B3F38), + SPH_C64(0x23ADEAADAD23AC01), SPH_C64(0x2F5AEE5A5A2FB0EA), + SPH_C64(0xB583988383B5EF6C), SPH_C64(0xFF33553333FFB685), + SPH_C64(0xF263A56363F25C3F), SPH_C64(0x0A020602020A1210), + SPH_C64(0x38AAE3AAAA389339), SPH_C64(0xA871937171A8DEAF), + 
SPH_C64(0xCFC845C8C8CFC60E), SPH_C64(0x7D192B19197DD1C8), + SPH_C64(0x7049DB4949703B72), SPH_C64(0x9AD976D9D99A5F86), + SPH_C64(0x1DF20BF2F21D31C3), SPH_C64(0x48E338E3E348A84B), + SPH_C64(0x2A5BED5B5B2AB9E2), SPH_C64(0x928885888892BC34), + SPH_C64(0xC89AB39A9AC83EA4), SPH_C64(0xBE266A2626BE0B2D), + SPH_C64(0xFA32563232FABF8D), SPH_C64(0x4AB0CDB0B04A59E9), + SPH_C64(0x6AE926E9E96AF21B), SPH_C64(0x330F110F0F337778), + SPH_C64(0xA6D562D5D5A633E6), SPH_C64(0xBA809D8080BAF474), + SPH_C64(0x7CBEDFBEBE7C2799), SPH_C64(0xDECD4ACDCDDEEB26), + SPH_C64(0xE4345C3434E489BD), SPH_C64(0x7548D8484875327A), + SPH_C64(0x24FF1CFFFF2454AB), SPH_C64(0x8F7A8E7A7A8F8DF7), + SPH_C64(0xEA90AD9090EA64F4), SPH_C64(0x3E5FE15F5F3E9DC2), + SPH_C64(0xA020602020A03D1D), SPH_C64(0xD568B86868D50F67), + SPH_C64(0x721A2E1A1A72CAD0), SPH_C64(0x2CAEEFAEAE2CB719), + SPH_C64(0x5EB4C1B4B45E7DC9), SPH_C64(0x1954FC545419CE9A), + SPH_C64(0xE593A89393E57FEC), SPH_C64(0xAA22662222AA2F0D), + SPH_C64(0xE964AC6464E96307), SPH_C64(0x12F10EF1F1122ADB), + SPH_C64(0xA273957373A2CCBF), SPH_C64(0x5A123612125A8290), + SPH_C64(0x5D40C040405D7A3A), SPH_C64(0x2808180808284840), + SPH_C64(0xE8C358C3C3E89556), SPH_C64(0x7BEC29ECEC7BDF33), + SPH_C64(0x90DB70DBDB904D96), SPH_C64(0x1FA1FEA1A11FC061), + SPH_C64(0x838D8A8D8D83911C), SPH_C64(0xC93D473D3DC9C8F5), + SPH_C64(0xF197A49797F15BCC), SPH_C64(0x0000000000000000), + SPH_C64(0xD4CF4CCFCFD4F936), SPH_C64(0x872B7D2B2B876E45), + SPH_C64(0xB3769A7676B3E197), SPH_C64(0xB0829B8282B0E664), + SPH_C64(0xA9D667D6D6A928FE), SPH_C64(0x771B2D1B1B77C3D8), + SPH_C64(0x5BB5C2B5B55B74C1), SPH_C64(0x29AFECAFAF29BE11), + SPH_C64(0xDF6ABE6A6ADF1D77), SPH_C64(0x0D50F050500DEABA), + SPH_C64(0x4C45CF45454C5712), SPH_C64(0x18F308F3F31838CB), + SPH_C64(0xF030503030F0AD9D), SPH_C64(0x74EF2CEFEF74C42B), + SPH_C64(0xC33F413F3FC3DAE5), SPH_C64(0x1C55FF55551CC792), + SPH_C64(0x10A2FBA2A210DB79), SPH_C64(0x65EA23EAEA65E903), + SPH_C64(0xEC65AF6565EC6A0F), SPH_C64(0x68BAD3BABA6803B9), + 
SPH_C64(0x932F712F2F934A65), SPH_C64(0xE7C05DC0C0E78E4E), + SPH_C64(0x81DE7FDEDE8160BE), SPH_C64(0x6C1C241C1C6CFCE0), + SPH_C64(0x2EFD1AFDFD2E46BB), SPH_C64(0x644DD74D4D641F52), + SPH_C64(0xE092AB9292E076E4), SPH_C64(0xBC759F7575BCFA8F), + SPH_C64(0x1E060A06061E3630), SPH_C64(0x988A838A8A98AE24), + SPH_C64(0x40B2CBB2B2404BF9), SPH_C64(0x59E637E6E6598563), + SPH_C64(0x360E120E0E367E70), SPH_C64(0x631F211F1F63E7F8), + SPH_C64(0xF762A66262F75537), SPH_C64(0xA3D461D4D4A33AEE), + SPH_C64(0x32A8E5A8A8328129), SPH_C64(0xF496A79696F452C4), + SPH_C64(0x3AF916F9F93A629B), SPH_C64(0xF6C552C5C5F6A366), + SPH_C64(0xB1256F2525B11035), SPH_C64(0x2059EB595920ABF2), + SPH_C64(0xAE84918484AED054), SPH_C64(0xA772967272A7C5B7), + SPH_C64(0xDD394B3939DDECD5), SPH_C64(0x614CD44C4C61165A), + SPH_C64(0x3B5EE25E5E3B94CA), SPH_C64(0x8578887878859FE7), + SPH_C64(0xD838483838D8E5DD), SPH_C64(0x868C898C8C869814), + SPH_C64(0xB2D16ED1D1B217C6), SPH_C64(0x0BA5F2A5A50BE441), + SPH_C64(0x4DE23BE2E24DA143), SPH_C64(0xF861A36161F84E2F), + SPH_C64(0x45B3C8B3B34542F1), SPH_C64(0xA521632121A53415), + SPH_C64(0xD69CB99C9CD60894), SPH_C64(0x661E221E1E66EEF0), + SPH_C64(0x5243C54343526122), SPH_C64(0xFCC754C7C7FCB176), + SPH_C64(0x2BFC19FCFC2B4FB3), SPH_C64(0x14040C0404142420), + SPH_C64(0x0851F3515108E3B2), SPH_C64(0xC799B69999C725BC), + SPH_C64(0xC46DB76D6DC4224F), SPH_C64(0x390D170D0D396568), + SPH_C64(0x35FA13FAFA357983), SPH_C64(0x84DF7CDFDF8469B6), + SPH_C64(0x9B7E827E7E9BA9D7), SPH_C64(0xB4246C2424B4193D), + SPH_C64(0xD73B4D3B3BD7FEC5), SPH_C64(0x3DABE0ABAB3D9A31), + SPH_C64(0xD1CE4FCECED1F03E), SPH_C64(0x5511331111559988), + SPH_C64(0x898F8C8F8F89830C), SPH_C64(0x6B4ED24E4E6B044A), + SPH_C64(0x51B7C4B7B75166D1), SPH_C64(0x60EB20EBEB60E00B), + SPH_C64(0xCC3C443C3CCCC1FD), SPH_C64(0xBF819E8181BFFD7C), + SPH_C64(0xFE94A19494FE40D4), SPH_C64(0x0CF704F7F70C1CEB), + SPH_C64(0x67B9D6B9B96718A1), SPH_C64(0x5F133513135F8B98), + SPH_C64(0x9C2C742C2C9C517D), SPH_C64(0xB8D368D3D3B805D6), + 
SPH_C64(0x5CE734E7E75C8C6B), SPH_C64(0xCB6EB26E6ECB3957), + SPH_C64(0xF3C451C4C4F3AA6E), SPH_C64(0x0F030503030F1B18), + SPH_C64(0x1356FA565613DC8A), SPH_C64(0x4944CC4444495E1A), + SPH_C64(0x9E7F817F7F9EA0DF), SPH_C64(0x37A9E6A9A9378821), + SPH_C64(0x822A7E2A2A82674D), SPH_C64(0x6DBBD0BBBB6D0AB1), + SPH_C64(0xE2C15EC1C1E28746), SPH_C64(0x0253F5535302F1A2), + SPH_C64(0x8BDC79DCDC8B72AE), SPH_C64(0x270B1D0B0B275358), + SPH_C64(0xD39DBA9D9DD3019C), SPH_C64(0xC16CB46C6CC12B47), + SPH_C64(0xF531533131F5A495), SPH_C64(0xB9749C7474B9F387), + SPH_C64(0x09F607F6F60915E3), SPH_C64(0x4346CA4646434C0A), + SPH_C64(0x26ACE9ACAC26A509), SPH_C64(0x978986898997B53C), + SPH_C64(0x44143C141444B4A0), SPH_C64(0x42E13EE1E142BA5B), + SPH_C64(0x4E163A16164EA6B0), SPH_C64(0xD23A4E3A3AD2F7CD), + SPH_C64(0xD069BB6969D0066F), SPH_C64(0x2D091B09092D4148), + SPH_C64(0xAD70907070ADD7A7), SPH_C64(0x54B6C7B6B6546FD9), + SPH_C64(0xB7D06DD0D0B71ECE), SPH_C64(0x7EED2AEDED7ED63B), + SPH_C64(0xDBCC49CCCCDBE22E), SPH_C64(0x5742C6424257682A), + SPH_C64(0xC298B59898C22CB4), SPH_C64(0x0EA4F1A4A40EED49), + SPH_C64(0x882878282888755D), SPH_C64(0x315CE45C5C3186DA), + SPH_C64(0x3FF815F8F83F6B93), SPH_C64(0xA486978686A4C244) +}; + +static const sph_u64 old1_T4[256] = { + SPH_C64(0x1828181878D8C078), SPH_C64(0x23652323AF2605AF), + SPH_C64(0xC657C6C6F9B87EF9), SPH_C64(0xE825E8E86FFB136F), + SPH_C64(0x87948787A1CB4CA1), SPH_C64(0xB8D5B8B86211A962), + SPH_C64(0x0103010105090805), SPH_C64(0x4FD14F4F6E0D426E), + SPH_C64(0x365A3636EE9BADEE), SPH_C64(0xA6F7A6A604FF5904), + SPH_C64(0xD26BD2D2BD0CDEBD), SPH_C64(0xF502F5F5060EFB06), + SPH_C64(0x798B79798096EF80), SPH_C64(0x6FB16F6FCE305FCE), + SPH_C64(0x91AE9191EF6DFCEF), SPH_C64(0x52F6525207F8AA07), + SPH_C64(0x60A06060FD4727FD), SPH_C64(0xBCD9BCBC76358976), + SPH_C64(0x9BB09B9BCD37ACCD), SPH_C64(0x8E8F8E8E8C8A048C), + SPH_C64(0xA3F8A3A315D27115), SPH_C64(0x0C140C0C3C6C603C), + SPH_C64(0x7B8D7B7B8A84FF8A), SPH_C64(0x355F3535E180B5E1), + SPH_C64(0x1D271D1D69F5E869), 
SPH_C64(0xE03DE0E047B35347), + SPH_C64(0xD764D7D7AC21F6AC), SPH_C64(0xC25BC2C2ED9C5EED), + SPH_C64(0x2E722E2E96436D96), SPH_C64(0x4BDD4B4B7A29627A), + SPH_C64(0xFE1FFEFE215DA321), SPH_C64(0x57F9575716D58216), + SPH_C64(0x153F151541BDA841), SPH_C64(0x77997777B6E89FB6), + SPH_C64(0x37593737EB92A5EB), SPH_C64(0xE532E5E5569E7B56), + SPH_C64(0x9FBC9F9FD9138CD9), SPH_C64(0xF00DF0F01723D317), + SPH_C64(0x4ADE4A4A7F206A7F), SPH_C64(0xDA73DADA95449E95), + SPH_C64(0x58E8585825A2FA25), SPH_C64(0xC946C9C9CACF06CA), + SPH_C64(0x297B29298D7C558D), SPH_C64(0x0A1E0A0A225A5022), + SPH_C64(0xB1CEB1B14F50E14F), SPH_C64(0xA0FDA0A01AC9691A), + SPH_C64(0x6BBD6B6BDA147FDA), SPH_C64(0x85928585ABD95CAB), + SPH_C64(0xBDDABDBD733C8173), SPH_C64(0x5DE75D5D348FD234), + SPH_C64(0x1030101050908050), SPH_C64(0xF401F4F40307F303), + SPH_C64(0xCB40CBCBC0DD16C0), SPH_C64(0x3E423E3EC6D3EDC6), + SPH_C64(0x050F0505112D2811), SPH_C64(0x67A96767E6781FE6), + SPH_C64(0xE431E4E453977353), SPH_C64(0x27692727BB0225BB), + SPH_C64(0x41C3414158733258), SPH_C64(0x8B808B8B9DA72C9D), + SPH_C64(0xA7F4A7A701F65101), SPH_C64(0x7D877D7D94B2CF94), + SPH_C64(0x95A29595FB49DCFB), SPH_C64(0xD875D8D89F568E9F), + SPH_C64(0xFB10FBFB30708B30), SPH_C64(0xEE2FEEEE71CD2371), + SPH_C64(0x7C847C7C91BBC791), SPH_C64(0x66AA6666E37117E3), + SPH_C64(0xDD7ADDDD8E7BA68E), SPH_C64(0x173917174BAFB84B), + SPH_C64(0x47C9474746450246), SPH_C64(0x9EBF9E9EDC1A84DC), + SPH_C64(0xCA43CACAC5D41EC5), SPH_C64(0x2D772D2D99587599), + SPH_C64(0xBFDCBFBF792E9179), SPH_C64(0x070907071B3F381B), + SPH_C64(0xADEAADAD23AC0123), SPH_C64(0x5AEE5A5A2FB0EA2F), + SPH_C64(0x83988383B5EF6CB5), SPH_C64(0x33553333FFB685FF), + SPH_C64(0x63A56363F25C3FF2), SPH_C64(0x020602020A12100A), + SPH_C64(0xAAE3AAAA38933938), SPH_C64(0x71937171A8DEAFA8), + SPH_C64(0xC845C8C8CFC60ECF), SPH_C64(0x192B19197DD1C87D), + SPH_C64(0x49DB4949703B7270), SPH_C64(0xD976D9D99A5F869A), + SPH_C64(0xF20BF2F21D31C31D), SPH_C64(0xE338E3E348A84B48), + SPH_C64(0x5BED5B5B2AB9E22A), 
SPH_C64(0x8885888892BC3492), + SPH_C64(0x9AB39A9AC83EA4C8), SPH_C64(0x266A2626BE0B2DBE), + SPH_C64(0x32563232FABF8DFA), SPH_C64(0xB0CDB0B04A59E94A), + SPH_C64(0xE926E9E96AF21B6A), SPH_C64(0x0F110F0F33777833), + SPH_C64(0xD562D5D5A633E6A6), SPH_C64(0x809D8080BAF474BA), + SPH_C64(0xBEDFBEBE7C27997C), SPH_C64(0xCD4ACDCDDEEB26DE), + SPH_C64(0x345C3434E489BDE4), SPH_C64(0x48D8484875327A75), + SPH_C64(0xFF1CFFFF2454AB24), SPH_C64(0x7A8E7A7A8F8DF78F), + SPH_C64(0x90AD9090EA64F4EA), SPH_C64(0x5FE15F5F3E9DC23E), + SPH_C64(0x20602020A03D1DA0), SPH_C64(0x68B86868D50F67D5), + SPH_C64(0x1A2E1A1A72CAD072), SPH_C64(0xAEEFAEAE2CB7192C), + SPH_C64(0xB4C1B4B45E7DC95E), SPH_C64(0x54FC545419CE9A19), + SPH_C64(0x93A89393E57FECE5), SPH_C64(0x22662222AA2F0DAA), + SPH_C64(0x64AC6464E96307E9), SPH_C64(0xF10EF1F1122ADB12), + SPH_C64(0x73957373A2CCBFA2), SPH_C64(0x123612125A82905A), + SPH_C64(0x40C040405D7A3A5D), SPH_C64(0x0818080828484028), + SPH_C64(0xC358C3C3E89556E8), SPH_C64(0xEC29ECEC7BDF337B), + SPH_C64(0xDB70DBDB904D9690), SPH_C64(0xA1FEA1A11FC0611F), + SPH_C64(0x8D8A8D8D83911C83), SPH_C64(0x3D473D3DC9C8F5C9), + SPH_C64(0x97A49797F15BCCF1), SPH_C64(0x0000000000000000), + SPH_C64(0xCF4CCFCFD4F936D4), SPH_C64(0x2B7D2B2B876E4587), + SPH_C64(0x769A7676B3E197B3), SPH_C64(0x829B8282B0E664B0), + SPH_C64(0xD667D6D6A928FEA9), SPH_C64(0x1B2D1B1B77C3D877), + SPH_C64(0xB5C2B5B55B74C15B), SPH_C64(0xAFECAFAF29BE1129), + SPH_C64(0x6ABE6A6ADF1D77DF), SPH_C64(0x50F050500DEABA0D), + SPH_C64(0x45CF45454C57124C), SPH_C64(0xF308F3F31838CB18), + SPH_C64(0x30503030F0AD9DF0), SPH_C64(0xEF2CEFEF74C42B74), + SPH_C64(0x3F413F3FC3DAE5C3), SPH_C64(0x55FF55551CC7921C), + SPH_C64(0xA2FBA2A210DB7910), SPH_C64(0xEA23EAEA65E90365), + SPH_C64(0x65AF6565EC6A0FEC), SPH_C64(0xBAD3BABA6803B968), + SPH_C64(0x2F712F2F934A6593), SPH_C64(0xC05DC0C0E78E4EE7), + SPH_C64(0xDE7FDEDE8160BE81), SPH_C64(0x1C241C1C6CFCE06C), + SPH_C64(0xFD1AFDFD2E46BB2E), SPH_C64(0x4DD74D4D641F5264), + SPH_C64(0x92AB9292E076E4E0), 
SPH_C64(0x759F7575BCFA8FBC), + SPH_C64(0x060A06061E36301E), SPH_C64(0x8A838A8A98AE2498), + SPH_C64(0xB2CBB2B2404BF940), SPH_C64(0xE637E6E659856359), + SPH_C64(0x0E120E0E367E7036), SPH_C64(0x1F211F1F63E7F863), + SPH_C64(0x62A66262F75537F7), SPH_C64(0xD461D4D4A33AEEA3), + SPH_C64(0xA8E5A8A832812932), SPH_C64(0x96A79696F452C4F4), + SPH_C64(0xF916F9F93A629B3A), SPH_C64(0xC552C5C5F6A366F6), + SPH_C64(0x256F2525B11035B1), SPH_C64(0x59EB595920ABF220), + SPH_C64(0x84918484AED054AE), SPH_C64(0x72967272A7C5B7A7), + SPH_C64(0x394B3939DDECD5DD), SPH_C64(0x4CD44C4C61165A61), + SPH_C64(0x5EE25E5E3B94CA3B), SPH_C64(0x78887878859FE785), + SPH_C64(0x38483838D8E5DDD8), SPH_C64(0x8C898C8C86981486), + SPH_C64(0xD16ED1D1B217C6B2), SPH_C64(0xA5F2A5A50BE4410B), + SPH_C64(0xE23BE2E24DA1434D), SPH_C64(0x61A36161F84E2FF8), + SPH_C64(0xB3C8B3B34542F145), SPH_C64(0x21632121A53415A5), + SPH_C64(0x9CB99C9CD60894D6), SPH_C64(0x1E221E1E66EEF066), + SPH_C64(0x43C5434352612252), SPH_C64(0xC754C7C7FCB176FC), + SPH_C64(0xFC19FCFC2B4FB32B), SPH_C64(0x040C040414242014), + SPH_C64(0x51F3515108E3B208), SPH_C64(0x99B69999C725BCC7), + SPH_C64(0x6DB76D6DC4224FC4), SPH_C64(0x0D170D0D39656839), + SPH_C64(0xFA13FAFA35798335), SPH_C64(0xDF7CDFDF8469B684), + SPH_C64(0x7E827E7E9BA9D79B), SPH_C64(0x246C2424B4193DB4), + SPH_C64(0x3B4D3B3BD7FEC5D7), SPH_C64(0xABE0ABAB3D9A313D), + SPH_C64(0xCE4FCECED1F03ED1), SPH_C64(0x1133111155998855), + SPH_C64(0x8F8C8F8F89830C89), SPH_C64(0x4ED24E4E6B044A6B), + SPH_C64(0xB7C4B7B75166D151), SPH_C64(0xEB20EBEB60E00B60), + SPH_C64(0x3C443C3CCCC1FDCC), SPH_C64(0x819E8181BFFD7CBF), + SPH_C64(0x94A19494FE40D4FE), SPH_C64(0xF704F7F70C1CEB0C), + SPH_C64(0xB9D6B9B96718A167), SPH_C64(0x133513135F8B985F), + SPH_C64(0x2C742C2C9C517D9C), SPH_C64(0xD368D3D3B805D6B8), + SPH_C64(0xE734E7E75C8C6B5C), SPH_C64(0x6EB26E6ECB3957CB), + SPH_C64(0xC451C4C4F3AA6EF3), SPH_C64(0x030503030F1B180F), + SPH_C64(0x56FA565613DC8A13), SPH_C64(0x44CC4444495E1A49), + SPH_C64(0x7F817F7F9EA0DF9E), 
SPH_C64(0xA9E6A9A937882137), + SPH_C64(0x2A7E2A2A82674D82), SPH_C64(0xBBD0BBBB6D0AB16D), + SPH_C64(0xC15EC1C1E28746E2), SPH_C64(0x53F5535302F1A202), + SPH_C64(0xDC79DCDC8B72AE8B), SPH_C64(0x0B1D0B0B27535827), + SPH_C64(0x9DBA9D9DD3019CD3), SPH_C64(0x6CB46C6CC12B47C1), + SPH_C64(0x31533131F5A495F5), SPH_C64(0x749C7474B9F387B9), + SPH_C64(0xF607F6F60915E309), SPH_C64(0x46CA4646434C0A43), + SPH_C64(0xACE9ACAC26A50926), SPH_C64(0x8986898997B53C97), + SPH_C64(0x143C141444B4A044), SPH_C64(0xE13EE1E142BA5B42), + SPH_C64(0x163A16164EA6B04E), SPH_C64(0x3A4E3A3AD2F7CDD2), + SPH_C64(0x69BB6969D0066FD0), SPH_C64(0x091B09092D41482D), + SPH_C64(0x70907070ADD7A7AD), SPH_C64(0xB6C7B6B6546FD954), + SPH_C64(0xD06DD0D0B71ECEB7), SPH_C64(0xED2AEDED7ED63B7E), + SPH_C64(0xCC49CCCCDBE22EDB), SPH_C64(0x42C6424257682A57), + SPH_C64(0x98B59898C22CB4C2), SPH_C64(0xA4F1A4A40EED490E), + SPH_C64(0x2878282888755D88), SPH_C64(0x5CE45C5C3186DA31), + SPH_C64(0xF815F8F83F6B933F), SPH_C64(0x86978686A4C244A4) +}; + +static const sph_u64 old1_T5[256] = { + SPH_C64(0x28181878D8C07818), SPH_C64(0x652323AF2605AF23), + SPH_C64(0x57C6C6F9B87EF9C6), SPH_C64(0x25E8E86FFB136FE8), + SPH_C64(0x948787A1CB4CA187), SPH_C64(0xD5B8B86211A962B8), + SPH_C64(0x0301010509080501), SPH_C64(0xD14F4F6E0D426E4F), + SPH_C64(0x5A3636EE9BADEE36), SPH_C64(0xF7A6A604FF5904A6), + SPH_C64(0x6BD2D2BD0CDEBDD2), SPH_C64(0x02F5F5060EFB06F5), + SPH_C64(0x8B79798096EF8079), SPH_C64(0xB16F6FCE305FCE6F), + SPH_C64(0xAE9191EF6DFCEF91), SPH_C64(0xF6525207F8AA0752), + SPH_C64(0xA06060FD4727FD60), SPH_C64(0xD9BCBC76358976BC), + SPH_C64(0xB09B9BCD37ACCD9B), SPH_C64(0x8F8E8E8C8A048C8E), + SPH_C64(0xF8A3A315D27115A3), SPH_C64(0x140C0C3C6C603C0C), + SPH_C64(0x8D7B7B8A84FF8A7B), SPH_C64(0x5F3535E180B5E135), + SPH_C64(0x271D1D69F5E8691D), SPH_C64(0x3DE0E047B35347E0), + SPH_C64(0x64D7D7AC21F6ACD7), SPH_C64(0x5BC2C2ED9C5EEDC2), + SPH_C64(0x722E2E96436D962E), SPH_C64(0xDD4B4B7A29627A4B), + SPH_C64(0x1FFEFE215DA321FE), SPH_C64(0xF9575716D5821657), + 
SPH_C64(0x3F151541BDA84115), SPH_C64(0x997777B6E89FB677), + SPH_C64(0x593737EB92A5EB37), SPH_C64(0x32E5E5569E7B56E5), + SPH_C64(0xBC9F9FD9138CD99F), SPH_C64(0x0DF0F01723D317F0), + SPH_C64(0xDE4A4A7F206A7F4A), SPH_C64(0x73DADA95449E95DA), + SPH_C64(0xE8585825A2FA2558), SPH_C64(0x46C9C9CACF06CAC9), + SPH_C64(0x7B29298D7C558D29), SPH_C64(0x1E0A0A225A50220A), + SPH_C64(0xCEB1B14F50E14FB1), SPH_C64(0xFDA0A01AC9691AA0), + SPH_C64(0xBD6B6BDA147FDA6B), SPH_C64(0x928585ABD95CAB85), + SPH_C64(0xDABDBD733C8173BD), SPH_C64(0xE75D5D348FD2345D), + SPH_C64(0x3010105090805010), SPH_C64(0x01F4F40307F303F4), + SPH_C64(0x40CBCBC0DD16C0CB), SPH_C64(0x423E3EC6D3EDC63E), + SPH_C64(0x0F0505112D281105), SPH_C64(0xA96767E6781FE667), + SPH_C64(0x31E4E453977353E4), SPH_C64(0x692727BB0225BB27), + SPH_C64(0xC341415873325841), SPH_C64(0x808B8B9DA72C9D8B), + SPH_C64(0xF4A7A701F65101A7), SPH_C64(0x877D7D94B2CF947D), + SPH_C64(0xA29595FB49DCFB95), SPH_C64(0x75D8D89F568E9FD8), + SPH_C64(0x10FBFB30708B30FB), SPH_C64(0x2FEEEE71CD2371EE), + SPH_C64(0x847C7C91BBC7917C), SPH_C64(0xAA6666E37117E366), + SPH_C64(0x7ADDDD8E7BA68EDD), SPH_C64(0x3917174BAFB84B17), + SPH_C64(0xC947474645024647), SPH_C64(0xBF9E9EDC1A84DC9E), + SPH_C64(0x43CACAC5D41EC5CA), SPH_C64(0x772D2D995875992D), + SPH_C64(0xDCBFBF792E9179BF), SPH_C64(0x0907071B3F381B07), + SPH_C64(0xEAADAD23AC0123AD), SPH_C64(0xEE5A5A2FB0EA2F5A), + SPH_C64(0x988383B5EF6CB583), SPH_C64(0x553333FFB685FF33), + SPH_C64(0xA56363F25C3FF263), SPH_C64(0x0602020A12100A02), + SPH_C64(0xE3AAAA38933938AA), SPH_C64(0x937171A8DEAFA871), + SPH_C64(0x45C8C8CFC60ECFC8), SPH_C64(0x2B19197DD1C87D19), + SPH_C64(0xDB4949703B727049), SPH_C64(0x76D9D99A5F869AD9), + SPH_C64(0x0BF2F21D31C31DF2), SPH_C64(0x38E3E348A84B48E3), + SPH_C64(0xED5B5B2AB9E22A5B), SPH_C64(0x85888892BC349288), + SPH_C64(0xB39A9AC83EA4C89A), SPH_C64(0x6A2626BE0B2DBE26), + SPH_C64(0x563232FABF8DFA32), SPH_C64(0xCDB0B04A59E94AB0), + SPH_C64(0x26E9E96AF21B6AE9), SPH_C64(0x110F0F337778330F), + 
SPH_C64(0x62D5D5A633E6A6D5), SPH_C64(0x9D8080BAF474BA80), + SPH_C64(0xDFBEBE7C27997CBE), SPH_C64(0x4ACDCDDEEB26DECD), + SPH_C64(0x5C3434E489BDE434), SPH_C64(0xD8484875327A7548), + SPH_C64(0x1CFFFF2454AB24FF), SPH_C64(0x8E7A7A8F8DF78F7A), + SPH_C64(0xAD9090EA64F4EA90), SPH_C64(0xE15F5F3E9DC23E5F), + SPH_C64(0x602020A03D1DA020), SPH_C64(0xB86868D50F67D568), + SPH_C64(0x2E1A1A72CAD0721A), SPH_C64(0xEFAEAE2CB7192CAE), + SPH_C64(0xC1B4B45E7DC95EB4), SPH_C64(0xFC545419CE9A1954), + SPH_C64(0xA89393E57FECE593), SPH_C64(0x662222AA2F0DAA22), + SPH_C64(0xAC6464E96307E964), SPH_C64(0x0EF1F1122ADB12F1), + SPH_C64(0x957373A2CCBFA273), SPH_C64(0x3612125A82905A12), + SPH_C64(0xC040405D7A3A5D40), SPH_C64(0x1808082848402808), + SPH_C64(0x58C3C3E89556E8C3), SPH_C64(0x29ECEC7BDF337BEC), + SPH_C64(0x70DBDB904D9690DB), SPH_C64(0xFEA1A11FC0611FA1), + SPH_C64(0x8A8D8D83911C838D), SPH_C64(0x473D3DC9C8F5C93D), + SPH_C64(0xA49797F15BCCF197), SPH_C64(0x0000000000000000), + SPH_C64(0x4CCFCFD4F936D4CF), SPH_C64(0x7D2B2B876E45872B), + SPH_C64(0x9A7676B3E197B376), SPH_C64(0x9B8282B0E664B082), + SPH_C64(0x67D6D6A928FEA9D6), SPH_C64(0x2D1B1B77C3D8771B), + SPH_C64(0xC2B5B55B74C15BB5), SPH_C64(0xECAFAF29BE1129AF), + SPH_C64(0xBE6A6ADF1D77DF6A), SPH_C64(0xF050500DEABA0D50), + SPH_C64(0xCF45454C57124C45), SPH_C64(0x08F3F31838CB18F3), + SPH_C64(0x503030F0AD9DF030), SPH_C64(0x2CEFEF74C42B74EF), + SPH_C64(0x413F3FC3DAE5C33F), SPH_C64(0xFF55551CC7921C55), + SPH_C64(0xFBA2A210DB7910A2), SPH_C64(0x23EAEA65E90365EA), + SPH_C64(0xAF6565EC6A0FEC65), SPH_C64(0xD3BABA6803B968BA), + SPH_C64(0x712F2F934A65932F), SPH_C64(0x5DC0C0E78E4EE7C0), + SPH_C64(0x7FDEDE8160BE81DE), SPH_C64(0x241C1C6CFCE06C1C), + SPH_C64(0x1AFDFD2E46BB2EFD), SPH_C64(0xD74D4D641F52644D), + SPH_C64(0xAB9292E076E4E092), SPH_C64(0x9F7575BCFA8FBC75), + SPH_C64(0x0A06061E36301E06), SPH_C64(0x838A8A98AE24988A), + SPH_C64(0xCBB2B2404BF940B2), SPH_C64(0x37E6E659856359E6), + SPH_C64(0x120E0E367E70360E), SPH_C64(0x211F1F63E7F8631F), + 
SPH_C64(0xA66262F75537F762), SPH_C64(0x61D4D4A33AEEA3D4), + SPH_C64(0xE5A8A832812932A8), SPH_C64(0xA79696F452C4F496), + SPH_C64(0x16F9F93A629B3AF9), SPH_C64(0x52C5C5F6A366F6C5), + SPH_C64(0x6F2525B11035B125), SPH_C64(0xEB595920ABF22059), + SPH_C64(0x918484AED054AE84), SPH_C64(0x967272A7C5B7A772), + SPH_C64(0x4B3939DDECD5DD39), SPH_C64(0xD44C4C61165A614C), + SPH_C64(0xE25E5E3B94CA3B5E), SPH_C64(0x887878859FE78578), + SPH_C64(0x483838D8E5DDD838), SPH_C64(0x898C8C869814868C), + SPH_C64(0x6ED1D1B217C6B2D1), SPH_C64(0xF2A5A50BE4410BA5), + SPH_C64(0x3BE2E24DA1434DE2), SPH_C64(0xA36161F84E2FF861), + SPH_C64(0xC8B3B34542F145B3), SPH_C64(0x632121A53415A521), + SPH_C64(0xB99C9CD60894D69C), SPH_C64(0x221E1E66EEF0661E), + SPH_C64(0xC543435261225243), SPH_C64(0x54C7C7FCB176FCC7), + SPH_C64(0x19FCFC2B4FB32BFC), SPH_C64(0x0C04041424201404), + SPH_C64(0xF3515108E3B20851), SPH_C64(0xB69999C725BCC799), + SPH_C64(0xB76D6DC4224FC46D), SPH_C64(0x170D0D396568390D), + SPH_C64(0x13FAFA35798335FA), SPH_C64(0x7CDFDF8469B684DF), + SPH_C64(0x827E7E9BA9D79B7E), SPH_C64(0x6C2424B4193DB424), + SPH_C64(0x4D3B3BD7FEC5D73B), SPH_C64(0xE0ABAB3D9A313DAB), + SPH_C64(0x4FCECED1F03ED1CE), SPH_C64(0x3311115599885511), + SPH_C64(0x8C8F8F89830C898F), SPH_C64(0xD24E4E6B044A6B4E), + SPH_C64(0xC4B7B75166D151B7), SPH_C64(0x20EBEB60E00B60EB), + SPH_C64(0x443C3CCCC1FDCC3C), SPH_C64(0x9E8181BFFD7CBF81), + SPH_C64(0xA19494FE40D4FE94), SPH_C64(0x04F7F70C1CEB0CF7), + SPH_C64(0xD6B9B96718A167B9), SPH_C64(0x3513135F8B985F13), + SPH_C64(0x742C2C9C517D9C2C), SPH_C64(0x68D3D3B805D6B8D3), + SPH_C64(0x34E7E75C8C6B5CE7), SPH_C64(0xB26E6ECB3957CB6E), + SPH_C64(0x51C4C4F3AA6EF3C4), SPH_C64(0x0503030F1B180F03), + SPH_C64(0xFA565613DC8A1356), SPH_C64(0xCC4444495E1A4944), + SPH_C64(0x817F7F9EA0DF9E7F), SPH_C64(0xE6A9A937882137A9), + SPH_C64(0x7E2A2A82674D822A), SPH_C64(0xD0BBBB6D0AB16DBB), + SPH_C64(0x5EC1C1E28746E2C1), SPH_C64(0xF5535302F1A20253), + SPH_C64(0x79DCDC8B72AE8BDC), SPH_C64(0x1D0B0B275358270B), + 
SPH_C64(0xBA9D9DD3019CD39D), SPH_C64(0xB46C6CC12B47C16C), + SPH_C64(0x533131F5A495F531), SPH_C64(0x9C7474B9F387B974), + SPH_C64(0x07F6F60915E309F6), SPH_C64(0xCA4646434C0A4346), + SPH_C64(0xE9ACAC26A50926AC), SPH_C64(0x86898997B53C9789), + SPH_C64(0x3C141444B4A04414), SPH_C64(0x3EE1E142BA5B42E1), + SPH_C64(0x3A16164EA6B04E16), SPH_C64(0x4E3A3AD2F7CDD23A), + SPH_C64(0xBB6969D0066FD069), SPH_C64(0x1B09092D41482D09), + SPH_C64(0x907070ADD7A7AD70), SPH_C64(0xC7B6B6546FD954B6), + SPH_C64(0x6DD0D0B71ECEB7D0), SPH_C64(0x2AEDED7ED63B7EED), + SPH_C64(0x49CCCCDBE22EDBCC), SPH_C64(0xC6424257682A5742), + SPH_C64(0xB59898C22CB4C298), SPH_C64(0xF1A4A40EED490EA4), + SPH_C64(0x78282888755D8828), SPH_C64(0xE45C5C3186DA315C), + SPH_C64(0x15F8F83F6B933FF8), SPH_C64(0x978686A4C244A486) +}; + +static const sph_u64 old1_T6[256] = { + SPH_C64(0x181878D8C0781828), SPH_C64(0x2323AF2605AF2365), + SPH_C64(0xC6C6F9B87EF9C657), SPH_C64(0xE8E86FFB136FE825), + SPH_C64(0x8787A1CB4CA18794), SPH_C64(0xB8B86211A962B8D5), + SPH_C64(0x0101050908050103), SPH_C64(0x4F4F6E0D426E4FD1), + SPH_C64(0x3636EE9BADEE365A), SPH_C64(0xA6A604FF5904A6F7), + SPH_C64(0xD2D2BD0CDEBDD26B), SPH_C64(0xF5F5060EFB06F502), + SPH_C64(0x79798096EF80798B), SPH_C64(0x6F6FCE305FCE6FB1), + SPH_C64(0x9191EF6DFCEF91AE), SPH_C64(0x525207F8AA0752F6), + SPH_C64(0x6060FD4727FD60A0), SPH_C64(0xBCBC76358976BCD9), + SPH_C64(0x9B9BCD37ACCD9BB0), SPH_C64(0x8E8E8C8A048C8E8F), + SPH_C64(0xA3A315D27115A3F8), SPH_C64(0x0C0C3C6C603C0C14), + SPH_C64(0x7B7B8A84FF8A7B8D), SPH_C64(0x3535E180B5E1355F), + SPH_C64(0x1D1D69F5E8691D27), SPH_C64(0xE0E047B35347E03D), + SPH_C64(0xD7D7AC21F6ACD764), SPH_C64(0xC2C2ED9C5EEDC25B), + SPH_C64(0x2E2E96436D962E72), SPH_C64(0x4B4B7A29627A4BDD), + SPH_C64(0xFEFE215DA321FE1F), SPH_C64(0x575716D5821657F9), + SPH_C64(0x151541BDA841153F), SPH_C64(0x7777B6E89FB67799), + SPH_C64(0x3737EB92A5EB3759), SPH_C64(0xE5E5569E7B56E532), + SPH_C64(0x9F9FD9138CD99FBC), SPH_C64(0xF0F01723D317F00D), + SPH_C64(0x4A4A7F206A7F4ADE), 
SPH_C64(0xDADA95449E95DA73), + SPH_C64(0x585825A2FA2558E8), SPH_C64(0xC9C9CACF06CAC946), + SPH_C64(0x29298D7C558D297B), SPH_C64(0x0A0A225A50220A1E), + SPH_C64(0xB1B14F50E14FB1CE), SPH_C64(0xA0A01AC9691AA0FD), + SPH_C64(0x6B6BDA147FDA6BBD), SPH_C64(0x8585ABD95CAB8592), + SPH_C64(0xBDBD733C8173BDDA), SPH_C64(0x5D5D348FD2345DE7), + SPH_C64(0x1010509080501030), SPH_C64(0xF4F40307F303F401), + SPH_C64(0xCBCBC0DD16C0CB40), SPH_C64(0x3E3EC6D3EDC63E42), + SPH_C64(0x0505112D2811050F), SPH_C64(0x6767E6781FE667A9), + SPH_C64(0xE4E453977353E431), SPH_C64(0x2727BB0225BB2769), + SPH_C64(0x41415873325841C3), SPH_C64(0x8B8B9DA72C9D8B80), + SPH_C64(0xA7A701F65101A7F4), SPH_C64(0x7D7D94B2CF947D87), + SPH_C64(0x9595FB49DCFB95A2), SPH_C64(0xD8D89F568E9FD875), + SPH_C64(0xFBFB30708B30FB10), SPH_C64(0xEEEE71CD2371EE2F), + SPH_C64(0x7C7C91BBC7917C84), SPH_C64(0x6666E37117E366AA), + SPH_C64(0xDDDD8E7BA68EDD7A), SPH_C64(0x17174BAFB84B1739), + SPH_C64(0x47474645024647C9), SPH_C64(0x9E9EDC1A84DC9EBF), + SPH_C64(0xCACAC5D41EC5CA43), SPH_C64(0x2D2D995875992D77), + SPH_C64(0xBFBF792E9179BFDC), SPH_C64(0x07071B3F381B0709), + SPH_C64(0xADAD23AC0123ADEA), SPH_C64(0x5A5A2FB0EA2F5AEE), + SPH_C64(0x8383B5EF6CB58398), SPH_C64(0x3333FFB685FF3355), + SPH_C64(0x6363F25C3FF263A5), SPH_C64(0x02020A12100A0206), + SPH_C64(0xAAAA38933938AAE3), SPH_C64(0x7171A8DEAFA87193), + SPH_C64(0xC8C8CFC60ECFC845), SPH_C64(0x19197DD1C87D192B), + SPH_C64(0x4949703B727049DB), SPH_C64(0xD9D99A5F869AD976), + SPH_C64(0xF2F21D31C31DF20B), SPH_C64(0xE3E348A84B48E338), + SPH_C64(0x5B5B2AB9E22A5BED), SPH_C64(0x888892BC34928885), + SPH_C64(0x9A9AC83EA4C89AB3), SPH_C64(0x2626BE0B2DBE266A), + SPH_C64(0x3232FABF8DFA3256), SPH_C64(0xB0B04A59E94AB0CD), + SPH_C64(0xE9E96AF21B6AE926), SPH_C64(0x0F0F337778330F11), + SPH_C64(0xD5D5A633E6A6D562), SPH_C64(0x8080BAF474BA809D), + SPH_C64(0xBEBE7C27997CBEDF), SPH_C64(0xCDCDDEEB26DECD4A), + SPH_C64(0x3434E489BDE4345C), SPH_C64(0x484875327A7548D8), + SPH_C64(0xFFFF2454AB24FF1C), 
SPH_C64(0x7A7A8F8DF78F7A8E), + SPH_C64(0x9090EA64F4EA90AD), SPH_C64(0x5F5F3E9DC23E5FE1), + SPH_C64(0x2020A03D1DA02060), SPH_C64(0x6868D50F67D568B8), + SPH_C64(0x1A1A72CAD0721A2E), SPH_C64(0xAEAE2CB7192CAEEF), + SPH_C64(0xB4B45E7DC95EB4C1), SPH_C64(0x545419CE9A1954FC), + SPH_C64(0x9393E57FECE593A8), SPH_C64(0x2222AA2F0DAA2266), + SPH_C64(0x6464E96307E964AC), SPH_C64(0xF1F1122ADB12F10E), + SPH_C64(0x7373A2CCBFA27395), SPH_C64(0x12125A82905A1236), + SPH_C64(0x40405D7A3A5D40C0), SPH_C64(0x0808284840280818), + SPH_C64(0xC3C3E89556E8C358), SPH_C64(0xECEC7BDF337BEC29), + SPH_C64(0xDBDB904D9690DB70), SPH_C64(0xA1A11FC0611FA1FE), + SPH_C64(0x8D8D83911C838D8A), SPH_C64(0x3D3DC9C8F5C93D47), + SPH_C64(0x9797F15BCCF197A4), SPH_C64(0x0000000000000000), + SPH_C64(0xCFCFD4F936D4CF4C), SPH_C64(0x2B2B876E45872B7D), + SPH_C64(0x7676B3E197B3769A), SPH_C64(0x8282B0E664B0829B), + SPH_C64(0xD6D6A928FEA9D667), SPH_C64(0x1B1B77C3D8771B2D), + SPH_C64(0xB5B55B74C15BB5C2), SPH_C64(0xAFAF29BE1129AFEC), + SPH_C64(0x6A6ADF1D77DF6ABE), SPH_C64(0x50500DEABA0D50F0), + SPH_C64(0x45454C57124C45CF), SPH_C64(0xF3F31838CB18F308), + SPH_C64(0x3030F0AD9DF03050), SPH_C64(0xEFEF74C42B74EF2C), + SPH_C64(0x3F3FC3DAE5C33F41), SPH_C64(0x55551CC7921C55FF), + SPH_C64(0xA2A210DB7910A2FB), SPH_C64(0xEAEA65E90365EA23), + SPH_C64(0x6565EC6A0FEC65AF), SPH_C64(0xBABA6803B968BAD3), + SPH_C64(0x2F2F934A65932F71), SPH_C64(0xC0C0E78E4EE7C05D), + SPH_C64(0xDEDE8160BE81DE7F), SPH_C64(0x1C1C6CFCE06C1C24), + SPH_C64(0xFDFD2E46BB2EFD1A), SPH_C64(0x4D4D641F52644DD7), + SPH_C64(0x9292E076E4E092AB), SPH_C64(0x7575BCFA8FBC759F), + SPH_C64(0x06061E36301E060A), SPH_C64(0x8A8A98AE24988A83), + SPH_C64(0xB2B2404BF940B2CB), SPH_C64(0xE6E659856359E637), + SPH_C64(0x0E0E367E70360E12), SPH_C64(0x1F1F63E7F8631F21), + SPH_C64(0x6262F75537F762A6), SPH_C64(0xD4D4A33AEEA3D461), + SPH_C64(0xA8A832812932A8E5), SPH_C64(0x9696F452C4F496A7), + SPH_C64(0xF9F93A629B3AF916), SPH_C64(0xC5C5F6A366F6C552), + SPH_C64(0x2525B11035B1256F), 
SPH_C64(0x595920ABF22059EB), + SPH_C64(0x8484AED054AE8491), SPH_C64(0x7272A7C5B7A77296), + SPH_C64(0x3939DDECD5DD394B), SPH_C64(0x4C4C61165A614CD4), + SPH_C64(0x5E5E3B94CA3B5EE2), SPH_C64(0x7878859FE7857888), + SPH_C64(0x3838D8E5DDD83848), SPH_C64(0x8C8C869814868C89), + SPH_C64(0xD1D1B217C6B2D16E), SPH_C64(0xA5A50BE4410BA5F2), + SPH_C64(0xE2E24DA1434DE23B), SPH_C64(0x6161F84E2FF861A3), + SPH_C64(0xB3B34542F145B3C8), SPH_C64(0x2121A53415A52163), + SPH_C64(0x9C9CD60894D69CB9), SPH_C64(0x1E1E66EEF0661E22), + SPH_C64(0x43435261225243C5), SPH_C64(0xC7C7FCB176FCC754), + SPH_C64(0xFCFC2B4FB32BFC19), SPH_C64(0x040414242014040C), + SPH_C64(0x515108E3B20851F3), SPH_C64(0x9999C725BCC799B6), + SPH_C64(0x6D6DC4224FC46DB7), SPH_C64(0x0D0D396568390D17), + SPH_C64(0xFAFA35798335FA13), SPH_C64(0xDFDF8469B684DF7C), + SPH_C64(0x7E7E9BA9D79B7E82), SPH_C64(0x2424B4193DB4246C), + SPH_C64(0x3B3BD7FEC5D73B4D), SPH_C64(0xABAB3D9A313DABE0), + SPH_C64(0xCECED1F03ED1CE4F), SPH_C64(0x1111559988551133), + SPH_C64(0x8F8F89830C898F8C), SPH_C64(0x4E4E6B044A6B4ED2), + SPH_C64(0xB7B75166D151B7C4), SPH_C64(0xEBEB60E00B60EB20), + SPH_C64(0x3C3CCCC1FDCC3C44), SPH_C64(0x8181BFFD7CBF819E), + SPH_C64(0x9494FE40D4FE94A1), SPH_C64(0xF7F70C1CEB0CF704), + SPH_C64(0xB9B96718A167B9D6), SPH_C64(0x13135F8B985F1335), + SPH_C64(0x2C2C9C517D9C2C74), SPH_C64(0xD3D3B805D6B8D368), + SPH_C64(0xE7E75C8C6B5CE734), SPH_C64(0x6E6ECB3957CB6EB2), + SPH_C64(0xC4C4F3AA6EF3C451), SPH_C64(0x03030F1B180F0305), + SPH_C64(0x565613DC8A1356FA), SPH_C64(0x4444495E1A4944CC), + SPH_C64(0x7F7F9EA0DF9E7F81), SPH_C64(0xA9A937882137A9E6), + SPH_C64(0x2A2A82674D822A7E), SPH_C64(0xBBBB6D0AB16DBBD0), + SPH_C64(0xC1C1E28746E2C15E), SPH_C64(0x535302F1A20253F5), + SPH_C64(0xDCDC8B72AE8BDC79), SPH_C64(0x0B0B275358270B1D), + SPH_C64(0x9D9DD3019CD39DBA), SPH_C64(0x6C6CC12B47C16CB4), + SPH_C64(0x3131F5A495F53153), SPH_C64(0x7474B9F387B9749C), + SPH_C64(0xF6F60915E309F607), SPH_C64(0x4646434C0A4346CA), + SPH_C64(0xACAC26A50926ACE9), 
SPH_C64(0x898997B53C978986), + SPH_C64(0x141444B4A044143C), SPH_C64(0xE1E142BA5B42E13E), + SPH_C64(0x16164EA6B04E163A), SPH_C64(0x3A3AD2F7CDD23A4E), + SPH_C64(0x6969D0066FD069BB), SPH_C64(0x09092D41482D091B), + SPH_C64(0x7070ADD7A7AD7090), SPH_C64(0xB6B6546FD954B6C7), + SPH_C64(0xD0D0B71ECEB7D06D), SPH_C64(0xEDED7ED63B7EED2A), + SPH_C64(0xCCCCDBE22EDBCC49), SPH_C64(0x424257682A5742C6), + SPH_C64(0x9898C22CB4C298B5), SPH_C64(0xA4A40EED490EA4F1), + SPH_C64(0x282888755D882878), SPH_C64(0x5C5C3186DA315CE4), + SPH_C64(0xF8F83F6B933FF815), SPH_C64(0x8686A4C244A48697) +}; + +static const sph_u64 old1_T7[256] = { + SPH_C64(0x1878D8C078182818), SPH_C64(0x23AF2605AF236523), + SPH_C64(0xC6F9B87EF9C657C6), SPH_C64(0xE86FFB136FE825E8), + SPH_C64(0x87A1CB4CA1879487), SPH_C64(0xB86211A962B8D5B8), + SPH_C64(0x0105090805010301), SPH_C64(0x4F6E0D426E4FD14F), + SPH_C64(0x36EE9BADEE365A36), SPH_C64(0xA604FF5904A6F7A6), + SPH_C64(0xD2BD0CDEBDD26BD2), SPH_C64(0xF5060EFB06F502F5), + SPH_C64(0x798096EF80798B79), SPH_C64(0x6FCE305FCE6FB16F), + SPH_C64(0x91EF6DFCEF91AE91), SPH_C64(0x5207F8AA0752F652), + SPH_C64(0x60FD4727FD60A060), SPH_C64(0xBC76358976BCD9BC), + SPH_C64(0x9BCD37ACCD9BB09B), SPH_C64(0x8E8C8A048C8E8F8E), + SPH_C64(0xA315D27115A3F8A3), SPH_C64(0x0C3C6C603C0C140C), + SPH_C64(0x7B8A84FF8A7B8D7B), SPH_C64(0x35E180B5E1355F35), + SPH_C64(0x1D69F5E8691D271D), SPH_C64(0xE047B35347E03DE0), + SPH_C64(0xD7AC21F6ACD764D7), SPH_C64(0xC2ED9C5EEDC25BC2), + SPH_C64(0x2E96436D962E722E), SPH_C64(0x4B7A29627A4BDD4B), + SPH_C64(0xFE215DA321FE1FFE), SPH_C64(0x5716D5821657F957), + SPH_C64(0x1541BDA841153F15), SPH_C64(0x77B6E89FB6779977), + SPH_C64(0x37EB92A5EB375937), SPH_C64(0xE5569E7B56E532E5), + SPH_C64(0x9FD9138CD99FBC9F), SPH_C64(0xF01723D317F00DF0), + SPH_C64(0x4A7F206A7F4ADE4A), SPH_C64(0xDA95449E95DA73DA), + SPH_C64(0x5825A2FA2558E858), SPH_C64(0xC9CACF06CAC946C9), + SPH_C64(0x298D7C558D297B29), SPH_C64(0x0A225A50220A1E0A), + SPH_C64(0xB14F50E14FB1CEB1), SPH_C64(0xA01AC9691AA0FDA0), + 
SPH_C64(0x6BDA147FDA6BBD6B), SPH_C64(0x85ABD95CAB859285), + SPH_C64(0xBD733C8173BDDABD), SPH_C64(0x5D348FD2345DE75D), + SPH_C64(0x1050908050103010), SPH_C64(0xF40307F303F401F4), + SPH_C64(0xCBC0DD16C0CB40CB), SPH_C64(0x3EC6D3EDC63E423E), + SPH_C64(0x05112D2811050F05), SPH_C64(0x67E6781FE667A967), + SPH_C64(0xE453977353E431E4), SPH_C64(0x27BB0225BB276927), + SPH_C64(0x415873325841C341), SPH_C64(0x8B9DA72C9D8B808B), + SPH_C64(0xA701F65101A7F4A7), SPH_C64(0x7D94B2CF947D877D), + SPH_C64(0x95FB49DCFB95A295), SPH_C64(0xD89F568E9FD875D8), + SPH_C64(0xFB30708B30FB10FB), SPH_C64(0xEE71CD2371EE2FEE), + SPH_C64(0x7C91BBC7917C847C), SPH_C64(0x66E37117E366AA66), + SPH_C64(0xDD8E7BA68EDD7ADD), SPH_C64(0x174BAFB84B173917), + SPH_C64(0x474645024647C947), SPH_C64(0x9EDC1A84DC9EBF9E), + SPH_C64(0xCAC5D41EC5CA43CA), SPH_C64(0x2D995875992D772D), + SPH_C64(0xBF792E9179BFDCBF), SPH_C64(0x071B3F381B070907), + SPH_C64(0xAD23AC0123ADEAAD), SPH_C64(0x5A2FB0EA2F5AEE5A), + SPH_C64(0x83B5EF6CB5839883), SPH_C64(0x33FFB685FF335533), + SPH_C64(0x63F25C3FF263A563), SPH_C64(0x020A12100A020602), + SPH_C64(0xAA38933938AAE3AA), SPH_C64(0x71A8DEAFA8719371), + SPH_C64(0xC8CFC60ECFC845C8), SPH_C64(0x197DD1C87D192B19), + SPH_C64(0x49703B727049DB49), SPH_C64(0xD99A5F869AD976D9), + SPH_C64(0xF21D31C31DF20BF2), SPH_C64(0xE348A84B48E338E3), + SPH_C64(0x5B2AB9E22A5BED5B), SPH_C64(0x8892BC3492888588), + SPH_C64(0x9AC83EA4C89AB39A), SPH_C64(0x26BE0B2DBE266A26), + SPH_C64(0x32FABF8DFA325632), SPH_C64(0xB04A59E94AB0CDB0), + SPH_C64(0xE96AF21B6AE926E9), SPH_C64(0x0F337778330F110F), + SPH_C64(0xD5A633E6A6D562D5), SPH_C64(0x80BAF474BA809D80), + SPH_C64(0xBE7C27997CBEDFBE), SPH_C64(0xCDDEEB26DECD4ACD), + SPH_C64(0x34E489BDE4345C34), SPH_C64(0x4875327A7548D848), + SPH_C64(0xFF2454AB24FF1CFF), SPH_C64(0x7A8F8DF78F7A8E7A), + SPH_C64(0x90EA64F4EA90AD90), SPH_C64(0x5F3E9DC23E5FE15F), + SPH_C64(0x20A03D1DA0206020), SPH_C64(0x68D50F67D568B868), + SPH_C64(0x1A72CAD0721A2E1A), SPH_C64(0xAE2CB7192CAEEFAE), + 
SPH_C64(0xB45E7DC95EB4C1B4), SPH_C64(0x5419CE9A1954FC54), + SPH_C64(0x93E57FECE593A893), SPH_C64(0x22AA2F0DAA226622), + SPH_C64(0x64E96307E964AC64), SPH_C64(0xF1122ADB12F10EF1), + SPH_C64(0x73A2CCBFA2739573), SPH_C64(0x125A82905A123612), + SPH_C64(0x405D7A3A5D40C040), SPH_C64(0x0828484028081808), + SPH_C64(0xC3E89556E8C358C3), SPH_C64(0xEC7BDF337BEC29EC), + SPH_C64(0xDB904D9690DB70DB), SPH_C64(0xA11FC0611FA1FEA1), + SPH_C64(0x8D83911C838D8A8D), SPH_C64(0x3DC9C8F5C93D473D), + SPH_C64(0x97F15BCCF197A497), SPH_C64(0x0000000000000000), + SPH_C64(0xCFD4F936D4CF4CCF), SPH_C64(0x2B876E45872B7D2B), + SPH_C64(0x76B3E197B3769A76), SPH_C64(0x82B0E664B0829B82), + SPH_C64(0xD6A928FEA9D667D6), SPH_C64(0x1B77C3D8771B2D1B), + SPH_C64(0xB55B74C15BB5C2B5), SPH_C64(0xAF29BE1129AFECAF), + SPH_C64(0x6ADF1D77DF6ABE6A), SPH_C64(0x500DEABA0D50F050), + SPH_C64(0x454C57124C45CF45), SPH_C64(0xF31838CB18F308F3), + SPH_C64(0x30F0AD9DF0305030), SPH_C64(0xEF74C42B74EF2CEF), + SPH_C64(0x3FC3DAE5C33F413F), SPH_C64(0x551CC7921C55FF55), + SPH_C64(0xA210DB7910A2FBA2), SPH_C64(0xEA65E90365EA23EA), + SPH_C64(0x65EC6A0FEC65AF65), SPH_C64(0xBA6803B968BAD3BA), + SPH_C64(0x2F934A65932F712F), SPH_C64(0xC0E78E4EE7C05DC0), + SPH_C64(0xDE8160BE81DE7FDE), SPH_C64(0x1C6CFCE06C1C241C), + SPH_C64(0xFD2E46BB2EFD1AFD), SPH_C64(0x4D641F52644DD74D), + SPH_C64(0x92E076E4E092AB92), SPH_C64(0x75BCFA8FBC759F75), + SPH_C64(0x061E36301E060A06), SPH_C64(0x8A98AE24988A838A), + SPH_C64(0xB2404BF940B2CBB2), SPH_C64(0xE659856359E637E6), + SPH_C64(0x0E367E70360E120E), SPH_C64(0x1F63E7F8631F211F), + SPH_C64(0x62F75537F762A662), SPH_C64(0xD4A33AEEA3D461D4), + SPH_C64(0xA832812932A8E5A8), SPH_C64(0x96F452C4F496A796), + SPH_C64(0xF93A629B3AF916F9), SPH_C64(0xC5F6A366F6C552C5), + SPH_C64(0x25B11035B1256F25), SPH_C64(0x5920ABF22059EB59), + SPH_C64(0x84AED054AE849184), SPH_C64(0x72A7C5B7A7729672), + SPH_C64(0x39DDECD5DD394B39), SPH_C64(0x4C61165A614CD44C), + SPH_C64(0x5E3B94CA3B5EE25E), SPH_C64(0x78859FE785788878), + 
SPH_C64(0x38D8E5DDD8384838), SPH_C64(0x8C869814868C898C), + SPH_C64(0xD1B217C6B2D16ED1), SPH_C64(0xA50BE4410BA5F2A5), + SPH_C64(0xE24DA1434DE23BE2), SPH_C64(0x61F84E2FF861A361), + SPH_C64(0xB34542F145B3C8B3), SPH_C64(0x21A53415A5216321), + SPH_C64(0x9CD60894D69CB99C), SPH_C64(0x1E66EEF0661E221E), + SPH_C64(0x435261225243C543), SPH_C64(0xC7FCB176FCC754C7), + SPH_C64(0xFC2B4FB32BFC19FC), SPH_C64(0x0414242014040C04), + SPH_C64(0x5108E3B20851F351), SPH_C64(0x99C725BCC799B699), + SPH_C64(0x6DC4224FC46DB76D), SPH_C64(0x0D396568390D170D), + SPH_C64(0xFA35798335FA13FA), SPH_C64(0xDF8469B684DF7CDF), + SPH_C64(0x7E9BA9D79B7E827E), SPH_C64(0x24B4193DB4246C24), + SPH_C64(0x3BD7FEC5D73B4D3B), SPH_C64(0xAB3D9A313DABE0AB), + SPH_C64(0xCED1F03ED1CE4FCE), SPH_C64(0x1155998855113311), + SPH_C64(0x8F89830C898F8C8F), SPH_C64(0x4E6B044A6B4ED24E), + SPH_C64(0xB75166D151B7C4B7), SPH_C64(0xEB60E00B60EB20EB), + SPH_C64(0x3CCCC1FDCC3C443C), SPH_C64(0x81BFFD7CBF819E81), + SPH_C64(0x94FE40D4FE94A194), SPH_C64(0xF70C1CEB0CF704F7), + SPH_C64(0xB96718A167B9D6B9), SPH_C64(0x135F8B985F133513), + SPH_C64(0x2C9C517D9C2C742C), SPH_C64(0xD3B805D6B8D368D3), + SPH_C64(0xE75C8C6B5CE734E7), SPH_C64(0x6ECB3957CB6EB26E), + SPH_C64(0xC4F3AA6EF3C451C4), SPH_C64(0x030F1B180F030503), + SPH_C64(0x5613DC8A1356FA56), SPH_C64(0x44495E1A4944CC44), + SPH_C64(0x7F9EA0DF9E7F817F), SPH_C64(0xA937882137A9E6A9), + SPH_C64(0x2A82674D822A7E2A), SPH_C64(0xBB6D0AB16DBBD0BB), + SPH_C64(0xC1E28746E2C15EC1), SPH_C64(0x5302F1A20253F553), + SPH_C64(0xDC8B72AE8BDC79DC), SPH_C64(0x0B275358270B1D0B), + SPH_C64(0x9DD3019CD39DBA9D), SPH_C64(0x6CC12B47C16CB46C), + SPH_C64(0x31F5A495F5315331), SPH_C64(0x74B9F387B9749C74), + SPH_C64(0xF60915E309F607F6), SPH_C64(0x46434C0A4346CA46), + SPH_C64(0xAC26A50926ACE9AC), SPH_C64(0x8997B53C97898689), + SPH_C64(0x1444B4A044143C14), SPH_C64(0xE142BA5B42E13EE1), + SPH_C64(0x164EA6B04E163A16), SPH_C64(0x3AD2F7CDD23A4E3A), + SPH_C64(0x69D0066FD069BB69), SPH_C64(0x092D41482D091B09), + 
SPH_C64(0x70ADD7A7AD709070), SPH_C64(0xB6546FD954B6C7B6), + SPH_C64(0xD0B71ECEB7D06DD0), SPH_C64(0xED7ED63B7EED2AED), + SPH_C64(0xCCDBE22EDBCC49CC), SPH_C64(0x4257682A5742C642), + SPH_C64(0x98C22CB4C298B598), SPH_C64(0xA40EED490EA4F1A4), + SPH_C64(0x2888755D88287828), SPH_C64(0x5C3186DA315CE45C), + SPH_C64(0xF83F6B933FF815F8), SPH_C64(0x86A4C244A4869786) +}; + +#endif + +static const sph_u64 old1_RC[10] = { + SPH_C64(0x4F01B887E8C62318), + SPH_C64(0x52916F79F5D2A636), + SPH_C64(0x357B0CA38E9BBC60), + SPH_C64(0x57FE4B2EC2D7E01D), + SPH_C64(0xDA4AF09FE5377715), + SPH_C64(0x856BA0B10A29C958), + SPH_C64(0x67053ECBF4105DBD), + SPH_C64(0xD8957DA78B4127E4), + SPH_C64(0x9E4717DD667CEEFB), + SPH_C64(0x33835AAD07BF2DCA) +}; + +/* ====================================================================== */ + +#define DECL8(z) sph_u64 z ## 0, z ## 1, z ## 2, z ## 3, \ + z ## 4, z ## 5, z ## 6, z ## 7 + +#if SPH_LITTLE_FAST +#define READ_DATA_W(x) do { \ + n ## x = sph_dec64le_aligned( \ + (const unsigned char *)src + 8 * (x)); \ + } while (0) +#define UPDATE_STATE_W(x) do { \ + state[x] ^= n ## x ^ sph_dec64le_aligned( \ + (const unsigned char *)src + 8 * (x)); \ + } while (0) +#define LVARS DECL8(n); DECL8(h); +#else +#define READ_DATA_W(x) do { \ + sn ## x = n ## x = sph_dec64le_aligned( \ + (const unsigned char *)src + 8 * (x)); \ + } while (0) +#define UPDATE_STATE_W(x) do { \ + state[x] ^= n ## x ^ sn ## x; \ + } while (0) +#define LVARS DECL8(n); DECL8(sn); DECL8(h); +#endif + +#define READ_STATE_W(x) do { h ## x = state[x]; } while (0) + +#define MUL8(FUN) do { \ + FUN(0); \ + FUN(1); \ + FUN(2); \ + FUN(3); \ + FUN(4); \ + FUN(5); \ + FUN(6); \ + FUN(7); \ + } while (0) + +/* + * First operation: XOR the input data with the first round key. 
+ */ +#define ROUND0_W(x) do { \ + n ## x ^= h ## x; \ + } while (0) + +#define READ_DATA MUL8(READ_DATA_W) +#define READ_STATE MUL8(READ_STATE_W) +#define ROUND0 MUL8(ROUND0_W) +#define UPDATE_STATE MUL8(UPDATE_STATE_W) + +#define BYTE(x, n) ((unsigned)((x) >> (8 * (n))) & 0xFF) + +#if SPH_SMALL_FOOTPRINT_WHIRLPOOL + +static SPH_INLINE sph_u64 +table_skew(sph_u64 val, int num) +{ + return SPH_ROTL64(val, 8 * num); +} + +#define ROUND_ELT(table, in, i0, i1, i2, i3, i4, i5, i6, i7) \ + (table ## 0[BYTE(in ## i0, 0)] \ + ^ table_skew(table ## 0[BYTE(in ## i1, 1)], 1) \ + ^ table_skew(table ## 0[BYTE(in ## i2, 2)], 2) \ + ^ table_skew(table ## 0[BYTE(in ## i3, 3)], 3) \ + ^ table_skew(table ## 0[BYTE(in ## i4, 4)], 4) \ + ^ table_skew(table ## 0[BYTE(in ## i5, 5)], 5) \ + ^ table_skew(table ## 0[BYTE(in ## i6, 6)], 6) \ + ^ table_skew(table ## 0[BYTE(in ## i7, 7)], 7)) +#else +#define ROUND_ELT(table, in, i0, i1, i2, i3, i4, i5, i6, i7) \ + (table ## 0[BYTE(in ## i0, 0)] \ + ^ table ## 1[BYTE(in ## i1, 1)] \ + ^ table ## 2[BYTE(in ## i2, 2)] \ + ^ table ## 3[BYTE(in ## i3, 3)] \ + ^ table ## 4[BYTE(in ## i4, 4)] \ + ^ table ## 5[BYTE(in ## i5, 5)] \ + ^ table ## 6[BYTE(in ## i6, 6)] \ + ^ table ## 7[BYTE(in ## i7, 7)]) +#endif + +#define ROUND(table, in, out, c0, c1, c2, c3, c4, c5, c6, c7) do { \ + out ## 0 = ROUND_ELT(table, in, 0, 7, 6, 5, 4, 3, 2, 1) ^ c0; \ + out ## 1 = ROUND_ELT(table, in, 1, 0, 7, 6, 5, 4, 3, 2) ^ c1; \ + out ## 2 = ROUND_ELT(table, in, 2, 1, 0, 7, 6, 5, 4, 3) ^ c2; \ + out ## 3 = ROUND_ELT(table, in, 3, 2, 1, 0, 7, 6, 5, 4) ^ c3; \ + out ## 4 = ROUND_ELT(table, in, 4, 3, 2, 1, 0, 7, 6, 5) ^ c4; \ + out ## 5 = ROUND_ELT(table, in, 5, 4, 3, 2, 1, 0, 7, 6) ^ c5; \ + out ## 6 = ROUND_ELT(table, in, 6, 5, 4, 3, 2, 1, 0, 7) ^ c6; \ + out ## 7 = ROUND_ELT(table, in, 7, 6, 5, 4, 3, 2, 1, 0) ^ c7; \ + } while (0) + +#define ROUND_KSCHED(table, in, out, c) \ + ROUND(table, in, out, c, 0, 0, 0, 0, 0, 0, 0) + +#define ROUND_WENC(table, in, key, out) \ + 
ROUND(table, in, out, key ## 0, key ## 1, key ## 2, \ + key ## 3, key ## 4, key ## 5, key ## 6, key ## 7) + +#define TRANSFER(dst, src) do { \ + dst ## 0 = src ## 0; \ + dst ## 1 = src ## 1; \ + dst ## 2 = src ## 2; \ + dst ## 3 = src ## 3; \ + dst ## 4 = src ## 4; \ + dst ## 5 = src ## 5; \ + dst ## 6 = src ## 6; \ + dst ## 7 = src ## 7; \ + } while (0) + +/* see sph_whirlpool.h */ +void +sph_whirlpool_init(void *cc) +{ + sph_whirlpool_context *sc; + + sc = cc; + /* + * We want to set all eight 64-bit words to 0. A "memset()" + * is not, theoretically, fully standard, but in practice it + * will work everywhere. + */ + memset(sc->state, 0, sizeof sc->state); +#if SPH_64 + sc->count = 0; +#else + sc->count_high = sc->count_low = 0; +#endif +} + +#define ROUND_FUN(name, type) \ +static void \ +name ## _round(const void *src, sph_u64 *state) \ +{ \ + LVARS \ + int r; \ + \ + READ_DATA; \ + READ_STATE; \ + ROUND0; \ + for (r = 0; r < 10; r ++) { \ + DECL8(tmp); \ + \ + ROUND_KSCHED(type ## _T, h, tmp, type ## _RC[r]); \ + TRANSFER(h, tmp); \ + ROUND_WENC(type ## _T, n, h, tmp); \ + TRANSFER(n, tmp); \ + } \ + UPDATE_STATE; \ +} + +ROUND_FUN(whirlpool, plain) +ROUND_FUN(whirlpool0, old0) +ROUND_FUN(whirlpool1, old1) + +/* + * We want big-endian encoding of the message length, over 256 bits. BE64 + * triggers that. However, our block length is 512 bits, not 1024 bits. + * Internally, our encoding/decoding is little-endian, which is not a + * problem here since we also deactivate output in md_helper.c. 
+ */ +#define BE64 1 +#define SVAL sc->state +#define BLEN 64U +#define PLW4 1 + +#define RFUN whirlpool_round +#define HASH whirlpool +#include "md_helper.c" +#undef RFUN +#undef HASH + +#define RFUN whirlpool0_round +#define HASH whirlpool0 +#include "md_helper.c" +#undef RFUN +#undef HASH + +#define RFUN whirlpool1_round +#define HASH whirlpool1 +#include "md_helper.c" +#undef RFUN +#undef HASH + +#define MAKE_CLOSE(name) \ +void \ +sph_ ## name ## _close(void *cc, void *dst) \ +{ \ + sph_ ## name ## _context *sc; \ + int i; \ + \ + name ## _close(cc, dst, 0); \ + sc = cc; \ + for (i = 0; i < 8; i ++) \ + sph_enc64le((unsigned char *)dst + 8 * i, sc->state[i]); \ + sph_ ## name ## _init(cc); \ +} + +MAKE_CLOSE(whirlpool) +MAKE_CLOSE(whirlpool0) +MAKE_CLOSE(whirlpool1) + +#endif diff --git a/util.c b/util.c index 451aaed484..38c237a121 100644 --- a/util.c +++ b/util.c @@ -7,7 +7,7 @@ * Software Foundation; either version 2 of the License, or (at your option) * any later version. See COPYING for more details. */ - + #define _GNU_SOURCE #include "cpuminer-config.h" @@ -74,21 +74,23 @@ void applog(int prio, const char *fmt, ...) 
#ifdef HAVE_SYSLOG_H if (use_syslog) { - va_list ap2; + va_list ap2, ap3; char *buf; int len; va_copy(ap2, ap); + va_copy(ap3, ap); len = vsnprintf(NULL, 0, fmt, ap2) + 1; va_end(ap2); buf = alloca(len); - if (vsnprintf(buf, len, fmt, ap) >= 0) + if (vsnprintf(buf, len, fmt, ap3) >= 0) syslog(prio, "%s", buf); + va_end(ap3); } #else if (0) {} #endif - else { + if (1) { char *f; int len; time_t now; @@ -296,6 +298,7 @@ static int sockopt_keepalive_cb(void *userdata, curl_socket_t fd, } #endif + json_t *json_rpc_call(CURL *curl, const char *url, const char *userpass, const char *rpc_req, bool longpoll_scan, bool longpoll, int *curl_err) @@ -449,6 +452,229 @@ json_t *json_rpc_call(CURL *curl, const char *url, return NULL; } + + +static char *hack_json_numbers(const char *in) +{ + char *out; + int i, off, intoff; + bool in_str, in_int; + + out =(char*) calloc(2 * strlen(in) + 1, 1); + if (!out) + return NULL; + off = intoff = 0; + in_str = in_int = false; + for (i = 0; in[i]; i++) { + char c = in[i]; + if (c == '"') { + in_str = !in_str; + } + else if (c == '\\') { + out[off++] = c; + if (!in[++i]) + break; + } + else if (!in_str && !in_int && isdigit(c)) { + intoff = off; + in_int = true; + } + else if (in_int && !isdigit(c)) { + if (c != '.' 
&& c != 'e' && c != 'E' && c != '+' && c != '-') { + in_int = false; + if (off - intoff > 4) { + char *end; +#if JSON_INTEGER_IS_LONG_LONG + errno = 0; + strtoll(out + intoff, &end, 10); + if (!*end && errno == ERANGE) { +#else + long l; + errno = 0; + l = strtol(out + intoff, &end, 10); + if (!*end && (errno == ERANGE || l > INT_MAX)) { +#endif + out[off++] = '.'; + out[off++] = '0'; + } + } + } + } + out[off++] = in[i]; + } + return out; + } + + +json_t *json_rpc_call2(CURL *curl, const char *url, + const char *userpass, const char *rpc_req, + int *curl_err, int flags) +{ + json_t *val, *err_val, *res_val; + int rc; + long http_rc; + struct data_buffer all_data = { 0 }; + struct upload_buffer upload_data; + char *json_buf; + json_error_t err; + struct curl_slist *headers = NULL; + char len_hdr[64]; + char curl_err_str[CURL_ERROR_SIZE]; + long timeout = (flags & JSON_RPC_LONGPOLL) ? opt_timeout : 30; + struct header_info hi = { 0 }; + + /* it is assumed that 'curl' is freshly [re]initialized at this pt */ + + if (opt_protocol) + curl_easy_setopt(curl, CURLOPT_VERBOSE, 1); + curl_easy_setopt(curl, CURLOPT_URL, url); + if (opt_cert) + curl_easy_setopt(curl, CURLOPT_CAINFO, opt_cert); + curl_easy_setopt(curl, CURLOPT_ENCODING, ""); + curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1); + curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1); + curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1); + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, all_data_cb); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, &all_data); + curl_easy_setopt(curl, CURLOPT_READFUNCTION, upload_data_cb); + curl_easy_setopt(curl, CURLOPT_READDATA, &upload_data); +#if LIBCURL_VERSION_NUM >= 0x071200 + curl_easy_setopt(curl, CURLOPT_SEEKFUNCTION, &seek_data_cb); + curl_easy_setopt(curl, CURLOPT_SEEKDATA, &upload_data); +#endif + curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_err_str); + if (opt_redirect) + curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1); + curl_easy_setopt(curl, CURLOPT_TIMEOUT, timeout); + 
curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, resp_hdr_cb); + curl_easy_setopt(curl, CURLOPT_HEADERDATA, &hi); + if (opt_proxy) { + curl_easy_setopt(curl, CURLOPT_PROXY, opt_proxy); + curl_easy_setopt(curl, CURLOPT_PROXYTYPE, opt_proxy_type); + } + if (userpass) { + curl_easy_setopt(curl, CURLOPT_USERPWD, userpass); + curl_easy_setopt(curl, CURLOPT_HTTPAUTH, CURLAUTH_BASIC); + } +#if LIBCURL_VERSION_NUM >= 0x070f06 + if (flags & JSON_RPC_LONGPOLL) + curl_easy_setopt(curl, CURLOPT_SOCKOPTFUNCTION, sockopt_keepalive_cb); +#endif + curl_easy_setopt(curl, CURLOPT_POST, 1); + + if (opt_protocol) + applog(LOG_DEBUG, "JSON protocol request:\n%s\n", rpc_req); + + upload_data.buf = rpc_req; + upload_data.len = strlen(rpc_req); + upload_data.pos = 0; + sprintf(len_hdr, "Content-Length: %lu", + (unsigned long)upload_data.len); + + headers = curl_slist_append(headers, "Content-Type: application/json"); + headers = curl_slist_append(headers, len_hdr); + headers = curl_slist_append(headers, "User-Agent: " USER_AGENT); + headers = curl_slist_append(headers, "X-Mining-Extensions: midstate"); + headers = curl_slist_append(headers, "Accept:"); /* disable Accept hdr*/ + headers = curl_slist_append(headers, "Expect:"); /* disable Expect hdr*/ + + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + + rc = curl_easy_perform(curl); + if (curl_err != NULL) + *curl_err = rc; + if (rc) { + curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_rc); + if (!((flags & JSON_RPC_LONGPOLL) && rc == CURLE_OPERATION_TIMEDOUT) && + !((flags & JSON_RPC_QUIET_404) && http_rc == 404)) + applog(LOG_ERR, "HTTP request failed: %s", curl_err_str); + if (curl_err && (flags & JSON_RPC_QUIET_404) && http_rc == 404) + *curl_err = CURLE_OK; + goto err_out; + } + + /* If X-Stratum was found, activate Stratum */ + if (want_stratum && hi.stratum_url && + !strncasecmp(hi.stratum_url, "stratum+tcp://", 14)) { + have_stratum = true; + tq_push(thr_info[stratum_thr_id].q, hi.stratum_url); + hi.stratum_url = NULL; + 
} + + /* If X-Long-Polling was found, activate long polling */ + if (!have_longpoll && want_longpoll && hi.lp_path && !have_gbt && + allow_getwork && !have_stratum) { + have_longpoll = true; + tq_push(thr_info[longpoll_thr_id].q, hi.lp_path); + hi.lp_path = NULL; + } + + if (!all_data.buf) { + applog(LOG_ERR, "Empty data received in json_rpc_call."); + goto err_out; + } + + json_buf = hack_json_numbers((const char*)all_data.buf); + errno = 0; /* needed for Jansson < 2.1 */ + val = JSON_LOADS(json_buf, &err); + free(json_buf); + if (!val) { + applog(LOG_ERR, "JSON decode failed(%d): %s", err.line, err.text); + goto err_out; + } + + if (opt_protocol) { + char *s = json_dumps(val, JSON_INDENT(3)); + applog(LOG_DEBUG, "JSON protocol response:\n%s", s); + free(s); + } + + /* JSON-RPC valid response returns a 'result' and a null 'error'. */ + res_val = json_object_get(val, "result"); + err_val = json_object_get(val, "error"); + + if (!res_val || (err_val && !json_is_null(err_val))) { + char *s; + + if (err_val) + s = json_dumps(err_val, JSON_INDENT(3)); + else + s = strdup("(unknown reason)"); + + applog(LOG_ERR, "JSON-RPC call failed: %s", s); + + free(s); + + goto err_out; + } + + if (hi.reason) + json_object_set_new(val, "reject-reason", json_string(hi.reason)); + + databuf_free(&all_data); + curl_slist_free_all(headers); + curl_easy_reset(curl); + return val; + +err_out: + free(hi.lp_path); + free(hi.reason); + free(hi.stratum_url); + databuf_free(&all_data); + curl_slist_free_all(headers); + curl_easy_reset(curl); + return NULL; +} + + +void abin2hex(char *s, const unsigned char *p, size_t len) +{ + int i; + for (i = 0; i < len; i++) + sprintf(s + (i * 2), "%02x", (unsigned int) p[i]); +} + + char *bin2hex(const unsigned char *p, size_t len) { unsigned int i; @@ -489,6 +715,140 @@ bool hex2bin(unsigned char *p, const char *hexstr, size_t len) return (len == 0 && *hexstr == 0) ? 
true : false; } +int varint_encode(unsigned char *p, uint64_t n) +{ + int i; + if (n < 0xfd) { + p[0] = n; + return 1; + } + if (n <= 0xffff) { + p[0] = 0xfd; + p[1] = n & 0xff; + p[2] = n >> 8; + return 3; + } + if (n <= 0xffffffff) { + p[0] = 0xfe; + for (i = 1; i < 5; i++) { + p[i] = n & 0xff; + n >>= 8; + } + return 5; + } + p[0] = 0xff; + for (i = 1; i < 9; i++) { + p[i] = n & 0xff; + n >>= 8; + } + return 9; +} + +static const char b58digits[] = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"; + +static bool b58dec(unsigned char *bin, size_t binsz, const char *b58) +{ + size_t i, j; + uint64_t t; + uint32_t c; + uint32_t *outi; + size_t outisz = (binsz + 3) / 4; + int rem = binsz % 4; + uint32_t remmask = 0xffffffff << (8 * rem); + size_t b58sz = strlen(b58); + bool rc = false; + + outi = (uint32_t*) calloc(outisz, sizeof(*outi)); + + for (i = 0; i < b58sz; ++i) { + for (c = 0; b58digits[c] != b58[i]; c++) + if (!b58digits[c]) + goto out; + for (j = outisz; j--;) { + t = (uint64_t)outi[j] * 58 + c; + c = t >> 32; + outi[j] = t & 0xffffffff; + } + if (c || outi[0] & remmask) + goto out; + } + + j = 0; + switch (rem) { + case 3: + *(bin++) = (outi[0] >> 16) & 0xff; + case 2: + *(bin++) = (outi[0] >> 8) & 0xff; + case 1: + *(bin++) = outi[0] & 0xff; + ++j; + default: + break; + } + for (; j < outisz; ++j) { + be32enc((uint32_t *)bin, outi[j]); + bin += sizeof(uint32_t); + } + + rc = true; +out: + free(outi); + return rc; +} + +static int b58check(unsigned char *bin, size_t binsz, const char *b58) +{ + unsigned char buf[32]; + int i; + + sha256d(buf, bin, binsz - 4); + if (memcmp(&bin[binsz - 4], buf, 4)) + return -1; + + /* Check number of zeros is correct AFTER verifying checksum + * (to avoid possibility of accessing the string beyond the end) */ + for (i = 0; bin[i] == '\0' && b58[i] == '1'; ++i); + if (bin[i] == '\0' || b58[i] == '1') + return -3; + + return bin[0]; +} + +size_t address_to_script(unsigned char *out, size_t outsz, const char 
*addr) +{ + unsigned char addrbin[25]; + int addrver; + size_t rv; + + if (!b58dec(addrbin, sizeof(addrbin), addr)) + return 0; + addrver = b58check(addrbin, sizeof(addrbin), addr); + if (addrver < 0) + return 0; + switch (addrver) { + case 5: /* Bitcoin script hash */ + case 196: /* Testnet script hash */ + if (outsz < (rv = 23)) + return rv; + out[0] = 0xa9; /* OP_HASH160 */ + out[1] = 0x14; /* push 20 bytes */ + memcpy(&out[2], &addrbin[1], 20); + out[22] = 0x87; /* OP_EQUAL */ + return rv; + default: + if (outsz < (rv = 25)) + return rv; + out[0] = 0x76; /* OP_DUP */ + out[1] = 0xa9; /* OP_HASH160 */ + out[2] = 0x14; /* push 20 bytes */ + memcpy(&out[3], &addrbin[1], 20); + out[23] = 0x88; /* OP_EQUALVERIFY */ + out[24] = 0xac; /* OP_CHECKSIG */ + return rv; + } +} + + /* Subtract the `struct timeval' values X and Y, storing the result in RESULT. Return 1 if the difference is negative, otherwise 0. */ @@ -981,6 +1341,54 @@ bool stratum_authorize(struct stratum_ctx *sctx, const char *user, const char *p return ret; } +static bool stratum_notify_m7(struct stratum_ctx *sctx, json_t *params) +{ + const char *job_id, *prevblock, *accroot, *merkleroot, *version, *ntime; + int height; + bool clean; + + job_id = json_string_value(json_array_get(params, 0)); + prevblock = json_string_value(json_array_get(params, 1)); + accroot = json_string_value(json_array_get(params, 2)); + merkleroot = json_string_value(json_array_get(params, 3)); + height = json_integer_value(json_array_get(params, 4)); + version = json_string_value(json_array_get(params, 5)); + ntime = json_string_value(json_array_get(params, 6)); + clean = json_is_true(json_array_get(params, 7)); + + if (!job_id || !prevblock || !accroot || !merkleroot || + !version || !height || !ntime || + strlen(prevblock) != 32*2 || + strlen(accroot) != 32*2 || + strlen(merkleroot) != 32*2 || + strlen(ntime) != 8*2 || strlen(version) != 2*2) { + applog(LOG_ERR, "Stratum (M7) notify: invalid parameters"); + return false; + } + 
+ pthread_mutex_lock(&sctx->work_lock); + + if (!sctx->job.job_id || strcmp(sctx->job.job_id, job_id)) { + sctx->job.xnonce2 = (unsigned char *)realloc(sctx->job.xnonce2, sctx->xnonce2_size); + memset(sctx->job.xnonce2, 0, sctx->xnonce2_size); + } + free(sctx->job.job_id); + sctx->job.job_id = strdup(job_id); + + hex2bin(sctx->job.m7prevblock, prevblock, 32); + hex2bin(sctx->job.m7accroot, accroot, 32); + hex2bin(sctx->job.m7merkleroot, merkleroot, 32); + be64enc(sctx->job.m7height, height); + hex2bin(sctx->job.m7version, version, 2); + hex2bin(sctx->job.m7ntime, ntime, 8); + sctx->job.clean = clean; + + sctx->job.diff = sctx->next_diff; + + pthread_mutex_unlock(&sctx->work_lock); + + return true; +} static bool stratum_notify(struct stratum_ctx *sctx, json_t *params) { @@ -1177,11 +1585,70 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s) goto out; id = json_object_get(val, "id"); params = json_object_get(val, "params"); + + if (!strcasecmp(method, "mining.notify")) { + ret = stratum_notify(sctx, params); + goto out; + } + + if (!strcasecmp(method, "mining.set_difficulty")) { + ret = stratum_set_difficulty(sctx, params); + goto out; + } + if (!strcasecmp(method, "client.reconnect")) { + ret = stratum_reconnect(sctx, params); + goto out; + } + if (!strcasecmp(method, "client.get_version")) { + ret = stratum_get_version(sctx, id); + goto out; + } + if (!strcasecmp(method, "client.show_message")) { + ret = stratum_show_message(sctx, id, params); + goto out; + } + +out: + if (val) + json_decref(val); + + return ret; +} + +bool stratum_handle_method_m7(struct stratum_ctx *sctx, const char *s) +{ + json_t *val, *id, *params; + json_error_t err; + const char *method; + bool ret = false; + val = JSON_LOADS(s, &err); + if (!val) { + applog(LOG_ERR, "JSON decode failed(%d): %s", err.line, err.text); + goto out; + } + + method = json_string_value(json_object_get(val, "method")); + if (!method) + goto out; + id = json_object_get(val, "id"); + params = 
json_object_get(val, "params"); + /* if (!strcasecmp(method, "mining.notify")) { ret = stratum_notify(sctx, params); goto out; } + */ + if (!strcasecmp(method, "mining.notify")) { +// if (opt_algo == ALGO_M7) { + ret = stratum_notify_m7(sctx, params); +// } else { +// ret = stratum_notify(sctx, params); +// } + goto out; + } + + if (!strcasecmp(method, "mining.set_difficulty")) { ret = stratum_set_difficulty(sctx, params); goto out; @@ -1206,6 +1673,7 @@ bool stratum_handle_method(struct stratum_ctx *sctx, const char *s) return ret; } + struct thread_q *tq_new(void) { struct thread_q *tq; diff --git a/x11/cuda_x11_shavite512.cu b/x11/cuda_x11_shavite512.cu index b3fd9258ed..95152e4b8a 100644 --- a/x11/cuda_x11_shavite512.cu +++ b/x11/cuda_x11_shavite512.cu @@ -1,4 +1,10 @@ // aus heavy.cu +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +#include +#include extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); typedef unsigned char BitSequence; @@ -13,6 +19,8 @@ typedef unsigned long long uint64_t; #define SPH_C32(x) ((uint32_t)(x ## U)) #define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) + __constant__ uint32_t c_PaddedMessage80[32]; // padded message (80 bytes + padding) + static __constant__ uint32_t d_ShaviteInitVector[16]; static const uint32_t h_ShaviteInitVector[] = { SPH_C32(0x72FCCDD8), SPH_C32(0x79CA4727), SPH_C32(0x128A077B), SPH_C32(0x40D55AEC), @@ -23,6 +31,11 @@ static const uint32_t h_ShaviteInitVector[] = { #include "cuda_x11_aes.cu" +static __device__ uint32_t cuda_swab32(uint32_t x) +{ + return __byte_perm(x, 0, 0x0123); +} + static __device__ __forceinline__ void AES_ROUND_NOKEY( const uint32_t* __restrict__ sharedMemory, uint32_t &x0, uint32_t &x1, uint32_t &x2, uint32_t &x3) @@ -54,7 +67,7 @@ static __device__ __forceinline__ void KEY_EXPAND_ELT( } static __device__ void -c512(const uint32_t* sharedMemory, uint32_t *state, uint32_t *msg) +c512(const uint32_t* sharedMemory, uint32_t 
*state, uint32_t *msg, uint32_t count) { uint32_t p0, p1, p2, p3, p4, p5, p6, p7; uint32_t p8, p9, pA, pB, pC, pD, pE, pF; @@ -63,7 +76,7 @@ c512(const uint32_t* sharedMemory, uint32_t *state, uint32_t *msg) uint32_t rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; uint32_t rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; uint32_t rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; - const uint32_t counter = 512; + const uint32_t counter = count; p0 = state[0x0]; p1 = state[0x1]; @@ -1299,6 +1312,48 @@ c512(const uint32_t* sharedMemory, uint32_t *state, uint32_t *msg) } + +__global__ void x11_shavite512_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash) +{ + __shared__ uint32_t sharedMemory[1024]; + + aes_gpu_init(sharedMemory); + + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + uint32_t nounce = startNounce + thread; + + // kopiere init-state + uint32_t state[16]; + + +#pragma unroll 16 + for(int i=0;i<16;i++) { + state[i] = d_ShaviteInitVector[i];} + + uint32_t msg[32]; + +#pragma unroll 32 + for(int i=0;i<32;i++) { + msg[i] = c_PaddedMessage80[i];} + msg[19] = cuda_swab32(nounce); + msg[20] = 0x80; + msg[27] = 0x2800000; + msg[31] = 0x2000000; + + c512(sharedMemory, state, msg,640); + +uint32_t *outHash = (uint32_t *)outputHash + 16 * thread; + +#pragma unroll 16 + for(int i=0;i<16;i++) + outHash[i] = state[i]; + + + } //thread < threads +} // Die Hash-Funktion __global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) { @@ -1306,6 +1361,7 @@ __global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, ui aes_gpu_init(sharedMemory); + int thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { @@ -1341,12 +1397,12 @@ __global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, ui msg[30] = 0; msg[31] = 0x02000000; - c512(sharedMemory, state, msg); + c512(sharedMemory, state, msg, 512); #pragma unroll 16 
for(int i=0;i<16;i++) Hash[i] = state[i]; - } + } // thread < threads } @@ -1369,10 +1425,34 @@ __host__ void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t start dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); - // Größe des dynamischen Shared Memory Bereichs size_t shared_size = 0; x11_shavite512_gpu_hash_64<<>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector); MyStreamSynchronize(NULL, order, thr_id); } +__host__ void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order) +{ + const int threadsperblock = 256; + + // berechne wie viele Thread Blocks wir brauchen + dim3 grid((threads + threadsperblock-1)/threadsperblock); + dim3 block(threadsperblock); + + size_t shared_size = 0; + + x11_shavite512_gpu_hash_80<<>>(threads, startNounce, d_outputHash); + + MyStreamSynchronize(NULL, order, thr_id); +} +__host__ void x11_shavite512_setBlock_80(void *pdata) +{ + // Message mit Padding bereitstellen + // lediglich die korrekte Nonce ist noch ab Byte 76 einzusetzen. + unsigned char PaddedMessage[128]; + memcpy(PaddedMessage, pdata, 80); + memset(PaddedMessage+80, 0, 48); + + cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 32*sizeof(uint32_t), 0, cudaMemcpyHostToDevice); +} + diff --git a/x13/cuda_haval512.cu b/x13/cuda_haval512.cu new file mode 100644 index 0000000000..8e6882a982 --- /dev/null +++ b/x13/cuda_haval512.cu @@ -0,0 +1,553 @@ +/* + * Haval-512 + * + * Built on cbuchner1's implementation, actual hashing code + * heavily based on phm's sgminer + * + */ + +/* + * Haval-512 kernel implementation. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 djm34 + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author phm + */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +#include +#include +#include + + +#define USE_SHARED 1 + +#define SPH_C64(x) ((uint64_t)(x ## ULL)) +#define SPH_C32(x) ((uint32_t)(x ## U)) +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) + +#include "cuda_helper.h" + + + + +// aus heavy.cu +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); + +__constant__ uint32_t c_PaddedMessage80[32]; +static __constant__ uint32_t initVector[8]; + +static const uint32_t c_initVector[8] = { + SPH_C32(0x243F6A88), + SPH_C32(0x85A308D3), + SPH_C32(0x13198A2E), + SPH_C32(0x03707344), + SPH_C32(0xA4093822), + SPH_C32(0x299F31D0), + SPH_C32(0x082EFA98), + SPH_C32(0xEC4E6C89) +}; + +#define PASS1(n, in) { \ + STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in[ 0], SPH_C32(0x00000000)); \ + STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in[ 1], SPH_C32(0x00000000)); \ + STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in[ 2], SPH_C32(0x00000000)); \ + STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in[ 3], SPH_C32(0x00000000)); \ + STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in[ 4], SPH_C32(0x00000000)); \ + STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in[ 5], SPH_C32(0x00000000)); \ + STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in[ 6], SPH_C32(0x00000000)); \ + STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in[ 7], SPH_C32(0x00000000)); \ + \ + STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in[ 8], SPH_C32(0x00000000)); \ + STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in[ 9], SPH_C32(0x00000000)); \ + STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in[10], SPH_C32(0x00000000)); \ + STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in[11], SPH_C32(0x00000000)); \ + STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in[12], SPH_C32(0x00000000)); \ + STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in[13], SPH_C32(0x00000000)); \ + STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, 
in[14], SPH_C32(0x00000000)); \ + STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in[15], SPH_C32(0x00000000)); \ + \ + STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in[16], SPH_C32(0x00000000)); \ + STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in[17], SPH_C32(0x00000000)); \ + STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in[18], SPH_C32(0x00000000)); \ + STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in[19], SPH_C32(0x00000000)); \ + STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in[20], SPH_C32(0x00000000)); \ + STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in[21], SPH_C32(0x00000000)); \ + STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in[22], SPH_C32(0x00000000)); \ + STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in[23], SPH_C32(0x00000000)); \ + \ + STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in[24], SPH_C32(0x00000000)); \ + STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in[25], SPH_C32(0x00000000)); \ + STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in[26], SPH_C32(0x00000000)); \ + STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in[27], SPH_C32(0x00000000)); \ + STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in[28], SPH_C32(0x00000000)); \ + STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in[29], SPH_C32(0x00000000)); \ + STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in[30], SPH_C32(0x00000000)); \ + STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in[31], SPH_C32(0x00000000)); \ + } + +#define PASS2(n, in) { \ + STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in[ 5], SPH_C32(0x452821E6)); \ + STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in[14], SPH_C32(0x38D01377)); \ + STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in[26], SPH_C32(0xBE5466CF)); \ + STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in[18], SPH_C32(0x34E90C6C)); \ + STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in[11], SPH_C32(0xC0AC29B7)); \ + STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in[28], SPH_C32(0xC97C50DD)); \ + STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in[ 7], SPH_C32(0x3F84D5B5)); \ + STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in[16], SPH_C32(0xB5470917)); \ + 
\ + STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in[ 0], SPH_C32(0x9216D5D9)); \ + STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in[23], SPH_C32(0x8979FB1B)); \ + STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in[20], SPH_C32(0xD1310BA6)); \ + STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in[22], SPH_C32(0x98DFB5AC)); \ + STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in[ 1], SPH_C32(0x2FFD72DB)); \ + STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in[10], SPH_C32(0xD01ADFB7)); \ + STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in[ 4], SPH_C32(0xB8E1AFED)); \ + STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in[ 8], SPH_C32(0x6A267E96)); \ + \ + STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in[30], SPH_C32(0xBA7C9045)); \ + STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in[ 3], SPH_C32(0xF12C7F99)); \ + STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in[21], SPH_C32(0x24A19947)); \ + STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in[ 9], SPH_C32(0xB3916CF7)); \ + STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in[17], SPH_C32(0x0801F2E2)); \ + STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in[24], SPH_C32(0x858EFC16)); \ + STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in[29], SPH_C32(0x636920D8)); \ + STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in[ 6], SPH_C32(0x71574E69)); \ + \ + STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in[19], SPH_C32(0xA458FEA3)); \ + STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in[12], SPH_C32(0xF4933D7E)); \ + STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in[15], SPH_C32(0x0D95748F)); \ + STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in[13], SPH_C32(0x728EB658)); \ + STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in[ 2], SPH_C32(0x718BCD58)); \ + STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in[25], SPH_C32(0x82154AEE)); \ + STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in[31], SPH_C32(0x7B54A41D)); \ + STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in[27], SPH_C32(0xC25A59B5)); \ + } + +#define PASS3(n, in) { \ + STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in[19], SPH_C32(0x9C30D539)); \ + STEP(n, 3, s6, s5, s4, s3, s2, 
s1, s0, s7, in[ 9], SPH_C32(0x2AF26013)); \ + STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in[ 4], SPH_C32(0xC5D1B023)); \ + STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in[20], SPH_C32(0x286085F0)); \ + STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in[28], SPH_C32(0xCA417918)); \ + STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in[17], SPH_C32(0xB8DB38EF)); \ + STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in[ 8], SPH_C32(0x8E79DCB0)); \ + STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in[22], SPH_C32(0x603A180E)); \ + \ + STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in[29], SPH_C32(0x6C9E0E8B)); \ + STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in[14], SPH_C32(0xB01E8A3E)); \ + STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in[25], SPH_C32(0xD71577C1)); \ + STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in[12], SPH_C32(0xBD314B27)); \ + STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in[24], SPH_C32(0x78AF2FDA)); \ + STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in[30], SPH_C32(0x55605C60)); \ + STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in[16], SPH_C32(0xE65525F3)); \ + STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in[26], SPH_C32(0xAA55AB94)); \ + \ + STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in[31], SPH_C32(0x57489862)); \ + STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in[15], SPH_C32(0x63E81440)); \ + STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in[ 7], SPH_C32(0x55CA396A)); \ + STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in[ 3], SPH_C32(0x2AAB10B6)); \ + STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in[ 1], SPH_C32(0xB4CC5C34)); \ + STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in[ 0], SPH_C32(0x1141E8CE)); \ + STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in[18], SPH_C32(0xA15486AF)); \ + STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in[27], SPH_C32(0x7C72E993)); \ + \ + STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in[13], SPH_C32(0xB3EE1411)); \ + STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in[ 6], SPH_C32(0x636FBC2A)); \ + STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in[21], SPH_C32(0x2BA9C55D)); \ + STEP(n, 3, s4, 
s3, s2, s1, s0, s7, s6, s5, in[10], SPH_C32(0x741831F6)); \ + STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in[23], SPH_C32(0xCE5C3E16)); \ + STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in[11], SPH_C32(0x9B87931E)); \ + STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in[ 5], SPH_C32(0xAFD6BA33)); \ + STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in[ 2], SPH_C32(0x6C24CF5C)); \ + } + +#define PASS4(n, in) { \ + STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in[24], SPH_C32(0x7A325381)); \ + STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in[ 4], SPH_C32(0x28958677)); \ + STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in[ 0], SPH_C32(0x3B8F4898)); \ + STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in[14], SPH_C32(0x6B4BB9AF)); \ + STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in[ 2], SPH_C32(0xC4BFE81B)); \ + STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in[ 7], SPH_C32(0x66282193)); \ + STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in[28], SPH_C32(0x61D809CC)); \ + STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in[23], SPH_C32(0xFB21A991)); \ + \ + STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in[26], SPH_C32(0x487CAC60)); \ + STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in[ 6], SPH_C32(0x5DEC8032)); \ + STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in[30], SPH_C32(0xEF845D5D)); \ + STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in[20], SPH_C32(0xE98575B1)); \ + STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in[18], SPH_C32(0xDC262302)); \ + STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in[25], SPH_C32(0xEB651B88)); \ + STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in[19], SPH_C32(0x23893E81)); \ + STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in[ 3], SPH_C32(0xD396ACC5)); \ + \ + STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in[22], SPH_C32(0x0F6D6FF3)); \ + STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in[11], SPH_C32(0x83F44239)); \ + STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in[31], SPH_C32(0x2E0B4482)); \ + STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in[21], SPH_C32(0xA4842004)); \ + STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in[ 
8], SPH_C32(0x69C8F04A)); \ + STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in[27], SPH_C32(0x9E1F9B5E)); \ + STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in[12], SPH_C32(0x21C66842)); \ + STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in[ 9], SPH_C32(0xF6E96C9A)); \ + \ + STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in[ 1], SPH_C32(0x670C9C61)); \ + STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in[29], SPH_C32(0xABD388F0)); \ + STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in[ 5], SPH_C32(0x6A51A0D2)); \ + STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in[15], SPH_C32(0xD8542F68)); \ + STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in[17], SPH_C32(0x960FA728)); \ + STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in[10], SPH_C32(0xAB5133A3)); \ + STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in[16], SPH_C32(0x6EEF0B6C)); \ + STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in[13], SPH_C32(0x137A3BE4)); \ + } + +#define PASS5(n, in) { \ + STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in[27], SPH_C32(0xBA3BF050)); \ + STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in[ 3], SPH_C32(0x7EFB2A98)); \ + STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in[21], SPH_C32(0xA1F1651D)); \ + STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in[26], SPH_C32(0x39AF0176)); \ + STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in[17], SPH_C32(0x66CA593E)); \ + STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in[11], SPH_C32(0x82430E88)); \ + STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in[20], SPH_C32(0x8CEE8619)); \ + STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in[29], SPH_C32(0x456F9FB4)); \ + \ + STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in[19], SPH_C32(0x7D84A5C3)); \ + STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in[ 0], SPH_C32(0x3B8B5EBE)); \ + STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in[12], SPH_C32(0xE06F75D8)); \ + STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in[ 7], SPH_C32(0x85C12073)); \ + STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in[13], SPH_C32(0x401A449F)); \ + STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in[ 8], SPH_C32(0x56C16AA6)); \ + 
STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in[31], SPH_C32(0x4ED3AA62)); \ + STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in[10], SPH_C32(0x363F7706)); \ + \ + STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in[ 5], SPH_C32(0x1BFEDF72)); \ + STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in[ 9], SPH_C32(0x429B023D)); \ + STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in[14], SPH_C32(0x37D0D724)); \ + STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in[30], SPH_C32(0xD00A1248)); \ + STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in[18], SPH_C32(0xDB0FEAD3)); \ + STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in[ 6], SPH_C32(0x49F1C09B)); \ + STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in[28], SPH_C32(0x075372C9)); \ + STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in[24], SPH_C32(0x80991B7B)); \ + \ + STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in[ 2], SPH_C32(0x25D479D8)); \ + STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in[23], SPH_C32(0xF6E8DEF7)); \ + STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in[16], SPH_C32(0xE3FE501A)); \ + STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in[22], SPH_C32(0xB6794C3B)); \ + STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in[ 4], SPH_C32(0x976CE0BD)); \ + STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in[ 1], SPH_C32(0x04C006BA)); \ + STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in[25], SPH_C32(0xC1A94FB6)); \ + STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in[15], SPH_C32(0x409F60C4)); \ + } + +#define F1(x6, x5, x4, x3, x2, x1, x0) \ + (((x1) & ((x0) ^ (x4))) ^ ((x2) & (x5)) ^ ((x3) & (x6)) ^ (x0)) + + +#define F2(x6, x5, x4, x3, x2, x1, x0) \ + (((x2) & (((x1) & ~(x3)) ^ ((x4) & (x5)) ^ (x6) ^ (x0))) \ + ^ ((x4) & ((x1) ^ (x5))) ^ ((x3 & (x5)) ^ (x0))) + + +#define F3(x6, x5, x4, x3, x2, x1, x0) \ + (((x3) & (((x1) & (x2)) ^ (x6) ^ (x0))) \ + ^ ((x1) & (x4)) ^ ((x2) & (x5)) ^ (x0)) + + +#define F4(x6, x5, x4, x3, x2, x1, x0) \ + (((x3) & (((x1) & (x2)) ^ ((x4) | (x6)) ^ (x5))) \ + ^ ((x4) & ((~(x2) & (x5)) ^ (x1) ^ (x6) ^ (x0))) \ + ^ ((x2) & (x6)) ^ (x0)) + +#define F5(x6, x5, x4, 
x3, x2, x1, x0) \ + (((x0) & ~(((x1) & (x2) & (x3)) ^ (x5))) \ + ^ ((x1) & (x4)) ^ ((x2) & (x5)) ^ ((x3) & (x6))) + +#define FP5_1(x6, x5, x4, x3, x2, x1, x0) \ + F1(x3, x4, x1, x0, x5, x2, x6) +#define FP5_2(x6, x5, x4, x3, x2, x1, x0) \ + F2(x6, x2, x1, x0, x3, x4, x5) +#define FP5_3(x6, x5, x4, x3, x2, x1, x0) \ + F3(x2, x6, x0, x4, x3, x1, x5) +#define FP5_4(x6, x5, x4, x3, x2, x1, x0) \ + F4(x1, x5, x3, x2, x0, x4, x6) +#define FP5_5(x6, x5, x4, x3, x2, x1, x0) \ + F5(x2, x5, x0, x6, x4, x3, x1) + + +#define STEP(n, p, x7, x6, x5, x4, x3, x2, x1, x0, w, c) { \ + uint32_t t = FP ## n ## _ ## p(x6, x5, x4, x3, x2, x1, x0); \ + (x7) = SPH_T32(SPH_ROTR32(t, 7) + SPH_ROTR32((x7), 11) \ + + (w) + (c)); \ + } + +__global__ void m7_haval256_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash) +{ + + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + + uint32_t nounce = startNounce + thread; + +union { +uint32_t h4[16]; +uint64_t h8[8]; +} hash; + + + uint32_t u0, u1, u2, u3, u4, u5, u6, u7; + uint32_t s0,s1,s2,s3,s4,s5,s6,s7; + uint32_t buf[32]; + s0 = initVector[0]; + s1 = initVector[1]; + s2 = initVector[2]; + s3 = initVector[3]; + s4 = initVector[4]; + s5 = initVector[5]; + s6 = initVector[6]; + s7 = initVector[7]; + + u0 = s0; + u1 = s1; + u2 = s2; + u3 = s3; + u4 = s4; + u5 = s5; + u6 = s6; + u7 = s7; +///////// input big ///////////////////// +#pragma unroll 29 + for (int i=0;i<29;i++) { + buf[i]=c_PaddedMessage80[i];} + buf[29]=nounce; + buf[30]=c_PaddedMessage80[30]+0x00010000; //need to fix that + buf[31]=0; + + PASS1(5, buf); + PASS2(5, buf); + PASS3(5, buf); + PASS4(5, buf); + PASS5(5, buf); + + + s0 = sph_t32(s0 + u0); + s1 = sph_t32(s1 + u1); + s2 = sph_t32(s2 + u2); + s3 = sph_t32(s3 + u3); + s4 = sph_t32(s4 + u4); + s5 = sph_t32(s5 + u5); + s6 = sph_t32(s6 + u6); + s7 = sph_t32(s7 + u7); + u0 = s0; + u1 = s1; + u2 = s2; + u3 = s3; + u4 = s4; + u5 = s5; + u6 = s6; + u7 = s7; + + +///////////////////// 
+#pragma unroll 32 + for (int i=0;i<32;i++) {buf[i]=0;} + + buf[29]=0x40290000; + buf[30]=0x000003d0; + + + + PASS1(5, buf); + PASS2(5, buf); + PASS3(5, buf); + PASS4(5, buf); + PASS5(5, buf); + + + s0 = sph_t32(s0 + u0); + s1 = sph_t32(s1 + u1); + s2 = sph_t32(s2 + u2); + s3 = sph_t32(s3 + u3); + s4 = sph_t32(s4 + u4); + s5 = sph_t32(s5 + u5); + s6 = sph_t32(s6 + u6); + s7 = sph_t32(s7 + u7); +//////////////////// + hash.h4[0]=s0; + hash.h4[1]=s1; + hash.h4[2]=s2; + hash.h4[3]=s3; + hash.h4[4]=s4; + hash.h4[5]=s5; + hash.h4[6]=s6; + hash.h4[7]=s7; + +#pragma unroll 4 +for (int i=0;i<4;i++) {outputHash[i*threads+thread]=hash.h8[i];} + } // threads +} + +__global__ void haval256_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) +{ + + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread); + + int hashPosition = nounce - startNounce; + + + uint32_t *inpHash = (uint32_t*)&g_hash[8 * hashPosition]; + + +union { +uint8_t h1[64]; +uint32_t h4[16]; +uint64_t h8[8]; +} hash; + + + uint32_t u0, u1, u2, u3, u4, u5, u6, u7; + uint32_t s0,s1,s2,s3,s4,s5,s6,s7; + uint32_t buf[32]; + s0 = initVector[0]; + s1 = initVector[1]; + s2 = initVector[2]; + s3 = initVector[3]; + s4 = initVector[4]; + s5 = initVector[5]; + s6 = initVector[6]; + s7 = initVector[7]; + + u0 = s0; + u1 = s1; + u2 = s2; + u3 = s3; + u4 = s4; + u5 = s5; + u6 = s6; + u7 = s7; + + + #pragma unroll 16 + for (int i=0;i<16;i++) { + hash.h4[i]= inpHash[i];} + +///////// input big ///////////////////// +#pragma unroll 32 + for (int i=0;i<32;i++) { + if (i<16) {buf[i]=hash.h4[i];} else { + buf[i]=0;}} + buf[16]=0x00000001; + buf[29]=0x40290000; + buf[30]=0x00000200; + + PASS1(5, buf); + PASS2(5, buf); + PASS3(5, buf); + PASS4(5, buf); + PASS5(5, buf); + + + s0 = sph_t32(s0 + u0); + s1 = sph_t32(s1 + u1); + s2 = sph_t32(s2 + u2); + s3 = sph_t32(s3 + u3); + s4 
= sph_t32(s4 + u4);
+        s5 = sph_t32(s5 + u5);
+        s6 = sph_t32(s6 + u6);
+        s7 = sph_t32(s7 + u7);
+
+        hash.h4[0]=s0;
+        hash.h4[1]=s1;
+        hash.h4[2]=s2;
+        hash.h4[3]=s3;
+        hash.h4[4]=s4;
+        hash.h4[5]=s5;
+        hash.h4[6]=s6;
+        hash.h4[7]=s7;
+
+        #pragma unroll 16
+        for (int u = 0; u < 16; u ++)
+            inpHash[u] = hash.h4[u];
+    } // threads
+}
+
+
+void haval256_cpu_init(int thr_id, int threads)
+{
+    // Upload the 8-word initial state to the device constant `initVector`.
+    // thr_id and threads are not used here.
+    cudaMemcpyToSymbol(initVector,c_initVector,sizeof(c_initVector),0, cudaMemcpyHostToDevice);
+
+}
+
+__host__ void haval256_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
+{
+    // Launches haval256_gpu_hash_64 over `threads` entries of d_hash, then synchronizes.
+    const int threadsperblock = 256; // (inherited note) alignment with mixtab size. DO NOT CHANGE
+
+    // compute how many thread blocks we need (rounded up)
+    dim3 grid((threads + threadsperblock-1)/threadsperblock);
+    dim3 block(threadsperblock);
+
+    size_t shared_size = 0;
+
+    haval256_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
+
+    MyStreamSynchronize(NULL, order, thr_id);
+}
+
+__host__ void haval256_setBlock_120(void *pdata)
+{
+    unsigned char PaddedMessage[128];
+    memcpy(PaddedMessage, pdata, 122);      // pdata must provide at least 122 readable bytes
+    memset(PaddedMessage+122, 0, 6);        // zero the remaining 6 bytes of the 128-byte buffer
+    cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 32*sizeof(uint32_t), 0, cudaMemcpyHostToDevice);
+
+}
+
+__host__ void m7_haval256_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
+{
+    // Launches m7_haval256_gpu_hash_120 for nonces startNounce .. startNounce + threads - 1, then synchronizes.
+    const int threadsperblock = 256; // (inherited note) alignment with mixtab size. DO NOT CHANGE
+
+    // Compute how many thread blocks we need. Round up: with plain integer
+    // division a thread count that is not a multiple of the block size would
+    // leave its tail unhashed. The kernel guards with `thread < threads`.
+    dim3 grid((threads + threadsperblock-1)/threadsperblock);
+    dim3 block(threadsperblock);
+    size_t shared_size = 0;
+
+    m7_haval256_gpu_hash_120<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash);
+
+    MyStreamSynchronize(NULL, order, thr_id);
+}
+
diff --git a/x13/cuda_m7_sha256.cu b/x13/cuda_m7_sha256.cu
new file mode 100644
index 0000000000..fcc92fc74d
--- /dev/null
+++ b/x13/cuda_m7_sha256.cu
@@ -0,0 +1,526 @@
+#include
+#include "cuda_runtime.h"
+#include "device_launch_parameters.h"
+
+#include
+#include
+#include
+
+extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
+
+#define SPH_C64(x) ((uint64_t)(x ## ULL))
+#define SPH_C32(x) ((uint32_t)(x ## U))
+#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
+#define ROTR SPH_ROTR32
+#include "cuda_helper.h"
+#define host_swab32(x) ( ((x & 0x000000FF) << 24) | ((x & 0x0000FF00) << 8) | ((x & 0x00FF0000) >> 8) | ((x & 0xFF000000) >> 24) )
+
+__constant__ uint32_t c_PaddedMessage80[32]; // padded message (80 bytes + padding)
+__constant__ uint64_t pTarget[4];
+__constant__ uint32_t pbuf[8];
+uint32_t *d_mnounce[8];
+uint32_t *d_MNonce[8];
+
+
+static __constant__ uint32_t H256[8];
+static __constant__ uint32_t K[64];
+// must be expanded
+__constant__ uint32_t sha256_gpu_blockHeader[16]; // 2x512 Bit Message
+__constant__ uint32_t sha256_gpu_register[8];
+
+
+static const uint32_t cpu_H256[8] = {
+    SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85), SPH_C32(0x3C6EF372),
+    SPH_C32(0xA54FF53A), SPH_C32(0x510E527F), SPH_C32(0x9B05688C),
+    SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
+};
+static const uint32_t cpu_K[64] = {
+    SPH_C32(0x428A2F98), SPH_C32(0x71374491),
+    SPH_C32(0xB5C0FBCF), SPH_C32(0xE9B5DBA5),
+    SPH_C32(0x3956C25B), SPH_C32(0x59F111F1),
+    SPH_C32(0x923F82A4), SPH_C32(0xAB1C5ED5),
+    SPH_C32(0xD807AA98), SPH_C32(0x12835B01),
+    SPH_C32(0x243185BE), SPH_C32(0x550C7DC3),
+    SPH_C32(0x72BE5D74), SPH_C32(0x80DEB1FE),
+ SPH_C32(0x9BDC06A7), SPH_C32(0xC19BF174), + SPH_C32(0xE49B69C1), SPH_C32(0xEFBE4786), + SPH_C32(0x0FC19DC6), SPH_C32(0x240CA1CC), + SPH_C32(0x2DE92C6F), SPH_C32(0x4A7484AA), + SPH_C32(0x5CB0A9DC), SPH_C32(0x76F988DA), + SPH_C32(0x983E5152), SPH_C32(0xA831C66D), + SPH_C32(0xB00327C8), SPH_C32(0xBF597FC7), + SPH_C32(0xC6E00BF3), SPH_C32(0xD5A79147), + SPH_C32(0x06CA6351), SPH_C32(0x14292967), + SPH_C32(0x27B70A85), SPH_C32(0x2E1B2138), + SPH_C32(0x4D2C6DFC), SPH_C32(0x53380D13), + SPH_C32(0x650A7354), SPH_C32(0x766A0ABB), + SPH_C32(0x81C2C92E), SPH_C32(0x92722C85), + SPH_C32(0xA2BFE8A1), SPH_C32(0xA81A664B), + SPH_C32(0xC24B8B70), SPH_C32(0xC76C51A3), + SPH_C32(0xD192E819), SPH_C32(0xD6990624), + SPH_C32(0xF40E3585), SPH_C32(0x106AA070), + SPH_C32(0x19A4C116), SPH_C32(0x1E376C08), + SPH_C32(0x2748774C), SPH_C32(0x34B0BCB5), + SPH_C32(0x391C0CB3), SPH_C32(0x4ED8AA4A), + SPH_C32(0x5B9CCA4F), SPH_C32(0x682E6FF3), + SPH_C32(0x748F82EE), SPH_C32(0x78A5636F), + SPH_C32(0x84C87814), SPH_C32(0x8CC70208), + SPH_C32(0x90BEFFFA), SPH_C32(0xA4506CEB), + SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2) +}; + + +static __device__ __forceinline__ uint32_t bsg2_0(uint32_t x) +{ + uint32_t r1 = SPH_ROTR32(x,2); + uint32_t r2 = SPH_ROTR32(x,13); + uint32_t r3 = SPH_ROTR32(x,22); + return xor3b(r1,r2,r3); +} +static __device__ __forceinline__ uint32_t bsg2_1(uint32_t x) +{ + uint32_t r1 = SPH_ROTR32(x,6); + uint32_t r2 = SPH_ROTR32(x,11); + uint32_t r3 = SPH_ROTR32(x,25); + return xor3b(r1,r2,r3); +} +static __device__ __forceinline__ uint32_t ssg2_0(uint32_t x) +{ + uint64_t r1 = SPH_ROTR32(x,7); + uint64_t r2 = SPH_ROTR32(x,18); + uint64_t r3 = shr_t32(x,3); + return xor3b(r1,r2,r3); +} +static __device__ __forceinline__ uint32_t ssg2_1(uint32_t x) +{ + uint64_t r1 = SPH_ROTR32(x,17); + uint64_t r2 = SPH_ROTR32(x,19); + uint64_t r3 = shr_t32(x,10); + return xor3b(r1,r2,r3); +} + +static __device__ __forceinline__ void sha2_step1(uint32_t a,uint32_t b,uint32_t c,uint32_t &d,uint32_t 
e,uint32_t f,uint32_t g,uint32_t &h, + uint32_t in,const uint32_t Kshared) +{ +uint32_t t1,t2; +uint32_t vxandx = xandx(e, f, g); +uint32_t bsg21 =bsg2_1(e); +uint32_t bsg20 =bsg2_0(a); +uint32_t andorv =andor32(a,b,c); + +t1 = h + bsg21 + vxandx + Kshared + in; +t2 = bsg20 + andorv; +d = d + t1; +h = t1 + t2; +} + +static __forceinline__ void sha2_step1_host(uint32_t a,uint32_t b,uint32_t c,uint32_t &d,uint32_t e,uint32_t f,uint32_t g,uint32_t &h, + uint32_t in,const uint32_t Kshared) +{ + + + +uint32_t t1,t2; +uint32_t vxandx = (((f) ^ (g)) & (e)) ^ (g); // xandx(e, f, g); +uint32_t bsg21 =ROTR(e, 6) ^ ROTR(e, 11) ^ ROTR(e, 25); // bsg2_1(e); +uint32_t bsg20 =ROTR(a, 2) ^ ROTR(a, 13) ^ ROTR(a, 22); //bsg2_0(a); +uint32_t andorv =((b) & (c)) | (((b) | (c)) & (a)); //andor32(a,b,c); + +t1 = h + bsg21 + vxandx + Kshared + in; +t2 = bsg20 + andorv; +d = d + t1; +h = t1 + t2; +} + +static __device__ __forceinline__ void sha2_step2(uint32_t a,uint32_t b,uint32_t c,uint32_t &d,uint32_t e,uint32_t f,uint32_t g,uint32_t &h, + uint32_t* in,uint32_t pc,const uint32_t Kshared) +{ +uint32_t t1,t2; + +int pcidx1 = (pc-2) & 0xF; +int pcidx2 = (pc-7) & 0xF; +int pcidx3 = (pc-15) & 0xF; +uint32_t inx0 = in[pc]; +uint32_t inx1 = in[pcidx1]; +uint32_t inx2 = in[pcidx2]; +uint32_t inx3 = in[pcidx3]; + + +uint32_t ssg21 = ssg2_1(inx1); +uint32_t ssg20 = ssg2_0(inx3); +uint32_t vxandx = xandx(e, f, g); +uint32_t bsg21 =bsg2_1(e); +uint32_t bsg20 =bsg2_0(a); +uint32_t andorv =andor32(a,b,c); + +in[pc] = ssg21+inx2+ssg20+inx0; + +t1 = h + bsg21 + vxandx + Kshared + in[pc]; +t2 = bsg20 + andorv; +d = d + t1; +h = t1 + t2; + +} + +static __forceinline__ void sha2_step2_host(uint32_t a,uint32_t b,uint32_t c,uint32_t &d,uint32_t e,uint32_t f,uint32_t g,uint32_t &h, + uint32_t* in,uint32_t pc,const uint32_t Kshared) +{ +uint32_t t1,t2; + +int pcidx1 = (pc-2) & 0xF; +int pcidx2 = (pc-7) & 0xF; +int pcidx3 = (pc-15) & 0xF; +uint32_t inx0 = in[pc]; +uint32_t inx1 = in[pcidx1]; +uint32_t inx2 = 
in[pcidx2]; +uint32_t inx3 = in[pcidx3]; + + +uint32_t ssg21 = ROTR(inx1, 17) ^ ROTR(inx1, 19) ^ SPH_T32((inx1) >> 10); //ssg2_1(inx1); +uint32_t ssg20 = ROTR(inx3, 7) ^ ROTR(inx3, 18) ^ SPH_T32((inx3) >> 3); //ssg2_0(inx3); +uint32_t vxandx = (((f) ^ (g)) & (e)) ^ (g); // xandx(e, f, g); +uint32_t bsg21 =ROTR(e, 6) ^ ROTR(e, 11) ^ ROTR(e, 25); // bsg2_1(e); +uint32_t bsg20 =ROTR(a, 2) ^ ROTR(a, 13) ^ ROTR(a, 22); //bsg2_0(a); +uint32_t andorv =((b) & (c)) | (((b) | (c)) & (a)); //andor32(a,b,c); + +in[pc] = ssg21+inx2+ssg20+inx0; + +t1 = h + bsg21 + vxandx + Kshared + in[pc]; +t2 = bsg20 + andorv; +d = d + t1; +h = t1 + t2; + +} + + +static __device__ __forceinline__ void sha2_round_body(uint32_t* in, uint32_t* r,const uint32_t* Kshared) +{ + + + uint32_t a=r[0]; + uint32_t b=r[1]; + uint32_t c=r[2]; + uint32_t d=r[3]; + uint32_t e=r[4]; + uint32_t f=r[5]; + uint32_t g=r[6]; + uint32_t h=r[7]; + + sha2_step1(a,b,c,d,e,f,g,h,in[0],Kshared[0]); + sha2_step1(h,a,b,c,d,e,f,g,in[1],Kshared[1]); + sha2_step1(g,h,a,b,c,d,e,f,in[2],Kshared[2]); + sha2_step1(f,g,h,a,b,c,d,e,in[3],Kshared[3]); + sha2_step1(e,f,g,h,a,b,c,d,in[4],Kshared[4]); + sha2_step1(d,e,f,g,h,a,b,c,in[5],Kshared[5]); + sha2_step1(c,d,e,f,g,h,a,b,in[6],Kshared[6]); + sha2_step1(b,c,d,e,f,g,h,a,in[7],Kshared[7]); + sha2_step1(a,b,c,d,e,f,g,h,in[8],Kshared[8]); + sha2_step1(h,a,b,c,d,e,f,g,in[9],Kshared[9]); + sha2_step1(g,h,a,b,c,d,e,f,in[10],Kshared[10]); + sha2_step1(f,g,h,a,b,c,d,e,in[11],Kshared[11]); + sha2_step1(e,f,g,h,a,b,c,d,in[12],Kshared[12]); + sha2_step1(d,e,f,g,h,a,b,c,in[13],Kshared[13]); + sha2_step1(c,d,e,f,g,h,a,b,in[14],Kshared[14]); + sha2_step1(b,c,d,e,f,g,h,a,in[15],Kshared[15]); + +#pragma unroll 3 + for (int i=0;i<3;i++) { + + sha2_step2(a,b,c,d,e,f,g,h,in,0,Kshared[16+16*i]); + sha2_step2(h,a,b,c,d,e,f,g,in,1,Kshared[17+16*i]); + sha2_step2(g,h,a,b,c,d,e,f,in,2,Kshared[18+16*i]); + sha2_step2(f,g,h,a,b,c,d,e,in,3,Kshared[19+16*i]); + 
sha2_step2(e,f,g,h,a,b,c,d,in,4,Kshared[20+16*i]); + sha2_step2(d,e,f,g,h,a,b,c,in,5,Kshared[21+16*i]); + sha2_step2(c,d,e,f,g,h,a,b,in,6,Kshared[22+16*i]); + sha2_step2(b,c,d,e,f,g,h,a,in,7,Kshared[23+16*i]); + sha2_step2(a,b,c,d,e,f,g,h,in,8,Kshared[24+16*i]); + sha2_step2(h,a,b,c,d,e,f,g,in,9,Kshared[25+16*i]); + sha2_step2(g,h,a,b,c,d,e,f,in,10,Kshared[26+16*i]); + sha2_step2(f,g,h,a,b,c,d,e,in,11,Kshared[27+16*i]); + sha2_step2(e,f,g,h,a,b,c,d,in,12,Kshared[28+16*i]); + sha2_step2(d,e,f,g,h,a,b,c,in,13,Kshared[29+16*i]); + sha2_step2(c,d,e,f,g,h,a,b,in,14,Kshared[30+16*i]); + sha2_step2(b,c,d,e,f,g,h,a,in,15,Kshared[31+16*i]); + + } + + + + r[0] = r[0] + a; + r[1] = r[1] + b; + r[2] = r[2] + c; + r[3] = r[3] + d; + r[4] = r[4] + e; + r[5] = r[5] + f; + r[6] = r[6] + g; + r[7] = r[7] + h; +} + +static __forceinline__ void sha2_round_body_host(uint32_t* in, uint32_t* r,const uint32_t* Kshared) +{ + + + uint32_t a=r[0]; + uint32_t b=r[1]; + uint32_t c=r[2]; + uint32_t d=r[3]; + uint32_t e=r[4]; + uint32_t f=r[5]; + uint32_t g=r[6]; + uint32_t h=r[7]; + + sha2_step1_host(a,b,c,d,e,f,g,h,in[0],Kshared[0]); + sha2_step1_host(h,a,b,c,d,e,f,g,in[1],Kshared[1]); + sha2_step1_host(g,h,a,b,c,d,e,f,in[2],Kshared[2]); + sha2_step1_host(f,g,h,a,b,c,d,e,in[3],Kshared[3]); + sha2_step1_host(e,f,g,h,a,b,c,d,in[4],Kshared[4]); + sha2_step1_host(d,e,f,g,h,a,b,c,in[5],Kshared[5]); + sha2_step1_host(c,d,e,f,g,h,a,b,in[6],Kshared[6]); + sha2_step1_host(b,c,d,e,f,g,h,a,in[7],Kshared[7]); + sha2_step1_host(a,b,c,d,e,f,g,h,in[8],Kshared[8]); + sha2_step1_host(h,a,b,c,d,e,f,g,in[9],Kshared[9]); + sha2_step1_host(g,h,a,b,c,d,e,f,in[10],Kshared[10]); + sha2_step1_host(f,g,h,a,b,c,d,e,in[11],Kshared[11]); + sha2_step1_host(e,f,g,h,a,b,c,d,in[12],Kshared[12]); + sha2_step1_host(d,e,f,g,h,a,b,c,in[13],Kshared[13]); + sha2_step1_host(c,d,e,f,g,h,a,b,in[14],Kshared[14]); + sha2_step1_host(b,c,d,e,f,g,h,a,in[15],Kshared[15]); + + + for (int i=0;i<3;i++) { + + 
sha2_step2_host(a,b,c,d,e,f,g,h,in,0,Kshared[16+16*i]); + sha2_step2_host(h,a,b,c,d,e,f,g,in,1,Kshared[17+16*i]); + sha2_step2_host(g,h,a,b,c,d,e,f,in,2,Kshared[18+16*i]); + sha2_step2_host(f,g,h,a,b,c,d,e,in,3,Kshared[19+16*i]); + sha2_step2_host(e,f,g,h,a,b,c,d,in,4,Kshared[20+16*i]); + sha2_step2_host(d,e,f,g,h,a,b,c,in,5,Kshared[21+16*i]); + sha2_step2_host(c,d,e,f,g,h,a,b,in,6,Kshared[22+16*i]); + sha2_step2_host(b,c,d,e,f,g,h,a,in,7,Kshared[23+16*i]); + sha2_step2_host(a,b,c,d,e,f,g,h,in,8,Kshared[24+16*i]); + sha2_step2_host(h,a,b,c,d,e,f,g,in,9,Kshared[25+16*i]); + sha2_step2_host(g,h,a,b,c,d,e,f,in,10,Kshared[26+16*i]); + sha2_step2_host(f,g,h,a,b,c,d,e,in,11,Kshared[27+16*i]); + sha2_step2_host(e,f,g,h,a,b,c,d,in,12,Kshared[28+16*i]); + sha2_step2_host(d,e,f,g,h,a,b,c,in,13,Kshared[29+16*i]); + sha2_step2_host(c,d,e,f,g,h,a,b,in,14,Kshared[30+16*i]); + sha2_step2_host(b,c,d,e,f,g,h,a,in,15,Kshared[31+16*i]); + + } + + r[0] = r[0] + a; + r[1] = r[1] + b; + r[2] = r[2] + c; + r[3] = r[3] + d; + r[4] = r[4] + e; + r[5] = r[5] + f; + r[6] = r[6] + g; + r[7] = r[7] + h; +} + + +__global__ void __launch_bounds__(512,1) m7_sha256_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash) +{ + + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + + uint32_t nounce = startNounce + thread ; // original implementation + + uint32_t buf[8]; + uint32_t in2[16]={0}; + uint32_t in3[16]={0}; + + #pragma unroll 13 + for (int i=0;i<13;i++) {in2[i]= cuda_swab32(c_PaddedMessage80[i+16]);} + in2[13]=cuda_swab32(nounce); + in2[14]=cuda_swab32(c_PaddedMessage80[30]); + + in3[15]=0x3d0; + + #pragma unroll 8 + for (int i=0;i<8;i++) {buf[i]= pbuf[i];} + + sha2_round_body(in2,buf,K); + sha2_round_body(in3,buf,K); + +#pragma unroll 4 +for (int i=0;i<4;i++) {outputHash[i*threads+thread]=cuda_swab32ll(((uint64_t*)buf)[i]);} + + +////////////////////////////////////////////////////////////////////////////////////////////////// + } // threads 
+ +} + + +__global__ void m7_sha256_gpu_hash_300(int threads, uint32_t startNounce, uint64_t *g_hash1, uint64_t *g_nonceVector, uint32_t *resNounce) +{ +/* + __shared__ uint32_t Kshared[64]; + if (threadIdx.x < 64) { + Kshared[threadIdx.x]=K[threadIdx.x]; + } + __syncthreads(); +*/ + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + + + + +union { +uint8_t h1[304]; +uint32_t h4[76]; +uint64_t h8[38]; +} hash; + + + uint32_t in[16],buf[8]; + + + #pragma unroll 8 + for (int i=0;i<8;i++) {((uint64_t*)in)[i]= cuda_swab32ll(g_hash1[threads*i+thread]);} + #pragma unroll 8 + for (int i=0;i<8;i++) {buf[i] = H256[i];} + + sha2_round_body(in,buf,K); + + #pragma unroll 8 + for (int i=0;i<8;i++) {((uint64_t*)in)[i]= cuda_swab32ll(g_hash1[threads*(i+8)+thread]);} + sha2_round_body(in,buf,K); + + #pragma unroll 8 + for (int i=0;i<8;i++) {((uint64_t*)in)[i]= cuda_swab32ll(g_hash1[threads*(i+16)+thread]);} + sha2_round_body(in,buf,K); + + #pragma unroll 8 + for (int i=0;i<8;i++) {((uint64_t*)in)[i]= cuda_swab32ll(g_hash1[threads*(i+24)+thread]);} + sha2_round_body(in,buf,K); + + #pragma unroll 5 + for (int i=0;i<5;i++) {((uint64_t*)in)[i]= cuda_swab32ll(g_hash1[threads*(i+32)+thread]);} + ((uint64_t*)in)[5]= g_hash1[threads*(5+32)+thread]; + in[11]=0; + in[12]=0; + in[13]=0; + in[14]=0; + + + in[15]=0x968; + + int it=0; + do { + in[15]-=8; + it++; + } while (((uint8_t*)in)[44-it]==0); + ((uint8_t*)in)[44-it+1]=0x80; + + ((uint64_t*)in)[5]= cuda_swab32ll(((uint64_t*)in)[5]); + + sha2_round_body(in,buf,K); + +uint32_t nounce = startNounce +thread; + bool rc = true; + + + if (cuda_swab32ll(((uint64_t*)buf)[3]) > pTarget[3]) {rc = false;} +//// only needed for solo mining, commenting it out will probably increased rejected block (no big deal actually) + /* + else if (cuda_swab32ll(((uint64_t*)buf)[3]) == pTarget[3]) { // in case ptarget=buf=0 + if (cuda_swab32ll(((uint64_t*)buf)[2]) > pTarget[2]) {rc = false;} + else if 
(cuda_swab32ll(((uint64_t*)buf)[2]) == pTarget[2]) { + if (cuda_swab32ll(((uint64_t*)buf)[1]) > pTarget[1]) {rc = false;} + else if (cuda_swab32ll(((uint64_t*)buf)[1]) == pTarget[1]) { + if (cuda_swab32ll(((uint64_t*)buf)[0]) > pTarget[0]) {rc = false;} + else if (cuda_swab32ll(((uint64_t*)buf)[0]) == pTarget[0]) {rc = true;} + }}} + */ + + + + if(rc == true) + { + if(resNounce[0] > nounce) + resNounce[0] = nounce; + + } + + +//// + } // threads +} + + + +__host__ void m7_sha256_cpu_init(int thr_id, int threads) +{ + // Kopiere die Hash-Tabellen in den GPU-Speicher + cudaMemcpyToSymbol( H256,cpu_H256,sizeof(cpu_H256),0, cudaMemcpyHostToDevice ); + cudaMemcpyToSymbol( K,cpu_K,sizeof(cpu_K),0, cudaMemcpyHostToDevice ); + cudaMalloc(&d_MNonce[thr_id], sizeof(uint32_t)); + cudaMallocHost(&d_mnounce[thr_id], 1*sizeof(uint32_t)); +} + + +__host__ uint32_t m7_sha256_cpu_hash_300(int thr_id, int threads, uint32_t startNounce, uint64_t *d_nonceVector,uint64_t *d_hash, int order) +{ + + uint32_t result = 0xffffffff; + cudaMemset(d_MNonce[thr_id], 0xff, sizeof(uint32_t)); + //const int threadsperblock = 384; // Alignment mit mixtob Grösse. NICHT ÄNDERN + const int threadsperblock = 512; + + dim3 grid(threads/threadsperblock); + dim3 block(threadsperblock); + + size_t shared_size = 0; + + + m7_sha256_gpu_hash_300<<>>(threads, startNounce, d_hash, d_nonceVector, d_MNonce[thr_id]); + cudaMemcpy(d_mnounce[thr_id], d_MNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost); + MyStreamSynchronize(NULL, order, thr_id); + result = *d_mnounce[thr_id]; + return result; +} + + +__host__ void m7_sha256_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order) +{ + + const int threadsperblock = 512; // Alignment mit mixtob Grösse. 
NICHT ÄNDERN + + // berechne wie viele Thread Blocks wir brauchen + dim3 grid(threads/threadsperblock); + dim3 block(threadsperblock); +// dim3 grid(1); +// dim3 block(1); + size_t shared_size = 0; + + m7_sha256_gpu_hash_120<<>>(threads, startNounce, d_outputHash); + + MyStreamSynchronize(NULL, order, thr_id); +} + +__host__ void m7_sha256_setBlock_120(void *pdata,const void *ptarget) //not useful +{ + unsigned char PaddedMessage[128]; + uint8_t ending =0x80; + memcpy(PaddedMessage, pdata, 122); + memset(PaddedMessage+122,ending,1); + memset(PaddedMessage+123, 0, 5); //useless + cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol( pTarget, ptarget, 4*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); + /// do first loop here... /// + + uint32_t * alt_data = (uint32_t*) PaddedMessage; + uint32_t in[16],buf[8]; + for (int i=0;i<16;i++) {in[i]= host_swab32(alt_data[i]);} + for (int i=0;i<8;i++) {buf[i]= cpu_H256[i];} + sha2_round_body_host(in,buf,cpu_K); + cudaMemcpyToSymbol( pbuf, buf, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice); +} diff --git a/x13/cuda_mul.cu b/x13/cuda_mul.cu new file mode 100644 index 0000000000..53794e041d --- /dev/null +++ b/x13/cuda_mul.cu @@ -0,0 +1,366 @@ +/* + * tiger-192 djm34 + * + */ + +/* + * tiger-192 kernel implementation. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 djm34 + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author phm + */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + + +#include +#include +#include + + + +#include "cuda_helper.h" + + +// aus heavy.cu + +extern int device_major[8]; +extern int device_minor[8]; +extern int compute_version[8]; +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); + +static __forceinline__ __device__ void mul_unroll1_core_test(int threads, int thread, uint64_t* am, uint64_t* bm, uint64_t *w) +{ + + uint32_t B0, B1, B2, B3, B4, B5; + LOHI(B0, B1, am[thread]); + LOHI(B2, B3, am[threads + thread]); + LOHI(B4, B5, am[2 * threads + thread]); + + + +#pragma unroll + for (int i = 0; i<35; i++) { w[i*threads + thread] = 0; } +#if __CUDA_ARCH__ < 500 +#pragma unroll +#endif + for (int i = 0; i<32; i++) { + uint32_t Q0; + uint32_t Q1; + LOHI(Q0, Q1, bm[i*threads + thread]); + // uint32_t W0,W1,W2,W3,W4,W5,W6,W7; + uint4 Wa, Wb; + LOHI(Wa.x, Wa.y, w[i*threads + thread]); + LOHI(Wa.z, Wa.w, w[(i + 1)*threads + thread]); + LOHI(Wb.x, Wb.y, w[(i + 2)*threads + thread]); + LOHI(Wb.z, Wb.w, w[(i + 3)*threads + thread]); + + + asm("{\n\t" + ".reg .u32 b0,b1; \n\t" + "mad.lo.cc.u32 b0,%7,%13,%0; \n\t" + "madc.hi.cc.u32 b1,%7,%13,0; \n\t" + "mov.u32 %0,b0; \n\t" + "madc.lo.cc.u32 b1,%8,%13,b1; \n\t" + "madc.hi.cc.u32 b0,%8,%13,0; \n\t" + "add.cc.u32 b1,b1,%1; \n\t" + "mov.u32 %1,b1; \n\t" + "madc.lo.cc.u32 b0,%9,%13,b0; \n\t" + "madc.hi.cc.u32 b1,%9,%13,0; \n\t" + "add.cc.u32 b0,b0,%2; \n\t" + "mov.u32 %2,b0; \n\t" + "madc.lo.cc.u32 b1,%10,%13,b1; \n\t" + "madc.hi.cc.u32 b0,%10,%13,0; \n\t" + "add.cc.u32 b1,b1,%3; \n\t" + "mov.u32 %3,b1; \n\t" + "madc.lo.cc.u32 b0,%11,%13,b0; \n\t" + "madc.hi.cc.u32 b1,%11,%13,0; \n\t" + "add.cc.u32 b0,b0,%4; \n\t" + "mov.u32 %4,b0; \n\t" + "madc.lo.cc.u32 b1,%12,%13,b1; \n\t" + "madc.hi.cc.u32 %6,%12,%13,0; \n\t" + "add.cc.u32 b1,b1,%5; \n\t" + "addc.u32 %6,%6,0; \n\t" + 
"mov.u32 %5,b1; \n\t" + "}\n\t" + : "+r"(Wa.x), "+r"(Wa.y), "+r"(Wa.z), "+r"(Wa.w), "+r"(Wb.x), "+r"(Wb.y), "+r"(Wb.z) + : "r"(B0), "r"(B1), "r"(B2), "r"(B3), "r"(B4), "r"(B5), "r"(Q0)); + /////////////////////////// + asm("{\n\t" + ".reg .u32 b0,b1; \n\t" + "mad.lo.cc.u32 b0,%7,%13,%0; \n\t" + "madc.hi.cc.u32 b1,%7,%13,0; \n\t" + "mov.u32 %0,b0; \n\t" + "madc.lo.cc.u32 b1,%8,%13,b1; \n\t" + "madc.hi.cc.u32 b0,%8,%13,0; \n\t" + "add.cc.u32 b1,b1,%1; \n\t" + "mov.u32 %1,b1; \n\t" + "madc.lo.cc.u32 b0,%9,%13,b0; \n\t" + "madc.hi.cc.u32 b1,%9,%13,0; \n\t" + "add.cc.u32 b0,b0,%2; \n\t" + "mov.u32 %2,b0; \n\t" + "madc.lo.cc.u32 b1,%10,%13,b1; \n\t" + "madc.hi.cc.u32 b0,%10,%13,0; \n\t" + "add.cc.u32 b1,b1,%3; \n\t" + "mov.u32 %3,b1; \n\t" + "madc.lo.cc.u32 b0,%11,%13,b0; \n\t" + "madc.hi.cc.u32 b1,%11,%13,0; \n\t" + "add.cc.u32 b0,b0,%4; \n\t" + "mov.u32 %4,b0; \n\t" + "madc.lo.cc.u32 b1,%12,%13,b1; \n\t" + "madc.hi.cc.u32 %6,%12,%13,0; \n\t" + "add.cc.u32 b1,b1,%5; \n\t" + "addc.u32 %6,%6,0; \n\t" + "mov.u32 %5,b1; \n\t" + "}\n\t" + : "+r"(Wa.y), "+r"(Wa.z), "+r"(Wa.w), "+r"(Wb.x), "+r"(Wb.y), "+r"(Wb.z), "+r"(Wb.w) + : "r"(B0), "r"(B1), "r"(B2), "r"(B3), "r"(B4), "r"(B5), "r"(Q1)); + + w[i*threads + thread] = MAKE_ULONGLONG(Wa.x, Wa.y); + w[(i + 1)*threads + thread] = MAKE_ULONGLONG(Wa.z, Wa.w); + w[(i + 2)*threads + thread] = MAKE_ULONGLONG(Wb.x, Wb.y); + w[(i + 3)*threads + thread] = MAKE_ULONGLONG(Wb.z, Wb.w); + + + + } + +} + +static __forceinline__ __device__ void mul_unroll2_core_test(int threads, int thread, uint64_t* am, uint64_t* bm, uint64_t *w) +{ + + uint32_t B0, B1, B2, B3, B4, B5; + LOHI(B0, B1, am[thread]); + LOHI(B2, B3, am[threads + thread]); + LOHI(B4, B5, am[2 * threads + thread]); + + + +#pragma unroll + for (int i = 0; i<38; i++) { w[i*threads + thread] = 0; } +#if __CUDA_ARCH__ < 500 +#pragma unroll +#endif + for (int i = 0; i<35; i++) { + uint32_t Q0; + uint32_t Q1; + LOHI(Q0, Q1, bm[i*threads + thread]); + // uint32_t W0, W1, W2, W3, W4, W5, 
W6, W7; + uint4 Wa, Wb; + LOHI(Wa.x, Wa.y, w[i*threads + thread]); + LOHI(Wa.z, Wa.w, w[(i + 1)*threads + thread]); + LOHI(Wb.x, Wb.y, w[(i + 2)*threads + thread]); + LOHI(Wb.z, Wb.w, w[(i + 3)*threads + thread]); + + + asm("{\n\t" + ".reg .u32 b0,b1; \n\t" + "mad.lo.cc.u32 b0,%7,%13,%0; \n\t" + "madc.hi.cc.u32 b1,%7,%13,0; \n\t" + "mov.u32 %0,b0; \n\t" + "madc.lo.cc.u32 b1,%8,%13,b1; \n\t" + "madc.hi.cc.u32 b0,%8,%13,0; \n\t" + "add.cc.u32 b1,b1,%1; \n\t" + "mov.u32 %1,b1; \n\t" + "madc.lo.cc.u32 b0,%9,%13,b0; \n\t" + "madc.hi.cc.u32 b1,%9,%13,0; \n\t" + "add.cc.u32 b0,b0,%2; \n\t" + "mov.u32 %2,b0; \n\t" + "madc.lo.cc.u32 b1,%10,%13,b1; \n\t" + "madc.hi.cc.u32 b0,%10,%13,0; \n\t" + "add.cc.u32 b1,b1,%3; \n\t" + "mov.u32 %3,b1; \n\t" + "madc.lo.cc.u32 b0,%11,%13,b0; \n\t" + "madc.hi.cc.u32 b1,%11,%13,0; \n\t" + "add.cc.u32 b0,b0,%4; \n\t" + "mov.u32 %4,b0; \n\t" + "madc.lo.cc.u32 b1,%12,%13,b1; \n\t" + "madc.hi.cc.u32 %6,%12,%13,0; \n\t" + "add.cc.u32 b1,b1,%5; \n\t" + "addc.u32 %6,%6,0; \n\t" + "mov.u32 %5,b1; \n\t" + "}\n\t" + : "+r"(Wa.x), "+r"(Wa.y), "+r"(Wa.z), "+r"(Wa.w), "+r"(Wb.x), "+r"(Wb.y), "+r"(Wb.z) + : "r"(B0), "r"(B1), "r"(B2), "r"(B3), "r"(B4), "r"(B5), "r"(Q0)); + /////////////////////////// + asm("{\n\t" + ".reg .u32 b0,b1; \n\t" + "mad.lo.cc.u32 b0,%7,%13,%0; \n\t" + "madc.hi.cc.u32 b1,%7,%13,0; \n\t" + "mov.u32 %0,b0; \n\t" + "madc.lo.cc.u32 b1,%8,%13,b1; \n\t" + "madc.hi.cc.u32 b0,%8,%13,0; \n\t" + "add.cc.u32 b1,b1,%1; \n\t" + "mov.u32 %1,b1; \n\t" + "madc.lo.cc.u32 b0,%9,%13,b0; \n\t" + "madc.hi.cc.u32 b1,%9,%13,0; \n\t" + "add.cc.u32 b0,b0,%2; \n\t" + "mov.u32 %2,b0; \n\t" + "madc.lo.cc.u32 b1,%10,%13,b1; \n\t" + "madc.hi.cc.u32 b0,%10,%13,0; \n\t" + "add.cc.u32 b1,b1,%3; \n\t" + "mov.u32 %3,b1; \n\t" + "madc.lo.cc.u32 b0,%11,%13,b0; \n\t" + "madc.hi.cc.u32 b1,%11,%13,0; \n\t" + "add.cc.u32 b0,b0,%4; \n\t" + "mov.u32 %4,b0; \n\t" + "madc.lo.cc.u32 b1,%12,%13,b1; \n\t" + "madc.hi.cc.u32 %6,%12,%13,0; \n\t" + "add.cc.u32 b1,b1,%5; \n\t" + 
"addc.u32 %6,%6,0; \n\t" + "mov.u32 %5,b1; \n\t" + "}\n\t" + : "+r"(Wa.y), "+r"(Wa.z), "+r"(Wa.w), "+r"(Wb.x), "+r"(Wb.y), "+r"(Wb.z), "+r"(Wb.w) + : "r"(B0), "r"(B1), "r"(B2), "r"(B3), "r"(B4), "r"(B5), "r"(Q1)); + + w[i*threads + thread] = MAKE_ULONGLONG(Wa.x, Wa.y); + w[(i + 1)*threads + thread] = MAKE_ULONGLONG(Wa.z, Wa.w); + w[(i + 2)*threads + thread] = MAKE_ULONGLONG(Wb.x, Wb.y); + w[(i + 3)*threads + thread] = MAKE_ULONGLONG(Wb.z, Wb.w); + + + + } + +} + + +__global__ void __launch_bounds__(512, 3) m7_bigmul_unroll1_gpu(int threads, uint64_t* am, uint64_t* bm, uint64_t *w) +{ + + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + + if (thread < threads) + { + + mul_unroll1_core_test(threads, thread, am, bm, w); + } // threads +} + +__global__ void __launch_bounds__(256, 2) m7_bigmul_unroll1_gpu_50(int threads, uint64_t* am, uint64_t* bm, uint64_t *w) +{ + + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + + if (thread < threads) + { + mul_unroll1_core_test(threads, thread, am, bm, w); + } // threads +} + +__global__ void __launch_bounds__(256, 4) m7_bigmul_unroll1_gpu_80(int threads, uint64_t* am, uint64_t* bm, uint64_t *w) +{ + + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + + if (thread < threads) + { + mul_unroll1_core_test(threads, thread, am, bm, w); + } // threads +} + + +__global__ void __launch_bounds__(512, 2) m7_bigmul_unroll2_gpu(int threads, uint64_t* am, uint64_t* bm, uint64_t *w) +{ + + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + + if (thread < threads) + { + mul_unroll2_core_test(threads, thread, am, bm, w); + + } //// threads +} + +__global__ void __launch_bounds__(512, 2) m7_bigmul_unroll2_gpu_50(int threads, uint64_t* am, uint64_t* bm, uint64_t *w) +{ + + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + + if (thread < threads) + { + mul_unroll2_core_test(threads, thread, am, bm, w); + } //// threads +} + + + + +__host__ void m7_bigmul_unroll1_cpu(int thr_id, int threads, uint64_t* Hash1, 
uint64_t* Hash2, uint64_t *finalHash, int order) +{ + + int threadsperblock = 512; + if (compute_version[thr_id] >= 50) { threadsperblock = 256; } + dim3 grid((threads + threadsperblock - 1) / threadsperblock); + dim3 block(threadsperblock); + + size_t shared_size = 0; + if (compute_version[thr_id]==50) { + m7_bigmul_unroll1_gpu_50 << > >(threads, Hash1, Hash2, finalHash); + } + else if (compute_version[thr_id]==52) { + m7_bigmul_unroll1_gpu_80 << > >(threads, Hash1, Hash2, finalHash); + } + else { + m7_bigmul_unroll1_gpu << > >(threads, Hash1, Hash2, finalHash); + } + +} + +__host__ void m7_bigmul_unroll2_cpu(int thr_id, int threads, uint64_t* Hash1, uint64_t* Hash2, uint64_t *finalHash, int order) +{ + + const int threadsperblock = 512; + + dim3 grid((threads + threadsperblock - 1) / threadsperblock); + dim3 block(threadsperblock); + + size_t shared_size = 0; + + if (compute_version[thr_id] >= 50) { + m7_bigmul_unroll2_gpu << > >(threads, Hash1, Hash2, finalHash); + } + else { + m7_bigmul_unroll2_gpu << > >(threads, Hash1, Hash2, finalHash); + } + +} + + + + +__host__ void m7_bigmul_init(int thr_id, int threads) +{ + // why I am here ? +} \ No newline at end of file diff --git a/x13/cuda_mul2.cu b/x13/cuda_mul2.cu new file mode 100644 index 0000000000..a3e587cce2 --- /dev/null +++ b/x13/cuda_mul2.cu @@ -0,0 +1,459 @@ +/* + * sha256 djm34, catia + * + */ + +/* + * sha-256 kernel implementation. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 djm34 + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author phm + */ + +#undef _GLIBCXX_ATOMIC_BUILTINS +#undef _GLIBCXX_USE_INT128 + +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + + +#include +#include +#include + + +#include "cuda_helper.h" + +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); + + + +typedef struct t4_t{ + uint64_t high,low; +} t4_t; + +__device__ __forceinline__ +ulonglong2 umul64wide (unsigned long long int a, + unsigned long long int b) +{ + ulonglong2 res; + asm ("{\n\t" + ".reg .u32 r0, r1, r2, r3, alo, ahi, blo, bhi;\n\t" + "mov.b64 {alo,ahi}, %2; \n\t" + "mov.b64 {blo,bhi}, %3; \n\t" + "mul.lo.u32 r0, alo, blo; \n\t" + "mul.hi.u32 r1, alo, blo; \n\t" + "mad.lo.cc.u32 r1, alo, bhi, r1;\n\t" + "madc.hi.u32 r2, alo, bhi, 0;\n\t" + "mad.lo.cc.u32 r1, ahi, blo, r1;\n\t" + "madc.hi.cc.u32 r2, ahi, blo, r2;\n\t" + "madc.hi.u32 r3, ahi, bhi, 0;\n\t" + "mad.lo.cc.u32 r2, ahi, bhi, r2;\n\t" + "addc.u32 r3, r3, 0; \n\t" + "mov.b64 %0, {r0,r1}; \n\t" + "mov.b64 %1, {r2,r3}; \n\t" + "}" + : "=l"(res.x), "=l"(res.y) + : "l"(a), "l"(b)); + return res; +} + +#define umul_ppmm(h,l,m,n) \ +{ \ + ulonglong2 foom = umul64wide(m,n); \ + h = foom.y; \ + l = foom.x; \ +} + + +__device__ __forceinline__ void umul_ppmmT4(t4_t *h, t4_t *l, t4_t m, t4_t n) +{ + asm ("{\n\t" + ".reg .u32 o0, o1, o2, o3, o4; \n\t" + ".reg .u32 o5, o6, o7, i8, i9; \n\t" + ".reg .u32 i10, i11, i12, i13; \n\t" + ".reg .u32 i14, i15, i16, i17; \n\t" + ".reg .u32 i18, i19, i20, i21; \n\t" + ".reg .u32 i22, i23; \n\t" + "mov.b64 { i8, i9}, %4; \n\t" + "mov.b64 {i10,i11}, %5; \n\t" + "mov.b64 {i12,i13}, %6; \n\t" + "mov.b64 {i14,i15}, %7; \n\t" + "mov.b64 {i16,i17}, %8; \n\t" + "mov.b64 {i18,i19}, %9; \n\t" + "mov.b64 {i20,i21},%10; \n\t" + "mov.b64 {i22,i23},%11; \n\t" + "mul.lo.u32 o0, i8, i16; \n\t" + "mul.hi.u32 o1, i8, i16; \n\t" + "mad.lo.cc.u32 o1, i8, i17, o1;\n\t" + "madc.hi.u32 o2, 
i8, i17, 0;\n\t" + "mad.lo.cc.u32 o1, i9, i16, o1;\n\t" + "madc.hi.cc.u32 o2, i9, i16, o2;\n\t" + "madc.hi.u32 o3, i8, i18, 0;\n\t" + "mad.lo.cc.u32 o2, i8, i18, o2;\n\t" + "madc.hi.cc.u32 o3, i9, i17, o3;\n\t" + "madc.hi.u32 o4, i8, i19, 0;\n\t" + "mad.lo.cc.u32 o2, i9, i17, o2;\n\t" + "madc.hi.cc.u32 o3, i10, i16, o3;\n\t" + "madc.hi.cc.u32 o4, i9, i18, o4;\n\t" + "addc.u32 o5, 0, 0;\n\t" + "mad.lo.cc.u32 o2, i10, i16, o2;\n\t" + "madc.lo.cc.u32 o3, i8, i19, o3;\n\t" + "madc.hi.cc.u32 o4, i10, i17, o4;\n\t" + "madc.hi.cc.u32 o5, i9, i19, o5;\n\t" + "addc.u32 o6, 0, 0;\n\t" + "mad.lo.cc.u32 o3, i9, i18, o3;\n\t" + "madc.hi.cc.u32 o4, i11, i16, o4;\n\t" + "madc.hi.cc.u32 o5, i10, i18, o5;\n\t" + "addc.u32 o6, 0, o6;\n\t" + "mad.lo.cc.u32 o3, i10, i17, o3;\n\t" + "addc.u32 o4, 0, o4;\n\t" + "mad.hi.cc.u32 o5, i11, i17, o5;\n\t" + "madc.hi.cc.u32 o6, i10, i19, o6;\n\t" + "addc.u32 o7, 0, 0;\n\t" + "mad.lo.cc.u32 o3, i11, i16, o3;\n\t" + "madc.lo.cc.u32 o4, i9, i19, o4;\n\t" + "addc.u32 o5, 0, o5;\n\t" + "mad.hi.cc.u32 o6, i11, i18, o6;\n\t" + "addc.u32 o7, 0, o7;\n\t" + "mad.lo.cc.u32 o4, i10, i18, o4;\n\t" + "addc.u32 o5, 0, o5;\n\t" + "mad.hi.u32 o7, i11, i19, o7;\n\t" + "mad.lo.cc.u32 o4, i11, i17, o4;\n\t" + "addc.u32 o5, 0, o5;\n\t" + "mad.lo.cc.u32 o5, i10, i19, o5;\n\t" + "addc.u32 o6, 0, o6;\n\t" + "mad.lo.cc.u32 o5, i11, i18, o5;\n\t" + "addc.u32 o6, 0, o6;\n\t" + "mad.lo.cc.u32 o6, i11, i19, o6;\n\t" + "addc.u32 o7, 0, o7;\n\t" + "mov.b64 %0, {o0,o1}; \n\t" + "mov.b64 %1, {o2,o3}; \n\t" + "mov.b64 %2, {o4,o5}; \n\t" + "mov.b64 %3, {o6,o7}; \n\t" + "}" + : "=l"(l->low), "=l"(l->high), "=l"(h->low), "=l"(h->high) + : "l"(m.low), "l"(m.high), "l"(0ULL), "l"(0ULL), + "l"(n.low), "l"(n.high), "l"(0ULL), "l"(0ULL)); +} + +#if 0 +__device__ __forceinline__ void umul_ppmmT4(t4_t *h, t4_t *l, t4_t m, t4_t n){ + uint64_t th,tl; + uint32_t c,c2; + umul_ppmm(l->high,l->low,m.low,n.low); + + umul_ppmm(th,tl,m.high,n.low); + l->high += tl; + c = (l->high < tl); + h->low 
= th + c; + c = (h->low < c); + h->high = c; + + //Second word + umul_ppmm(th,tl,m.low,n.high); + l->high += tl; + c = l->high < tl; + h->low += th; + c2 = h->low < th; + h->low += c; + c2 += h->low < c; + h->high += c2; + + umul_ppmm(th,tl,m.high,n.high); + h->low += tl; + c = h->low < tl; + h->high += th + c; +} +#endif + + +__device__ __forceinline__ t4_t T4(uint32_t thread, uint32_t threads, uint32_t idx, uint64_t *g){ + t4_t ret; + ret.high = g[(idx*2 + 1)*threads + thread]; + ret.low = g[(idx*2)*threads + thread]; + + + + return ret; +} + +__device__ __forceinline__ void T4_store(uint32_t thread, uint32_t threads, uint32_t idx, uint64_t *g, t4_t val){ + g[(idx*2 + 1)*threads + thread]=val.high; + g[(idx*2)*threads + thread]=val.low; + + + +} + +__device__ __forceinline__ void T4_set(t4_t *d, uint64_t v){ + d->high = 0; + d->low = v; +} + +__device__ __forceinline__ t4_t T4_add(t4_t a, t4_t b){ + t4_t ret; + uint32_t c=0; + ret.low = a.low + b.low; + if(ret.low < a.low) + c=1; + ret.high = a.high + b.high + c; + return ret; +} + +__device__ __forceinline__ t4_t T4_add(uint64_t a, t4_t b){ + t4_t ret; + uint32_t c=0; + ret.low = a + b.low; + if(ret.low < a) + c=1; + ret.high = b.high + c; + return ret; +} + + +__device__ __forceinline__ uint32_t T4_lt(t4_t a, t4_t b){ + if(a.high < b.high) + return 1; + if(a.high == b.high && a.low < b.low) + return 1; + return 0; +} + +__device__ __forceinline__ uint32_t T4_gt(t4_t a, uint64_t b){ + if(a.high) + return 1; + if(a.low > b) + return 1; + return 0; +} + + +__device__ void mulScalarT4(uint32_t thread, uint32_t threads, uint32_t len, uint64_t* g_p, uint64_t* g_v, t4_t sml, uint32_t *size){ + t4_t ul, cl, hpl, lpl; + uint32_t i; + T4_set(&cl,0); + for(i=0; i < len; i++) { + ul = T4(thread,threads,i,g_v); + umul_ppmmT4 (&hpl, &lpl, ul, sml); + + lpl = T4_add(lpl,cl); + cl = T4_add(T4_lt(lpl,cl),hpl); + + T4_store(thread,threads,i,g_p,lpl); + } + + T4_store(thread,threads,len,g_p,cl); + *size = len + T4_gt(cl,0); +} + 
+ +__device__ void mulScalar(uint32_t thread, uint32_t threads, uint32_t len, uint64_t* g_p, uint64_t* g_v, uint64_t sml, uint32_t *size){ + uint64_t ul, cl, hpl, lpl; + uint32_t i; + cl = 0; + for(i=0; i < len; i++) { + ul = g_v[i*threads + thread]; + umul_ppmm (hpl, lpl, ul, sml); + + lpl += cl; + cl = (lpl < cl) + hpl; + + g_p[i*threads + thread] = lpl; + } + + g_p[len*threads + thread] = cl; + *size = len + (cl != 0); +} + +uint64_t __device__ addmul_1g (uint32_t thread, uint32_t threads, uint64_t *sum, uint32_t sofst, uint64_t *x, uint64_t xsz, uint64_t a){ + uint64_t carry=0; + uint32_t i; + uint64_t ul,lpl,hpl,rl; + + for(i=0; i < xsz; i++){ + + ul = x[i*threads + thread]; + umul_ppmm (hpl, lpl, ul, a); + + lpl += carry; + carry = (lpl < carry) + hpl; + + rl = sum[(i+sofst) * threads + thread]; + lpl = rl + lpl; + carry += lpl < rl; + sum[(i+sofst)*threads + thread] = lpl; + } + + return carry; +} + +t4_t __device__ addmul_1gT4 (uint32_t thread, uint32_t threads, uint64_t *sum, uint32_t sofst, uint64_t *x, uint64_t xsz, t4_t a){ + t4_t carry; + uint32_t i; + t4_t ul,lpl,hpl,rl; + T4_set(&carry,0); + for(i=0; i < xsz; i++){ + + ul = T4(thread,threads,i,x); + umul_ppmmT4 (&hpl, &lpl, ul, a); + + lpl = T4_add(lpl,carry); + carry = T4_add(T4_lt(lpl,carry), hpl); + + rl = T4(thread,threads,i+sofst,sum); + lpl = T4_add(rl,lpl); + carry = T4_add(T4_lt(lpl,rl),carry); + T4_store(thread,threads,i+sofst,sum,lpl); + } + + return carry; +} + + + +__global__ void gpu_mul(int threads, uint32_t ulegs, uint32_t vlegs, uint64_t *g_u, uint64_t *g_v, uint64_t *g_p) +{ + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + if(ulegs < vlegs){ + uint64_t t1=ulegs; + ulegs = vlegs; + vlegs = t1; + + uint64_t *t2 = g_u; + g_u = g_v; + g_v = t2; + } + + uint32_t vofst=1,rofst=1,psize=0; + mulScalar(thread,threads,ulegs,g_p,g_u,g_v[thread],&psize); + +#if 1 + + while (vofst < vlegs) { + + g_p[(psize+0)*threads+thread] = 0; + + g_p[(ulegs+rofst)*threads 
+ thread] = addmul_1g (thread, threads, g_p ,rofst , g_u, ulegs, g_v[vofst*threads+thread]); + + vofst++; rofst++; + psize++; + } + + + + +#endif + } +} + +__global__ void gpu_mulT4(int threads, uint32_t ulegs, uint32_t vlegs, uint64_t *g_u, uint64_t *g_v, uint64_t *g_p) +{ + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + + if(ulegs < vlegs){ + uint64_t t1=ulegs; + ulegs = vlegs; + vlegs = t1; + + uint64_t *t2 = g_u; + g_u = g_v; + g_v = t2; + } + + ulegs >>= 1; vlegs >>= 1; + + + + uint32_t vofst=1,rofst=1,psize=0; + mulScalarT4(thread,threads,ulegs,g_p,g_u,T4(thread,threads,0,g_v),&psize); + +#if 1 + t4_t zero; + T4_set(&zero,0); + + +#pragma unroll + for (vofst=1;vofst>>(threads, alegs, blegs, g_a, g_b, g_p) ; + +} + +__host__ void cpu_mulT4(int thr_id, int threads, uint32_t alegs, uint32_t blegs, uint64_t *g_a, uint64_t *g_b, uint64_t *g_p, int order) +{ + + const int threadsperblock = 256; + + dim3 grid(2*(threads + threadsperblock-1)/threadsperblock); + dim3 block(threadsperblock); + + size_t shared_size =0; + + gpu_mulT4<<>>(threads, blegs, alegs, g_b, g_a, g_p) ; +} + +__host__ void mul_init(){ + +} diff --git a/x13/cuda_ripemd160.cu b/x13/cuda_ripemd160.cu new file mode 100644 index 0000000000..eaa2b2390f --- /dev/null +++ b/x13/cuda_ripemd160.cu @@ -0,0 +1,400 @@ +/* + * ripemd-160 djm34 + * + */ + +/* + * ripemd-160 kernel implementation. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 djm34 + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author phm + */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + + +#include +#include +#include + + + +#include "cuda_helper.h" + +#define SPH_C64(x) ((uint64_t)(x ## ULL)) +#define SPH_C32(x) ((uint32_t)(x ## U)) +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) +#define ROTL SPH_ROTL32 + +// aus heavy.cu +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); + + + __constant__ uint32_t c_PaddedMessage80[32]; // padded message (80 bytes + padding) +static __constant__ uint32_t gpu_IV[5]; +static __constant__ uint32_t bufo[5]; +static const uint32_t IV[5] = { + SPH_C32(0x67452301), SPH_C32(0xEFCDAB89), SPH_C32(0x98BADCFE), + SPH_C32(0x10325476), SPH_C32(0xC3D2E1F0) +}; + +/* + * Round functions for RIPEMD-128 and RIPEMD-160. + */ +#define F1(x, y, z) ((x) ^ (y) ^ (z)) +#define F2(x, y, z) ((((y) ^ (z)) & (x)) ^ (z)) +#define F3(x, y, z) (((x) | ~(y)) ^ (z)) +#define F4(x, y, z) ((((x) ^ (y)) & (z)) ^ (y)) +#define F5(x, y, z) ((x) ^ ((y) | ~(z))) + +/* + * Round constants for RIPEMD-160. 
+ */ +#define K11 SPH_C32(0x00000000) +#define K12 SPH_C32(0x5A827999) +#define K13 SPH_C32(0x6ED9EBA1) +#define K14 SPH_C32(0x8F1BBCDC) +#define K15 SPH_C32(0xA953FD4E) + +#define K21 SPH_C32(0x50A28BE6) +#define K22 SPH_C32(0x5C4DD124) +#define K23 SPH_C32(0x6D703EF3) +#define K24 SPH_C32(0x7A6D76E9) +#define K25 SPH_C32(0x00000000) + +#define RR(a, b, c, d, e, f, s, r, k) { \ + a = SPH_T32(ROTL(SPH_T32(a + f(b, c, d) + r + k), s) + e); \ + c = ROTL(c, 10); \ + } + +#define ROUND1(a, b, c, d, e, f, s, r, k) \ + RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k) + +#define ROUND2(a, b, c, d, e, f, s, r, k) \ + RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k) + + + +#define RIPEMD160_ROUND_BODY(in, h) { \ + uint32_t A1, B1, C1, D1, E1; \ + uint32_t A2, B2, C2, D2, E2; \ + uint32_t tmp; \ + \ + A1 = A2 = (h)[0]; \ + B1 = B2 = (h)[1]; \ + C1 = C2 = (h)[2]; \ + D1 = D2 = (h)[3]; \ + E1 = E2 = (h)[4]; \ + \ + ROUND1(A, B, C, D, E, F1, 11, in[ 0], 1); \ + ROUND1(E, A, B, C, D, F1, 14, in[ 1], 1); \ + ROUND1(D, E, A, B, C, F1, 15, in[ 2], 1); \ + ROUND1(C, D, E, A, B, F1, 12, in[ 3], 1); \ + ROUND1(B, C, D, E, A, F1, 5, in[ 4], 1); \ + ROUND1(A, B, C, D, E, F1, 8, in[ 5], 1); \ + ROUND1(E, A, B, C, D, F1, 7, in[ 6], 1); \ + ROUND1(D, E, A, B, C, F1, 9, in[ 7], 1); \ + ROUND1(C, D, E, A, B, F1, 11, in[ 8], 1); \ + ROUND1(B, C, D, E, A, F1, 13, in[ 9], 1); \ + ROUND1(A, B, C, D, E, F1, 14, in[10], 1); \ + ROUND1(E, A, B, C, D, F1, 15, in[11], 1); \ + ROUND1(D, E, A, B, C, F1, 6, in[12], 1); \ + ROUND1(C, D, E, A, B, F1, 7, in[13], 1); \ + ROUND1(B, C, D, E, A, F1, 9, in[14], 1); \ + ROUND1(A, B, C, D, E, F1, 8, in[15], 1); \ + \ + ROUND1(E, A, B, C, D, F2, 7, in[ 7], 2); \ + ROUND1(D, E, A, B, C, F2, 6, in[ 4], 2); \ + ROUND1(C, D, E, A, B, F2, 8, in[13], 2); \ + ROUND1(B, C, D, E, A, F2, 13, in[ 1], 2); \ + ROUND1(A, B, C, D, E, F2, 11, in[10], 2); \ + ROUND1(E, A, B, C, D, F2, 9, in[ 6], 2); \ + ROUND1(D, E, A, B, C, F2, 7, in[15], 2); \ + ROUND1(C, 
D, E, A, B, F2, 15, in[ 3], 2); \ + ROUND1(B, C, D, E, A, F2, 7, in[12], 2); \ + ROUND1(A, B, C, D, E, F2, 12, in[ 0], 2); \ + ROUND1(E, A, B, C, D, F2, 15, in[ 9], 2); \ + ROUND1(D, E, A, B, C, F2, 9, in[ 5], 2); \ + ROUND1(C, D, E, A, B, F2, 11, in[ 2], 2); \ + ROUND1(B, C, D, E, A, F2, 7, in[14], 2); \ + ROUND1(A, B, C, D, E, F2, 13, in[11], 2); \ + ROUND1(E, A, B, C, D, F2, 12, in[ 8], 2); \ + \ + ROUND1(D, E, A, B, C, F3, 11, in[ 3], 3); \ + ROUND1(C, D, E, A, B, F3, 13, in[10], 3); \ + ROUND1(B, C, D, E, A, F3, 6, in[14], 3); \ + ROUND1(A, B, C, D, E, F3, 7, in[ 4], 3); \ + ROUND1(E, A, B, C, D, F3, 14, in[ 9], 3); \ + ROUND1(D, E, A, B, C, F3, 9, in[15], 3); \ + ROUND1(C, D, E, A, B, F3, 13, in[ 8], 3); \ + ROUND1(B, C, D, E, A, F3, 15, in[ 1], 3); \ + ROUND1(A, B, C, D, E, F3, 14, in[ 2], 3); \ + ROUND1(E, A, B, C, D, F3, 8, in[ 7], 3); \ + ROUND1(D, E, A, B, C, F3, 13, in[ 0], 3); \ + ROUND1(C, D, E, A, B, F3, 6, in[ 6], 3); \ + ROUND1(B, C, D, E, A, F3, 5, in[13], 3); \ + ROUND1(A, B, C, D, E, F3, 12, in[11], 3); \ + ROUND1(E, A, B, C, D, F3, 7, in[ 5], 3); \ + ROUND1(D, E, A, B, C, F3, 5, in[12], 3); \ + \ + ROUND1(C, D, E, A, B, F4, 11, in[ 1], 4); \ + ROUND1(B, C, D, E, A, F4, 12, in[ 9], 4); \ + ROUND1(A, B, C, D, E, F4, 14, in[11], 4); \ + ROUND1(E, A, B, C, D, F4, 15, in[10], 4); \ + ROUND1(D, E, A, B, C, F4, 14, in[ 0], 4); \ + ROUND1(C, D, E, A, B, F4, 15, in[ 8], 4); \ + ROUND1(B, C, D, E, A, F4, 9, in[12], 4); \ + ROUND1(A, B, C, D, E, F4, 8, in[ 4], 4); \ + ROUND1(E, A, B, C, D, F4, 9, in[13], 4); \ + ROUND1(D, E, A, B, C, F4, 14, in[ 3], 4); \ + ROUND1(C, D, E, A, B, F4, 5, in[ 7], 4); \ + ROUND1(B, C, D, E, A, F4, 6, in[15], 4); \ + ROUND1(A, B, C, D, E, F4, 8, in[14], 4); \ + ROUND1(E, A, B, C, D, F4, 6, in[ 5], 4); \ + ROUND1(D, E, A, B, C, F4, 5, in[ 6], 4); \ + ROUND1(C, D, E, A, B, F4, 12, in[ 2], 4); \ + \ + ROUND1(B, C, D, E, A, F5, 9, in[ 4], 5); \ + ROUND1(A, B, C, D, E, F5, 15, in[ 0], 5); \ + ROUND1(E, A, B, C, D, F5, 5, in[ 5], 
5); \ + ROUND1(D, E, A, B, C, F5, 11, in[ 9], 5); \ + ROUND1(C, D, E, A, B, F5, 6, in[ 7], 5); \ + ROUND1(B, C, D, E, A, F5, 8, in[12], 5); \ + ROUND1(A, B, C, D, E, F5, 13, in[ 2], 5); \ + ROUND1(E, A, B, C, D, F5, 12, in[10], 5); \ + ROUND1(D, E, A, B, C, F5, 5, in[14], 5); \ + ROUND1(C, D, E, A, B, F5, 12, in[ 1], 5); \ + ROUND1(B, C, D, E, A, F5, 13, in[ 3], 5); \ + ROUND1(A, B, C, D, E, F5, 14, in[ 8], 5); \ + ROUND1(E, A, B, C, D, F5, 11, in[11], 5); \ + ROUND1(D, E, A, B, C, F5, 8, in[ 6], 5); \ + ROUND1(C, D, E, A, B, F5, 5, in[15], 5); \ + ROUND1(B, C, D, E, A, F5, 6, in[13], 5); \ + \ + ROUND2(A, B, C, D, E, F5, 8, in[ 5], 1); \ + ROUND2(E, A, B, C, D, F5, 9, in[14], 1); \ + ROUND2(D, E, A, B, C, F5, 9, in[ 7], 1); \ + ROUND2(C, D, E, A, B, F5, 11, in[ 0], 1); \ + ROUND2(B, C, D, E, A, F5, 13, in[ 9], 1); \ + ROUND2(A, B, C, D, E, F5, 15, in[ 2], 1); \ + ROUND2(E, A, B, C, D, F5, 15, in[11], 1); \ + ROUND2(D, E, A, B, C, F5, 5, in[ 4], 1); \ + ROUND2(C, D, E, A, B, F5, 7, in[13], 1); \ + ROUND2(B, C, D, E, A, F5, 7, in[ 6], 1); \ + ROUND2(A, B, C, D, E, F5, 8, in[15], 1); \ + ROUND2(E, A, B, C, D, F5, 11, in[ 8], 1); \ + ROUND2(D, E, A, B, C, F5, 14, in[ 1], 1); \ + ROUND2(C, D, E, A, B, F5, 14, in[10], 1); \ + ROUND2(B, C, D, E, A, F5, 12, in[ 3], 1); \ + ROUND2(A, B, C, D, E, F5, 6, in[12], 1); \ + \ + ROUND2(E, A, B, C, D, F4, 9, in[ 6], 2); \ + ROUND2(D, E, A, B, C, F4, 13, in[11], 2); \ + ROUND2(C, D, E, A, B, F4, 15, in[ 3], 2); \ + ROUND2(B, C, D, E, A, F4, 7, in[ 7], 2); \ + ROUND2(A, B, C, D, E, F4, 12, in[ 0], 2); \ + ROUND2(E, A, B, C, D, F4, 8, in[13], 2); \ + ROUND2(D, E, A, B, C, F4, 9, in[ 5], 2); \ + ROUND2(C, D, E, A, B, F4, 11, in[10], 2); \ + ROUND2(B, C, D, E, A, F4, 7, in[14], 2); \ + ROUND2(A, B, C, D, E, F4, 7, in[15], 2); \ + ROUND2(E, A, B, C, D, F4, 12, in[ 8], 2); \ + ROUND2(D, E, A, B, C, F4, 7, in[12], 2); \ + ROUND2(C, D, E, A, B, F4, 6, in[ 4], 2); \ + ROUND2(B, C, D, E, A, F4, 15, in[ 9], 2); \ + ROUND2(A, B, C, D, E, F4, 
13, in[ 1], 2); \ + ROUND2(E, A, B, C, D, F4, 11, in[ 2], 2); \ + \ + ROUND2(D, E, A, B, C, F3, 9, in[15], 3); \ + ROUND2(C, D, E, A, B, F3, 7, in[ 5], 3); \ + ROUND2(B, C, D, E, A, F3, 15, in[ 1], 3); \ + ROUND2(A, B, C, D, E, F3, 11, in[ 3], 3); \ + ROUND2(E, A, B, C, D, F3, 8, in[ 7], 3); \ + ROUND2(D, E, A, B, C, F3, 6, in[14], 3); \ + ROUND2(C, D, E, A, B, F3, 6, in[ 6], 3); \ + ROUND2(B, C, D, E, A, F3, 14, in[ 9], 3); \ + ROUND2(A, B, C, D, E, F3, 12, in[11], 3); \ + ROUND2(E, A, B, C, D, F3, 13, in[ 8], 3); \ + ROUND2(D, E, A, B, C, F3, 5, in[12], 3); \ + ROUND2(C, D, E, A, B, F3, 14, in[ 2], 3); \ + ROUND2(B, C, D, E, A, F3, 13, in[10], 3); \ + ROUND2(A, B, C, D, E, F3, 13, in[ 0], 3); \ + ROUND2(E, A, B, C, D, F3, 7, in[ 4], 3); \ + ROUND2(D, E, A, B, C, F3, 5, in[13], 3); \ + \ + ROUND2(C, D, E, A, B, F2, 15, in[ 8], 4); \ + ROUND2(B, C, D, E, A, F2, 5, in[ 6], 4); \ + ROUND2(A, B, C, D, E, F2, 8, in[ 4], 4); \ + ROUND2(E, A, B, C, D, F2, 11, in[ 1], 4); \ + ROUND2(D, E, A, B, C, F2, 14, in[ 3], 4); \ + ROUND2(C, D, E, A, B, F2, 14, in[11], 4); \ + ROUND2(B, C, D, E, A, F2, 6, in[15], 4); \ + ROUND2(A, B, C, D, E, F2, 14, in[ 0], 4); \ + ROUND2(E, A, B, C, D, F2, 6, in[ 5], 4); \ + ROUND2(D, E, A, B, C, F2, 9, in[12], 4); \ + ROUND2(C, D, E, A, B, F2, 12, in[ 2], 4); \ + ROUND2(B, C, D, E, A, F2, 9, in[13], 4); \ + ROUND2(A, B, C, D, E, F2, 12, in[ 9], 4); \ + ROUND2(E, A, B, C, D, F2, 5, in[ 7], 4); \ + ROUND2(D, E, A, B, C, F2, 15, in[10], 4); \ + ROUND2(C, D, E, A, B, F2, 8, in[14], 4); \ + \ + ROUND2(B, C, D, E, A, F1, 8, in[12], 5); \ + ROUND2(A, B, C, D, E, F1, 5, in[15], 5); \ + ROUND2(E, A, B, C, D, F1, 12, in[10], 5); \ + ROUND2(D, E, A, B, C, F1, 9, in[ 4], 5); \ + ROUND2(C, D, E, A, B, F1, 12, in[ 1], 5); \ + ROUND2(B, C, D, E, A, F1, 5, in[ 5], 5); \ + ROUND2(A, B, C, D, E, F1, 14, in[ 8], 5); \ + ROUND2(E, A, B, C, D, F1, 6, in[ 7], 5); \ + ROUND2(D, E, A, B, C, F1, 8, in[ 6], 5); \ + ROUND2(C, D, E, A, B, F1, 13, in[ 2], 5); \ + ROUND2(B, 
C, D, E, A, F1, 6, in[13], 5); \ + ROUND2(A, B, C, D, E, F1, 5, in[14], 5); \ + ROUND2(E, A, B, C, D, F1, 15, in[ 0], 5); \ + ROUND2(D, E, A, B, C, F1, 13, in[ 3], 5); \ + ROUND2(C, D, E, A, B, F1, 11, in[ 9], 5); \ + ROUND2(B, C, D, E, A, F1, 11, in[11], 5); \ + \ + tmp = SPH_T32((h)[1] + C1 + D2); \ + (h)[1] = SPH_T32((h)[2] + D1 + E2); \ + (h)[2] = SPH_T32((h)[3] + E1 + A2); \ + (h)[3] = SPH_T32((h)[4] + A1 + B2); \ + (h)[4] = SPH_T32((h)[0] + B1 + C2); \ + (h)[0] = tmp; \ + } + + +__global__ void m7_ripemd160_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash) +{ + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + + uint32_t nounce = startNounce + thread ; +union { +uint8_t h1[64]; +uint32_t h4[16]; +uint64_t h8[8]; +} hash; + +#undef F1 +#undef F2 +#undef F3 +#undef F4 +#undef F5 + +#define F1(x, y, z) xor3(x,y,z) +#define F2(x, y, z) xandx(x,y,z) +#define F3(x, y, z) xornot64(x,y,z) +#define F4(x, y, z) xandx(z,x,y) +#define F5(x, y, z) xornt64(x,y,z) + uint32_t in2[16],in3[16]; + uint32_t in[16],buf[5]; + #pragma unroll 16 + for (int i=0;i<16;i++) {if ((i+16)<29) {in2[i]= c_PaddedMessage80[i+16];} + else if ((i+16)==29) {in2[i]= nounce;} + else if ((i+16)==30) {in2[i]= c_PaddedMessage80[i+16];} + else {in2[i]= 0;}} + #pragma unroll 16 + for (int i=0;i<16;i++) {in3[i]=0;} + in3[14]=0x3d0; + #pragma unroll 5 + for (int i=0;i<5;i++) {buf[i]=bufo[i];} + RIPEMD160_ROUND_BODY(in2, buf); + RIPEMD160_ROUND_BODY(in3, buf); + + +hash.h4[5]=0; +#pragma unroll 5 +for (int i=0;i<5;i++) +{hash.h4[i]=buf[i]; +} + +#pragma unroll 3 +for (int i=0;i<3;i++) {outputHash[i*threads+thread]=hash.h8[i];} + + } +} + + +void ripemd160_cpu_init(int thr_id, int threads) +{ + + cudaMemcpyToSymbol(gpu_IV,IV,sizeof(IV),0, cudaMemcpyHostToDevice); + +} + +__host__ void ripemd160_setBlock_120(void *pdata) +{ + unsigned char PaddedMessage[128]; + uint8_t ending =0x80; + memcpy(PaddedMessage, pdata, 122); + 
memset(PaddedMessage+122,ending,1); + memset(PaddedMessage+123, 0, 5); //useless + cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 32*sizeof(uint32_t), 0, cudaMemcpyHostToDevice); + +#undef F1 +#undef F2 +#undef F3 +#undef F4 +#undef F5 +#define F1(x, y, z) ((x) ^ (y) ^ (z)) +#define F2(x, y, z) ((((y) ^ (z)) & (x)) ^ (z)) +#define F3(x, y, z) (((x) | ~(y)) ^ (z)) +#define F4(x, y, z) ((((x) ^ (y)) & (z)) ^ (y)) +#define F5(x, y, z) ((x) ^ ((y) | ~(z))) + uint32_t* alt_data =(uint32_t*)pdata; + uint32_t in[16],buf[5]; + + + for (int i=0;i<16;i++) {in[i]= alt_data[i];} + + + for (int i=0;i<5;i++) {buf[i]=IV[i];} + + RIPEMD160_ROUND_BODY(in, buf); //no need to calculate it several time (need to moved) + cudaMemcpyToSymbol(bufo, buf, 5*sizeof(uint32_t), 0, cudaMemcpyHostToDevice); +} + +__host__ void m7_ripemd160_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order) +{ + + const int threadsperblock = 256; // Alignment mit mixtab Grösse. NICHT ÄNDERN + + +dim3 grid(threads/threadsperblock); +dim3 block(threadsperblock); +//dim3 grid(1); +//dim3 block(1); + size_t shared_size =0; + m7_ripemd160_gpu_hash_120<<>>(threads, startNounce, d_outputHash); + + MyStreamSynchronize(NULL, order, thr_id); +} diff --git a/x13/cuda_sha512.cu b/x13/cuda_sha512.cu new file mode 100644 index 0000000000..be9276e96c --- /dev/null +++ b/x13/cuda_sha512.cu @@ -0,0 +1,419 @@ +/* + * sha512 djm34 + * + */ + +/* + * sha-512 kernel implementation. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 djm34 + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author phm + */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + + +#include +#include +#include + + +#define USE_SHARED 1 +#include "cuda_helper.h" +#define SPH_C64(x) ((uint64_t)(x ## ULL)) + + +// aus heavy.cu +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); +extern int device_major[8]; + + +__constant__ uint64_t c_PaddedMessage80[16]; +static __constant__ uint64_t H_512[8]; +static __constant__ uint64_t gpu_WK[80]; +static __constant__ uint64_t gpu_W[80]; + +static const uint64_t H512[8] = { + SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B), + SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1), + SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F), + SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179) +}; +static __constant__ uint64_t K_512[80]; + +static const uint64_t K512[80] = { + SPH_C64(0x428A2F98D728AE22), SPH_C64(0x7137449123EF65CD), + SPH_C64(0xB5C0FBCFEC4D3B2F), SPH_C64(0xE9B5DBA58189DBBC), + SPH_C64(0x3956C25BF348B538), SPH_C64(0x59F111F1B605D019), + SPH_C64(0x923F82A4AF194F9B), SPH_C64(0xAB1C5ED5DA6D8118), + SPH_C64(0xD807AA98A3030242), SPH_C64(0x12835B0145706FBE), + SPH_C64(0x243185BE4EE4B28C), SPH_C64(0x550C7DC3D5FFB4E2), + SPH_C64(0x72BE5D74F27B896F), SPH_C64(0x80DEB1FE3B1696B1), + SPH_C64(0x9BDC06A725C71235), SPH_C64(0xC19BF174CF692694), + SPH_C64(0xE49B69C19EF14AD2), SPH_C64(0xEFBE4786384F25E3), + SPH_C64(0x0FC19DC68B8CD5B5), SPH_C64(0x240CA1CC77AC9C65), + SPH_C64(0x2DE92C6F592B0275), SPH_C64(0x4A7484AA6EA6E483), + SPH_C64(0x5CB0A9DCBD41FBD4), SPH_C64(0x76F988DA831153B5), + SPH_C64(0x983E5152EE66DFAB), SPH_C64(0xA831C66D2DB43210), + SPH_C64(0xB00327C898FB213F), SPH_C64(0xBF597FC7BEEF0EE4), + SPH_C64(0xC6E00BF33DA88FC2), SPH_C64(0xD5A79147930AA725), + SPH_C64(0x06CA6351E003826F), SPH_C64(0x142929670A0E6E70), + SPH_C64(0x27B70A8546D22FFC), SPH_C64(0x2E1B21385C26C926), 
+ SPH_C64(0x4D2C6DFC5AC42AED), SPH_C64(0x53380D139D95B3DF), + SPH_C64(0x650A73548BAF63DE), SPH_C64(0x766A0ABB3C77B2A8), + SPH_C64(0x81C2C92E47EDAEE6), SPH_C64(0x92722C851482353B), + SPH_C64(0xA2BFE8A14CF10364), SPH_C64(0xA81A664BBC423001), + SPH_C64(0xC24B8B70D0F89791), SPH_C64(0xC76C51A30654BE30), + SPH_C64(0xD192E819D6EF5218), SPH_C64(0xD69906245565A910), + SPH_C64(0xF40E35855771202A), SPH_C64(0x106AA07032BBD1B8), + SPH_C64(0x19A4C116B8D2D0C8), SPH_C64(0x1E376C085141AB53), + SPH_C64(0x2748774CDF8EEB99), SPH_C64(0x34B0BCB5E19B48A8), + SPH_C64(0x391C0CB3C5C95A63), SPH_C64(0x4ED8AA4AE3418ACB), + SPH_C64(0x5B9CCA4F7763E373), SPH_C64(0x682E6FF3D6B2B8A3), + SPH_C64(0x748F82EE5DEFB2FC), SPH_C64(0x78A5636F43172F60), + SPH_C64(0x84C87814A1F0AB72), SPH_C64(0x8CC702081A6439EC), + SPH_C64(0x90BEFFFA23631E28), SPH_C64(0xA4506CEBDE82BDE9), + SPH_C64(0xBEF9A3F7B2C67915), SPH_C64(0xC67178F2E372532B), + SPH_C64(0xCA273ECEEA26619C), SPH_C64(0xD186B8C721C0C207), + SPH_C64(0xEADA7DD6CDE0EB1E), SPH_C64(0xF57D4F7FEE6ED178), + SPH_C64(0x06F067AA72176FBA), SPH_C64(0x0A637DC5A2C898A6), + SPH_C64(0x113F9804BEF90DAE), SPH_C64(0x1B710B35131C471B), + SPH_C64(0x28DB77F523047D84), SPH_C64(0x32CAAB7B40C72493), + SPH_C64(0x3C9EBE0A15C9BEBC), SPH_C64(0x431D67C49C100D4C), + SPH_C64(0x4CC5D4BECB3E42B6), SPH_C64(0x597F299CFC657E2A), + SPH_C64(0x5FCB6FAB3AD6FAEC), SPH_C64(0x6C44198C4A475817) +}; + + +static __device__ __forceinline__ uint64_t bsg5_0(uint64_t x) +{ + uint64_t r1 = ROTR64(x,28); + uint64_t r2 = ROTR64(x,34); + uint64_t r3 = ROTR64(x,39); + return xor3(r1,r2,r3); +} +static __device__ __forceinline__ uint64_t bsg5_1(uint64_t x) +{ + uint64_t r1 = ROTR64(x,14); + uint64_t r2 = ROTR64(x,18); + uint64_t r3 = ROTR64(x,41); + return xor3(r1,r2,r3); +} +static __device__ __forceinline__ uint64_t ssg5_0(uint64_t x) +{ + uint64_t r1 = ROTR64(x,1); + uint64_t r2 = ROTR64(x,8); + uint64_t r3 = shr_t64(x,7); + return xor3(r1,r2,r3); +} +static __device__ __forceinline__ uint64_t ssg5_1(uint64_t x) 
+{ + uint64_t r1 = ROTR64(x,19); + uint64_t r2 = ROTR64(x,61); + uint64_t r3 = shr_t64(x,6); + return xor3(r1,r2,r3); +} + + +static __device__ __forceinline__ void sha3_step2(uint64_t* r,uint64_t* W,uint64_t* K,int ord,int i) +{ +int u = 8-ord; +uint64_t a=r[(0+u)& 7]; +uint64_t b=r[(1+u)& 7]; +uint64_t c=r[(2+u)& 7]; +uint64_t d=r[(3+u)& 7]; +uint64_t e=r[(4+u)& 7]; +uint64_t f=r[(5+u)& 7]; +uint64_t g=r[(6+u)& 7]; +uint64_t h=r[(7+u)& 7]; + +uint64_t T1, T2; +T1 = h+bsg5_1(e)+xandx64(e,f,g)+W[i]+K[i]; +T2 = bsg5_0(a) + andor(a,b,c); +r[(3+u)& 7] = d + T1; +r[(7+u)& 7] = T1 + T2; + +} + +static __device__ __forceinline__ void sha3_step3(uint64_t* r,uint64_t* W,int ord,int i) +{ +int u = 8-ord; +uint64_t a=r[(0+u)& 7]; +uint64_t b=r[(1+u)& 7]; +uint64_t c=r[(2+u)& 7]; +uint64_t d=r[(3+u)& 7]; +uint64_t e=r[(4+u)& 7]; +uint64_t f=r[(5+u)& 7]; +uint64_t g=r[(6+u)& 7]; +uint64_t h=r[(7+u)& 7]; + +uint64_t T1, T2; +T1 = h+bsg5_1(e)+xandx64(e,f,g)+W[i]; +T2 = bsg5_0(a) + andor(a,b,c); +r[(3+u)& 7] = d + T1; +r[(7+u)& 7] = T1 + T2; + +} + + +__global__ void sha512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) +{ + + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread); + + int hashPosition = nounce - startNounce; + + + uint64_t *inpHash = (uint64_t*)&g_hash + 8*thread; + + + + uint64_t W[80]; + uint64_t r[8]; +#pragma unroll 71 + for (int i=9;i<80;i++) {W[i]=0;} + +#pragma unroll 8 + for (int i = 0; i < 8; i ++) { + W[i] = cuda_swab64(inpHash[i]); + r[i] = H_512[i];} + + W[8] = 0x8000000000000000; + W[15]= 0x0000000000000200; +#pragma unroll 64 + for (int i = 16; i < 80; i ++) + W[i] = sph_t64(ssg5_1(W[i - 2]) + W[i - 7] + ssg5_0(W[i - 15]) + W[i - 16]); + +#if __CUDA_ARCH__ < 500 // go figure... 
+#pragma unroll 10 +#endif + for (int i = 0; i < 10; i ++) { +#pragma unroll 8 + for (int ord=0;ord<8;ord++) {sha3_step2(r,W,K_512,ord,8*i+ord);} + } + +#pragma unroll 8 + for (int i = 0; i < 8; i++) {r[i] = sph_t64(r[i] + H_512[i]);} + + #pragma unroll 8 + for (int u = 0; u < 8; u ++) + inpHash[u] = cuda_swab64(r[u]); + } +} + + +__global__ void __launch_bounds__(256,3) m7_sha512_gpu50_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash) +{ + + __shared__ uint64_t K[80]; + __shared__ uint64_t WK[80]; + if (threadIdx.x<80) + { + WK[threadIdx.x] = gpu_WK[threadIdx.x]; + K[threadIdx.x] =K_512[threadIdx.x]; + } + __syncthreads(); + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + + + uint32_t nounce = startNounce + thread; + + uint64_t W[80]; + uint64_t r[8]; +#pragma unroll 8 + for (int i = 0; i < 8; i ++) {r[i] = H_512[i];} +#pragma unroll 14 + for (int i = 0; i < 14; i ++) {W[i] = cuda_swab64(c_PaddedMessage80[i]);} + W[14] = cuda_swab64(REPLACE_HIWORD(c_PaddedMessage80[14],nounce)); + W[15] = cuda_swab64(c_PaddedMessage80[15]); + +#pragma unroll 64 + for (int i = 16; i < 80; i ++) + W[i] = sph_t64(ssg5_1(W[i - 2]) + W[i - 7] + ssg5_0(W[i - 15]) + W[i - 16]); + +#if __CUDA_ARCH__ < 500 // go figure... +#pragma unroll 10 +#endif + for (int i = 0; i < 10; i ++) { +#pragma unroll 8 + for (int ord=0;ord<8;ord++) {sha3_step2(r,W,K,ord,8*i+ord); } + } + uint64_t tempr[8]; +#pragma unroll 8 + for (int i = 0; i < 8; i++) {tempr[i] = r[i] = sph_t64(r[i] + H_512[i]);} + + +#if __CUDA_ARCH__ < 500 // go figure... 
+#pragma unroll +#endif 10 + for (int i = 0; i < 10; i ++) { +#pragma unroll 8 + for (int ord=0;ord<8;ord++) {sha3_step3(r,WK,ord,8*i+ord); } + } + + +#pragma unroll 8 +for(int i=0;i<8;i++) {outputHash[i*threads+thread] = cuda_swab64(sph_t64(r[i] + tempr[i]));} + + + } /// thread +} + +__global__ void __launch_bounds__(256,4) m7_sha512_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash) +{ + + __shared__ uint64_t K[80]; + __shared__ uint64_t WK[80]; + if (threadIdx.x<80) + { + WK[threadIdx.x] = gpu_WK[threadIdx.x]; + K[threadIdx.x] =K_512[threadIdx.x]; + } + __syncthreads(); + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + + + uint32_t nounce = startNounce + thread; + + uint64_t W[80]; + uint64_t r[8]; +#pragma unroll 8 + for (int i = 0; i < 8; i ++) {r[i] = H_512[i];} +#pragma unroll 14 + for (int i = 0; i < 14; i ++) {W[i] = cuda_swab64(c_PaddedMessage80[i]);} + W[14] = cuda_swab64(REPLACE_HIWORD(c_PaddedMessage80[14],nounce)); + W[15] = cuda_swab64(c_PaddedMessage80[15]); + +#pragma unroll 64 + for (int i = 16; i < 80; i ++) + W[i] = sph_t64(ssg5_1(W[i - 2]) + W[i - 7] + ssg5_0(W[i - 15]) + W[i - 16]); + +#if __CUDA_ARCH__ < 500 // go figure... +#pragma unroll 10 +#endif + for (int i = 0; i < 10; i ++) { +#pragma unroll 8 + for (int ord=0;ord<8;ord++) {sha3_step2(r,W,K,ord,8*i+ord); } + } + uint64_t tempr[8]; +#pragma unroll 8 + for (int i = 0; i < 8; i++) {tempr[i] = r[i] = sph_t64(r[i] + H_512[i]);} + + +#if __CUDA_ARCH__ < 500 // go figure... 
+#pragma unroll +#endif 10 + for (int i = 0; i < 10; i ++) { +#pragma unroll 8 + for (int ord=0;ord<8;ord++) {sha3_step3(r,WK,ord,8*i+ord); } + } + + +#pragma unroll 8 +for(int i=0;i<8;i++) {outputHash[i*threads+thread] = cuda_swab64(sph_t64(r[i] + tempr[i]));} + + + } /// thread +} + + +void sha512_cpu_init(int thr_id, int threads) +{ +#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#define SPH_T64(x) ((x) & 0xFFFFFFFFFFFFFFFF) +#define BSG5_0(x) (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39)) +#define BSG5_1(x) (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41)) +#define SSG5_0(x) (ROTR64(x, 1) ^ ROTR64(x, 8) ^ SPH_T64((x) >> 7)) +#define SSG5_1(x) (ROTR64(x, 19) ^ ROTR64(x, 61) ^ SPH_T64((x) >> 6)) + cudaMemcpyToSymbol(K_512,K512,80*sizeof(uint64_t),0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(H_512,H512,sizeof(H512),0, cudaMemcpyHostToDevice); + uint64_t W[80],WK[80]; + + for (int i = 0; i < 15; i ++) {W[i] = 0;} + W[15]=0x3d0; + for (int i = 16; i < 80; i ++) { + W[i] = SPH_T64(SSG5_1(W[i - 2]) + W[i - 7] + SSG5_0(W[i - 15]) + W[i - 16]);} + for (int i=0; i<80;i++) {WK[i]=W[i]+K512[i];} + cudaMemcpyToSymbol(gpu_WK,WK,80*sizeof(uint64_t),0, cudaMemcpyHostToDevice); +} + + +__host__ void sha512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +{ + + const int threadsperblock = 256; // Alignment mit mixtab Grösse. 
NICHT ÄNDERN + + // berechne wie viele Thread Blocks wir brauchen + dim3 grid((threads + threadsperblock-1)/threadsperblock); + dim3 block(threadsperblock); + + size_t shared_size =0; + sha512_gpu_hash_64<<>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector); + + MyStreamSynchronize(NULL, order, thr_id); +} + + +__host__ void sha512_setBlock_120(void *pdata) +{ + unsigned char PaddedMessage[128]; + uint8_t ending =0x80; + memcpy(PaddedMessage, pdata, 122); + memset(PaddedMessage+122,ending,1); + memset(PaddedMessage+123, 0, 5); //useless + cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); + +} + +__host__ void m7_sha512_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order) +{ + + const int threadsperblock = 256; // Alignment mit mixtob Grösse. NICHT ÄNDERN + + // berechne wie viele Thread Blocks wir brauchen + dim3 grid(threads/threadsperblock); + dim3 block(threadsperblock); + size_t shared_size = 0; + if (device_major[thr_id]==5) { + m7_sha512_gpu50_hash_120<<>>(threads, startNounce, d_outputHash); + } else { + m7_sha512_gpu_hash_120<<>>(threads, startNounce, d_outputHash); + } + MyStreamSynchronize(NULL, order, thr_id); +} + diff --git a/x13/cuda_shabal512.cu b/x13/cuda_shabal512.cu new file mode 100644 index 0000000000..87527f5b37 --- /dev/null +++ b/x13/cuda_shabal512.cu @@ -0,0 +1,415 @@ +/* + * Quick and dirty addition of Shabal-512 for X15 + * + * Built on cbuchner1's implementation, actual hashing code + * heavily based on phm's sgminer + * + */ + +/* + * Shabal-512 kernel implementation. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 phm + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author phm + */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +#include +#include +#include + + +#define USE_SHARED 1 + +#define SPH_C64(x) ((uint64_t)(x ## ULL)) +#define SPH_C32(x) ((uint32_t)(x ## U)) +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) + +#define sM 16 + +#define C32 SPH_C32 +#define T32 SPH_T32 + +#define O1 13 +#define O2 9 +#define O3 6 + + +#if __CUDA_ARCH__ < 350 + // Kepler (Compute 3.0) + #define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) +#else + // Kepler (Compute 3.5) + #define SPH_ROTL32(x, n) __funnelshift_l( (x), (x), (n) ) +#endif + + +// aus heavy.cu +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); + +__constant__ uint32_t C_512[16]; +static const uint32_t C_init_512[] = { + C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359), + C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780), + C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A), + C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969) +}; +__constant__ uint32_t A_512[16]; +static const uint32_t A_init_512[] = { + C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632), + C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B), + C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F) +}; +__constant__ uint32_t B_512[16]; +static const uint32_t B_init_512[] = { + C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640), + C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08), + C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E), + C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B) +}; + +#define INPUT_BLOCK_ADD { \ + B0 = T32(B0 + M0); \ + B1 = T32(B1 + M1); \ + B2 = T32(B2 + M2); \ + B3 = T32(B3 + M3); \ + B4 = T32(B4 + M4); \ + B5 = T32(B5 + M5); \ + B6 = T32(B6 
+ M6); \ + B7 = T32(B7 + M7); \ + B8 = T32(B8 + M8); \ + B9 = T32(B9 + M9); \ + BA = T32(BA + MA); \ + BB = T32(BB + MB); \ + BC = T32(BC + MC); \ + BD = T32(BD + MD); \ + BE = T32(BE + ME); \ + BF = T32(BF + MF); \ + } + +#define INPUT_BLOCK_SUB { \ + C0 = T32(C0 - M0); \ + C1 = T32(C1 - M1); \ + C2 = T32(C2 - M2); \ + C3 = T32(C3 - M3); \ + C4 = T32(C4 - M4); \ + C5 = T32(C5 - M5); \ + C6 = T32(C6 - M6); \ + C7 = T32(C7 - M7); \ + C8 = T32(C8 - M8); \ + C9 = T32(C9 - M9); \ + CA = T32(CA - MA); \ + CB = T32(CB - MB); \ + CC = T32(CC - MC); \ + CD = T32(CD - MD); \ + CE = T32(CE - ME); \ + CF = T32(CF - MF); \ + } + +#define XOR_W { \ + A00 ^= Wlow; \ + A01 ^= Whigh; \ + } + +#define SWAP(v1, v2) { \ + uint32_t tmp = (v1); \ + (v1) = (v2); \ + (v2) = tmp; \ + } + +#define SWAP_BC { \ + SWAP(B0, C0); \ + SWAP(B1, C1); \ + SWAP(B2, C2); \ + SWAP(B3, C3); \ + SWAP(B4, C4); \ + SWAP(B5, C5); \ + SWAP(B6, C6); \ + SWAP(B7, C7); \ + SWAP(B8, C8); \ + SWAP(B9, C9); \ + SWAP(BA, CA); \ + SWAP(BB, CB); \ + SWAP(BC, CC); \ + SWAP(BD, CD); \ + SWAP(BE, CE); \ + SWAP(BF, CF); \ + } + +#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm) { \ + xa0 = T32((xa0 \ + ^ (((xa1 << 15) | (xa1 >> 17)) * 5U) \ + ^ xc) * 3U) \ + ^ xb1 ^ (xb2 & ~xb3) ^ xm; \ + xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \ + } + +#define PERM_STEP_0 { \ + PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \ + PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \ + PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \ + PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \ + PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \ + PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \ + PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \ + PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \ + PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \ + PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \ + PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \ + PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \ + PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \ + PERM_ELT(A01, A00, BD, BA, B6, 
B3, CB, MD); \ + PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \ + PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \ + } + +#define PERM_STEP_1 { \ + PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \ + PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \ + PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \ + PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \ + PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \ + PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \ + PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \ + PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \ + PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \ + PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \ + PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \ + PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \ + PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \ + PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \ + PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \ + PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \ + } + +#define PERM_STEP_2 { \ + PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \ + PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \ + PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \ + PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \ + PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \ + PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \ + PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \ + PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \ + PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \ + PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \ + PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \ + PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \ + PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \ + PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \ + PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \ + PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \ + } + +#define APPLY_P { \ + B0 = T32(B0 << 17) | (B0 >> 15); \ + B1 = T32(B1 << 17) | (B1 >> 15); \ + B2 = T32(B2 << 17) | (B2 >> 15); \ + B3 = T32(B3 << 17) | (B3 >> 15); \ + B4 = T32(B4 << 17) | (B4 >> 15); \ + B5 = T32(B5 << 17) | (B5 >> 15); \ + B6 = T32(B6 << 17) | (B6 >> 15); \ + B7 
= T32(B7 << 17) | (B7 >> 15); \ + B8 = T32(B8 << 17) | (B8 >> 15); \ + B9 = T32(B9 << 17) | (B9 >> 15); \ + BA = T32(BA << 17) | (BA >> 15); \ + BB = T32(BB << 17) | (BB >> 15); \ + BC = T32(BC << 17) | (BC >> 15); \ + BD = T32(BD << 17) | (BD >> 15); \ + BE = T32(BE << 17) | (BE >> 15); \ + BF = T32(BF << 17) | (BF >> 15); \ + PERM_STEP_0; \ + PERM_STEP_1; \ + PERM_STEP_2; \ + A0B = T32(A0B + C6); \ + A0A = T32(A0A + C5); \ + A09 = T32(A09 + C4); \ + A08 = T32(A08 + C3); \ + A07 = T32(A07 + C2); \ + A06 = T32(A06 + C1); \ + A05 = T32(A05 + C0); \ + A04 = T32(A04 + CF); \ + A03 = T32(A03 + CE); \ + A02 = T32(A02 + CD); \ + A01 = T32(A01 + CC); \ + A00 = T32(A00 + CB); \ + A0B = T32(A0B + CA); \ + A0A = T32(A0A + C9); \ + A09 = T32(A09 + C8); \ + A08 = T32(A08 + C7); \ + A07 = T32(A07 + C6); \ + A06 = T32(A06 + C5); \ + A05 = T32(A05 + C4); \ + A04 = T32(A04 + C3); \ + A03 = T32(A03 + C2); \ + A02 = T32(A02 + C1); \ + A01 = T32(A01 + C0); \ + A00 = T32(A00 + CF); \ + A0B = T32(A0B + CE); \ + A0A = T32(A0A + CD); \ + A09 = T32(A09 + CC); \ + A08 = T32(A08 + CB); \ + A07 = T32(A07 + CA); \ + A06 = T32(A06 + C9); \ + A05 = T32(A05 + C8); \ + A04 = T32(A04 + C7); \ + A03 = T32(A03 + C6); \ + A02 = T32(A02 + C5); \ + A01 = T32(A01 + C4); \ + A00 = T32(A00 + C3); \ + } + +#define INCR_W { \ + if ((Wlow = T32(Wlow + 1)) == 0) \ + Whigh = T32(Whigh + 1); \ + } + + +__global__ void x13_shabal512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) +{ + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + uint32_t nounce = (g_nonceVector != NULL) ? 
g_nonceVector[thread] : (startNounce + thread); + + int hashPosition = nounce - startNounce; + + + uint32_t *inpHash = (uint32_t*)&g_hash[8 * hashPosition]; + + +union { +uint8_t h1[64]; +uint32_t h4[16]; +uint64_t h8[8]; +} hash; + + + + #pragma unroll 16 + for (int i=0;i<16;i++) { + hash.h4[i]= inpHash[i];} + +///////// input big ///////////////////// + uint32_t A00 = A_512[0], A01 = A_512[1], A02 = A_512[2], A03 = A_512[3], A04 = A_512[4], A05 = A_512[5], A06 = A_512[6], A07 = A_512[7], + A08 = A_512[8], A09 = A_512[9], A0A = A_512[10], A0B = A_512[11]; + uint32_t B0 = B_512[0], B1 = B_512[1], B2 = B_512[2], B3 = B_512[3], B4 = B_512[4], B5 = B_512[5], B6 = B_512[6], B7 = B_512[7], + B8 = B_512[8], B9 = B_512[9], BA = B_512[10], BB = B_512[11], BC = B_512[12], BD = B_512[13], BE = B_512[14], BF = B_512[15]; + uint32_t C0 = C_512[0], C1 = C_512[1], C2 = C_512[2], C3 = C_512[3], C4 = C_512[4], C5 = C_512[5], C6 = C_512[6], C7 = C_512[7], + C8 = C_512[8], C9 = C_512[9], CA = C_512[10], CB = C_512[11], CC = C_512[12], CD = C_512[13], CE = C_512[14], CF = C_512[15]; + uint32_t M0, M1, M2, M3, M4, M5, M6, M7, M8, M9, MA, MB, MC, MD, ME, MF; + uint32_t Wlow = 1, Whigh = 0; + + M0 = hash.h4[0]; + M1 = hash.h4[1]; + M2 = hash.h4[2]; + M3 = hash.h4[3]; + M4 = hash.h4[4]; + M5 = hash.h4[5]; + M6 = hash.h4[6]; + M7 = hash.h4[7]; + M8 = hash.h4[8]; + M9 = hash.h4[9]; + MA = hash.h4[10]; + MB = hash.h4[11]; + MC = hash.h4[12]; + MD = hash.h4[13]; + ME = hash.h4[14]; + MF = hash.h4[15]; + + INPUT_BLOCK_ADD; + XOR_W; + APPLY_P; + INPUT_BLOCK_SUB; + SWAP_BC; + INCR_W; + + M0 = 0x80; + M1 = M2 = M3 = M4 = M5 = M6 = M7 = M8 = M9 = MA = MB = MC = MD = ME = MF = 0; + + INPUT_BLOCK_ADD; + XOR_W; + APPLY_P; + + #pragma unroll 3 + for (unsigned i = 0; i < 3; i ++) { + SWAP_BC; + XOR_W; + APPLY_P; + } + + hash.h4[0] = B0; + hash.h4[1] = B1; + hash.h4[2] = B2; + hash.h4[3] = B3; + hash.h4[4] = B4; + hash.h4[5] = B5; + hash.h4[6] = B6; + hash.h4[7] = B7; + hash.h4[8] = B8; + hash.h4[9] = 
B9; + hash.h4[10] = BA; + hash.h4[11] = BB; + hash.h4[12] = BC; + hash.h4[13] = BD; + hash.h4[14] = BE; + hash.h4[15] = BF; + + #pragma unroll 16 + for (int u = 0; u < 16; u ++) + inpHash[u] = hash.h4[u]; + } +} + + +void x13_shabal512_cpu_init(int thr_id, int threads) +{ + + cudaMemcpyToSymbol(A_512,A_init_512,sizeof(A_init_512),0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(B_512,B_init_512,sizeof(B_init_512),0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(C_512,C_init_512,sizeof(C_init_512),0, cudaMemcpyHostToDevice); +} + + +__host__ void x13_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +{ + + const int threadsperblock = 256; // Alignment mit mixtab Grösse. NICHT ÄNDERN + + // berechne wie viele Thread Blocks wir brauchen + dim3 grid((threads + threadsperblock-1)/threadsperblock); + dim3 block(threadsperblock); + + size_t shared_size = 0; + + x13_shabal512_gpu_hash_64<<>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector); + + MyStreamSynchronize(NULL, order, thr_id); +} diff --git a/x13/cuda_tiger192.cu b/x13/cuda_tiger192.cu new file mode 100644 index 0000000000..e1db9fac3a --- /dev/null +++ b/x13/cuda_tiger192.cu @@ -0,0 +1,805 @@ +/* + * tiger-192 djm34 + * + */ + +/* + * tiger-192 kernel implementation. 
+ * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 djm34 + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author phm + */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + + +#include +#include +#include + + + +#include "cuda_helper.h" + +#define SPH_C64(x) ((uint64_t)(x ## ULL)) +#define SPH_C32(x) ((uint32_t)(x ## U)) +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) +#define ROTL SPH_ROTL32 +//#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) +#define SPH_T64(x) (x) +// aus heavy.cu +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); + + + __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding) + __constant__ uint64_t bufo[3]; +static __constant__ uint64_t gpu_III[3]; +static __constant__ uint64_t T1[256]; +static __constant__ uint64_t T2[256]; +static __constant__ uint64_t T3[256]; +static __constant__ uint64_t T4[256]; +static const uint64_t III[3] = { + SPH_C64(0x0123456789ABCDEF),SPH_C64(0xFEDCBA9876543210),SPH_C64(0xF096A5B4C3B2E187) +}; + +static const uint64_t cpu_T1[256] = { + SPH_C64(0x02AAB17CF7E90C5E), SPH_C64(0xAC424B03E243A8EC), + SPH_C64(0x72CD5BE30DD5FCD3), SPH_C64(0x6D019B93F6F97F3A), + SPH_C64(0xCD9978FFD21F9193), SPH_C64(0x7573A1C9708029E2), + SPH_C64(0xB164326B922A83C3), SPH_C64(0x46883EEE04915870), + SPH_C64(0xEAACE3057103ECE6), SPH_C64(0xC54169B808A3535C), + SPH_C64(0x4CE754918DDEC47C), SPH_C64(0x0AA2F4DFDC0DF40C), + SPH_C64(0x10B76F18A74DBEFA), SPH_C64(0xC6CCB6235AD1AB6A), + SPH_C64(0x13726121572FE2FF), SPH_C64(0x1A488C6F199D921E), + SPH_C64(0x4BC9F9F4DA0007CA), SPH_C64(0x26F5E6F6E85241C7), + SPH_C64(0x859079DBEA5947B6), SPH_C64(0x4F1885C5C99E8C92), + SPH_C64(0xD78E761EA96F864B), SPH_C64(0x8E36428C52B5C17D), + SPH_C64(0x69CF6827373063C1), SPH_C64(0xB607C93D9BB4C56E), + SPH_C64(0x7D820E760E76B5EA), SPH_C64(0x645C9CC6F07FDC42), + SPH_C64(0xBF38A078243342E0), SPH_C64(0x5F6B343C9D2E7D04), + SPH_C64(0xF2C28AEB600B0EC6), SPH_C64(0x6C0ED85F7254BCAC), + 
SPH_C64(0x71592281A4DB4FE5), SPH_C64(0x1967FA69CE0FED9F), + SPH_C64(0xFD5293F8B96545DB), SPH_C64(0xC879E9D7F2A7600B), + SPH_C64(0x860248920193194E), SPH_C64(0xA4F9533B2D9CC0B3), + SPH_C64(0x9053836C15957613), SPH_C64(0xDB6DCF8AFC357BF1), + SPH_C64(0x18BEEA7A7A370F57), SPH_C64(0x037117CA50B99066), + SPH_C64(0x6AB30A9774424A35), SPH_C64(0xF4E92F02E325249B), + SPH_C64(0x7739DB07061CCAE1), SPH_C64(0xD8F3B49CECA42A05), + SPH_C64(0xBD56BE3F51382F73), SPH_C64(0x45FAED5843B0BB28), + SPH_C64(0x1C813D5C11BF1F83), SPH_C64(0x8AF0E4B6D75FA169), + SPH_C64(0x33EE18A487AD9999), SPH_C64(0x3C26E8EAB1C94410), + SPH_C64(0xB510102BC0A822F9), SPH_C64(0x141EEF310CE6123B), + SPH_C64(0xFC65B90059DDB154), SPH_C64(0xE0158640C5E0E607), + SPH_C64(0x884E079826C3A3CF), SPH_C64(0x930D0D9523C535FD), + SPH_C64(0x35638D754E9A2B00), SPH_C64(0x4085FCCF40469DD5), + SPH_C64(0xC4B17AD28BE23A4C), SPH_C64(0xCAB2F0FC6A3E6A2E), + SPH_C64(0x2860971A6B943FCD), SPH_C64(0x3DDE6EE212E30446), + SPH_C64(0x6222F32AE01765AE), SPH_C64(0x5D550BB5478308FE), + SPH_C64(0xA9EFA98DA0EDA22A), SPH_C64(0xC351A71686C40DA7), + SPH_C64(0x1105586D9C867C84), SPH_C64(0xDCFFEE85FDA22853), + SPH_C64(0xCCFBD0262C5EEF76), SPH_C64(0xBAF294CB8990D201), + SPH_C64(0xE69464F52AFAD975), SPH_C64(0x94B013AFDF133E14), + SPH_C64(0x06A7D1A32823C958), SPH_C64(0x6F95FE5130F61119), + SPH_C64(0xD92AB34E462C06C0), SPH_C64(0xED7BDE33887C71D2), + SPH_C64(0x79746D6E6518393E), SPH_C64(0x5BA419385D713329), + SPH_C64(0x7C1BA6B948A97564), SPH_C64(0x31987C197BFDAC67), + SPH_C64(0xDE6C23C44B053D02), SPH_C64(0x581C49FED002D64D), + SPH_C64(0xDD474D6338261571), SPH_C64(0xAA4546C3E473D062), + SPH_C64(0x928FCE349455F860), SPH_C64(0x48161BBACAAB94D9), + SPH_C64(0x63912430770E6F68), SPH_C64(0x6EC8A5E602C6641C), + SPH_C64(0x87282515337DDD2B), SPH_C64(0x2CDA6B42034B701B), + SPH_C64(0xB03D37C181CB096D), SPH_C64(0xE108438266C71C6F), + SPH_C64(0x2B3180C7EB51B255), SPH_C64(0xDF92B82F96C08BBC), + SPH_C64(0x5C68C8C0A632F3BA), SPH_C64(0x5504CC861C3D0556), + 
SPH_C64(0xABBFA4E55FB26B8F), SPH_C64(0x41848B0AB3BACEB4), + SPH_C64(0xB334A273AA445D32), SPH_C64(0xBCA696F0A85AD881), + SPH_C64(0x24F6EC65B528D56C), SPH_C64(0x0CE1512E90F4524A), + SPH_C64(0x4E9DD79D5506D35A), SPH_C64(0x258905FAC6CE9779), + SPH_C64(0x2019295B3E109B33), SPH_C64(0xF8A9478B73A054CC), + SPH_C64(0x2924F2F934417EB0), SPH_C64(0x3993357D536D1BC4), + SPH_C64(0x38A81AC21DB6FF8B), SPH_C64(0x47C4FBF17D6016BF), + SPH_C64(0x1E0FAADD7667E3F5), SPH_C64(0x7ABCFF62938BEB96), + SPH_C64(0xA78DAD948FC179C9), SPH_C64(0x8F1F98B72911E50D), + SPH_C64(0x61E48EAE27121A91), SPH_C64(0x4D62F7AD31859808), + SPH_C64(0xECEBA345EF5CEAEB), SPH_C64(0xF5CEB25EBC9684CE), + SPH_C64(0xF633E20CB7F76221), SPH_C64(0xA32CDF06AB8293E4), + SPH_C64(0x985A202CA5EE2CA4), SPH_C64(0xCF0B8447CC8A8FB1), + SPH_C64(0x9F765244979859A3), SPH_C64(0xA8D516B1A1240017), + SPH_C64(0x0BD7BA3EBB5DC726), SPH_C64(0xE54BCA55B86ADB39), + SPH_C64(0x1D7A3AFD6C478063), SPH_C64(0x519EC608E7669EDD), + SPH_C64(0x0E5715A2D149AA23), SPH_C64(0x177D4571848FF194), + SPH_C64(0xEEB55F3241014C22), SPH_C64(0x0F5E5CA13A6E2EC2), + SPH_C64(0x8029927B75F5C361), SPH_C64(0xAD139FABC3D6E436), + SPH_C64(0x0D5DF1A94CCF402F), SPH_C64(0x3E8BD948BEA5DFC8), + SPH_C64(0xA5A0D357BD3FF77E), SPH_C64(0xA2D12E251F74F645), + SPH_C64(0x66FD9E525E81A082), SPH_C64(0x2E0C90CE7F687A49), + SPH_C64(0xC2E8BCBEBA973BC5), SPH_C64(0x000001BCE509745F), + SPH_C64(0x423777BBE6DAB3D6), SPH_C64(0xD1661C7EAEF06EB5), + SPH_C64(0xA1781F354DAACFD8), SPH_C64(0x2D11284A2B16AFFC), + SPH_C64(0xF1FC4F67FA891D1F), SPH_C64(0x73ECC25DCB920ADA), + SPH_C64(0xAE610C22C2A12651), SPH_C64(0x96E0A810D356B78A), + SPH_C64(0x5A9A381F2FE7870F), SPH_C64(0xD5AD62EDE94E5530), + SPH_C64(0xD225E5E8368D1427), SPH_C64(0x65977B70C7AF4631), + SPH_C64(0x99F889B2DE39D74F), SPH_C64(0x233F30BF54E1D143), + SPH_C64(0x9A9675D3D9A63C97), SPH_C64(0x5470554FF334F9A8), + SPH_C64(0x166ACB744A4F5688), SPH_C64(0x70C74CAAB2E4AEAD), + SPH_C64(0xF0D091646F294D12), SPH_C64(0x57B82A89684031D1), + 
SPH_C64(0xEFD95A5A61BE0B6B), SPH_C64(0x2FBD12E969F2F29A), + SPH_C64(0x9BD37013FEFF9FE8), SPH_C64(0x3F9B0404D6085A06), + SPH_C64(0x4940C1F3166CFE15), SPH_C64(0x09542C4DCDF3DEFB), + SPH_C64(0xB4C5218385CD5CE3), SPH_C64(0xC935B7DC4462A641), + SPH_C64(0x3417F8A68ED3B63F), SPH_C64(0xB80959295B215B40), + SPH_C64(0xF99CDAEF3B8C8572), SPH_C64(0x018C0614F8FCB95D), + SPH_C64(0x1B14ACCD1A3ACDF3), SPH_C64(0x84D471F200BB732D), + SPH_C64(0xC1A3110E95E8DA16), SPH_C64(0x430A7220BF1A82B8), + SPH_C64(0xB77E090D39DF210E), SPH_C64(0x5EF4BD9F3CD05E9D), + SPH_C64(0x9D4FF6DA7E57A444), SPH_C64(0xDA1D60E183D4A5F8), + SPH_C64(0xB287C38417998E47), SPH_C64(0xFE3EDC121BB31886), + SPH_C64(0xC7FE3CCC980CCBEF), SPH_C64(0xE46FB590189BFD03), + SPH_C64(0x3732FD469A4C57DC), SPH_C64(0x7EF700A07CF1AD65), + SPH_C64(0x59C64468A31D8859), SPH_C64(0x762FB0B4D45B61F6), + SPH_C64(0x155BAED099047718), SPH_C64(0x68755E4C3D50BAA6), + SPH_C64(0xE9214E7F22D8B4DF), SPH_C64(0x2ADDBF532EAC95F4), + SPH_C64(0x32AE3909B4BD0109), SPH_C64(0x834DF537B08E3450), + SPH_C64(0xFA209DA84220728D), SPH_C64(0x9E691D9B9EFE23F7), + SPH_C64(0x0446D288C4AE8D7F), SPH_C64(0x7B4CC524E169785B), + SPH_C64(0x21D87F0135CA1385), SPH_C64(0xCEBB400F137B8AA5), + SPH_C64(0x272E2B66580796BE), SPH_C64(0x3612264125C2B0DE), + SPH_C64(0x057702BDAD1EFBB2), SPH_C64(0xD4BABB8EACF84BE9), + SPH_C64(0x91583139641BC67B), SPH_C64(0x8BDC2DE08036E024), + SPH_C64(0x603C8156F49F68ED), SPH_C64(0xF7D236F7DBEF5111), + SPH_C64(0x9727C4598AD21E80), SPH_C64(0xA08A0896670A5FD7), + SPH_C64(0xCB4A8F4309EBA9CB), SPH_C64(0x81AF564B0F7036A1), + SPH_C64(0xC0B99AA778199ABD), SPH_C64(0x959F1EC83FC8E952), + SPH_C64(0x8C505077794A81B9), SPH_C64(0x3ACAAF8F056338F0), + SPH_C64(0x07B43F50627A6778), SPH_C64(0x4A44AB49F5ECCC77), + SPH_C64(0x3BC3D6E4B679EE98), SPH_C64(0x9CC0D4D1CF14108C), + SPH_C64(0x4406C00B206BC8A0), SPH_C64(0x82A18854C8D72D89), + SPH_C64(0x67E366B35C3C432C), SPH_C64(0xB923DD61102B37F2), + SPH_C64(0x56AB2779D884271D), SPH_C64(0xBE83E1B0FF1525AF), + 
SPH_C64(0xFB7C65D4217E49A9), SPH_C64(0x6BDBE0E76D48E7D4), + SPH_C64(0x08DF828745D9179E), SPH_C64(0x22EA6A9ADD53BD34), + SPH_C64(0xE36E141C5622200A), SPH_C64(0x7F805D1B8CB750EE), + SPH_C64(0xAFE5C7A59F58E837), SPH_C64(0xE27F996A4FB1C23C), + SPH_C64(0xD3867DFB0775F0D0), SPH_C64(0xD0E673DE6E88891A), + SPH_C64(0x123AEB9EAFB86C25), SPH_C64(0x30F1D5D5C145B895), + SPH_C64(0xBB434A2DEE7269E7), SPH_C64(0x78CB67ECF931FA38), + SPH_C64(0xF33B0372323BBF9C), SPH_C64(0x52D66336FB279C74), + SPH_C64(0x505F33AC0AFB4EAA), SPH_C64(0xE8A5CD99A2CCE187), + SPH_C64(0x534974801E2D30BB), SPH_C64(0x8D2D5711D5876D90), + SPH_C64(0x1F1A412891BC038E), SPH_C64(0xD6E2E71D82E56648), + SPH_C64(0x74036C3A497732B7), SPH_C64(0x89B67ED96361F5AB), + SPH_C64(0xFFED95D8F1EA02A2), SPH_C64(0xE72B3BD61464D43D), + SPH_C64(0xA6300F170BDC4820), SPH_C64(0xEBC18760ED78A77A) +}; + +static const uint64_t cpu_T2[256] = { + SPH_C64(0xE6A6BE5A05A12138), SPH_C64(0xB5A122A5B4F87C98), + SPH_C64(0x563C6089140B6990), SPH_C64(0x4C46CB2E391F5DD5), + SPH_C64(0xD932ADDBC9B79434), SPH_C64(0x08EA70E42015AFF5), + SPH_C64(0xD765A6673E478CF1), SPH_C64(0xC4FB757EAB278D99), + SPH_C64(0xDF11C6862D6E0692), SPH_C64(0xDDEB84F10D7F3B16), + SPH_C64(0x6F2EF604A665EA04), SPH_C64(0x4A8E0F0FF0E0DFB3), + SPH_C64(0xA5EDEEF83DBCBA51), SPH_C64(0xFC4F0A2A0EA4371E), + SPH_C64(0xE83E1DA85CB38429), SPH_C64(0xDC8FF882BA1B1CE2), + SPH_C64(0xCD45505E8353E80D), SPH_C64(0x18D19A00D4DB0717), + SPH_C64(0x34A0CFEDA5F38101), SPH_C64(0x0BE77E518887CAF2), + SPH_C64(0x1E341438B3C45136), SPH_C64(0xE05797F49089CCF9), + SPH_C64(0xFFD23F9DF2591D14), SPH_C64(0x543DDA228595C5CD), + SPH_C64(0x661F81FD99052A33), SPH_C64(0x8736E641DB0F7B76), + SPH_C64(0x15227725418E5307), SPH_C64(0xE25F7F46162EB2FA), + SPH_C64(0x48A8B2126C13D9FE), SPH_C64(0xAFDC541792E76EEA), + SPH_C64(0x03D912BFC6D1898F), SPH_C64(0x31B1AAFA1B83F51B), + SPH_C64(0xF1AC2796E42AB7D9), SPH_C64(0x40A3A7D7FCD2EBAC), + SPH_C64(0x1056136D0AFBBCC5), SPH_C64(0x7889E1DD9A6D0C85), + SPH_C64(0xD33525782A7974AA), 
SPH_C64(0xA7E25D09078AC09B), + SPH_C64(0xBD4138B3EAC6EDD0), SPH_C64(0x920ABFBE71EB9E70), + SPH_C64(0xA2A5D0F54FC2625C), SPH_C64(0xC054E36B0B1290A3), + SPH_C64(0xF6DD59FF62FE932B), SPH_C64(0x3537354511A8AC7D), + SPH_C64(0xCA845E9172FADCD4), SPH_C64(0x84F82B60329D20DC), + SPH_C64(0x79C62CE1CD672F18), SPH_C64(0x8B09A2ADD124642C), + SPH_C64(0xD0C1E96A19D9E726), SPH_C64(0x5A786A9B4BA9500C), + SPH_C64(0x0E020336634C43F3), SPH_C64(0xC17B474AEB66D822), + SPH_C64(0x6A731AE3EC9BAAC2), SPH_C64(0x8226667AE0840258), + SPH_C64(0x67D4567691CAECA5), SPH_C64(0x1D94155C4875ADB5), + SPH_C64(0x6D00FD985B813FDF), SPH_C64(0x51286EFCB774CD06), + SPH_C64(0x5E8834471FA744AF), SPH_C64(0xF72CA0AEE761AE2E), + SPH_C64(0xBE40E4CDAEE8E09A), SPH_C64(0xE9970BBB5118F665), + SPH_C64(0x726E4BEB33DF1964), SPH_C64(0x703B000729199762), + SPH_C64(0x4631D816F5EF30A7), SPH_C64(0xB880B5B51504A6BE), + SPH_C64(0x641793C37ED84B6C), SPH_C64(0x7B21ED77F6E97D96), + SPH_C64(0x776306312EF96B73), SPH_C64(0xAE528948E86FF3F4), + SPH_C64(0x53DBD7F286A3F8F8), SPH_C64(0x16CADCE74CFC1063), + SPH_C64(0x005C19BDFA52C6DD), SPH_C64(0x68868F5D64D46AD3), + SPH_C64(0x3A9D512CCF1E186A), SPH_C64(0x367E62C2385660AE), + SPH_C64(0xE359E7EA77DCB1D7), SPH_C64(0x526C0773749ABE6E), + SPH_C64(0x735AE5F9D09F734B), SPH_C64(0x493FC7CC8A558BA8), + SPH_C64(0xB0B9C1533041AB45), SPH_C64(0x321958BA470A59BD), + SPH_C64(0x852DB00B5F46C393), SPH_C64(0x91209B2BD336B0E5), + SPH_C64(0x6E604F7D659EF19F), SPH_C64(0xB99A8AE2782CCB24), + SPH_C64(0xCCF52AB6C814C4C7), SPH_C64(0x4727D9AFBE11727B), + SPH_C64(0x7E950D0C0121B34D), SPH_C64(0x756F435670AD471F), + SPH_C64(0xF5ADD442615A6849), SPH_C64(0x4E87E09980B9957A), + SPH_C64(0x2ACFA1DF50AEE355), SPH_C64(0xD898263AFD2FD556), + SPH_C64(0xC8F4924DD80C8FD6), SPH_C64(0xCF99CA3D754A173A), + SPH_C64(0xFE477BACAF91BF3C), SPH_C64(0xED5371F6D690C12D), + SPH_C64(0x831A5C285E687094), SPH_C64(0xC5D3C90A3708A0A4), + SPH_C64(0x0F7F903717D06580), SPH_C64(0x19F9BB13B8FDF27F), + SPH_C64(0xB1BD6F1B4D502843), 
SPH_C64(0x1C761BA38FFF4012), + SPH_C64(0x0D1530C4E2E21F3B), SPH_C64(0x8943CE69A7372C8A), + SPH_C64(0xE5184E11FEB5CE66), SPH_C64(0x618BDB80BD736621), + SPH_C64(0x7D29BAD68B574D0B), SPH_C64(0x81BB613E25E6FE5B), + SPH_C64(0x071C9C10BC07913F), SPH_C64(0xC7BEEB7909AC2D97), + SPH_C64(0xC3E58D353BC5D757), SPH_C64(0xEB017892F38F61E8), + SPH_C64(0xD4EFFB9C9B1CC21A), SPH_C64(0x99727D26F494F7AB), + SPH_C64(0xA3E063A2956B3E03), SPH_C64(0x9D4A8B9A4AA09C30), + SPH_C64(0x3F6AB7D500090FB4), SPH_C64(0x9CC0F2A057268AC0), + SPH_C64(0x3DEE9D2DEDBF42D1), SPH_C64(0x330F49C87960A972), + SPH_C64(0xC6B2720287421B41), SPH_C64(0x0AC59EC07C00369C), + SPH_C64(0xEF4EAC49CB353425), SPH_C64(0xF450244EEF0129D8), + SPH_C64(0x8ACC46E5CAF4DEB6), SPH_C64(0x2FFEAB63989263F7), + SPH_C64(0x8F7CB9FE5D7A4578), SPH_C64(0x5BD8F7644E634635), + SPH_C64(0x427A7315BF2DC900), SPH_C64(0x17D0C4AA2125261C), + SPH_C64(0x3992486C93518E50), SPH_C64(0xB4CBFEE0A2D7D4C3), + SPH_C64(0x7C75D6202C5DDD8D), SPH_C64(0xDBC295D8E35B6C61), + SPH_C64(0x60B369D302032B19), SPH_C64(0xCE42685FDCE44132), + SPH_C64(0x06F3DDB9DDF65610), SPH_C64(0x8EA4D21DB5E148F0), + SPH_C64(0x20B0FCE62FCD496F), SPH_C64(0x2C1B912358B0EE31), + SPH_C64(0xB28317B818F5A308), SPH_C64(0xA89C1E189CA6D2CF), + SPH_C64(0x0C6B18576AAADBC8), SPH_C64(0xB65DEAA91299FAE3), + SPH_C64(0xFB2B794B7F1027E7), SPH_C64(0x04E4317F443B5BEB), + SPH_C64(0x4B852D325939D0A6), SPH_C64(0xD5AE6BEEFB207FFC), + SPH_C64(0x309682B281C7D374), SPH_C64(0xBAE309A194C3B475), + SPH_C64(0x8CC3F97B13B49F05), SPH_C64(0x98A9422FF8293967), + SPH_C64(0x244B16B01076FF7C), SPH_C64(0xF8BF571C663D67EE), + SPH_C64(0x1F0D6758EEE30DA1), SPH_C64(0xC9B611D97ADEB9B7), + SPH_C64(0xB7AFD5887B6C57A2), SPH_C64(0x6290AE846B984FE1), + SPH_C64(0x94DF4CDEACC1A5FD), SPH_C64(0x058A5BD1C5483AFF), + SPH_C64(0x63166CC142BA3C37), SPH_C64(0x8DB8526EB2F76F40), + SPH_C64(0xE10880036F0D6D4E), SPH_C64(0x9E0523C9971D311D), + SPH_C64(0x45EC2824CC7CD691), SPH_C64(0x575B8359E62382C9), + SPH_C64(0xFA9E400DC4889995), 
SPH_C64(0xD1823ECB45721568), + SPH_C64(0xDAFD983B8206082F), SPH_C64(0xAA7D29082386A8CB), + SPH_C64(0x269FCD4403B87588), SPH_C64(0x1B91F5F728BDD1E0), + SPH_C64(0xE4669F39040201F6), SPH_C64(0x7A1D7C218CF04ADE), + SPH_C64(0x65623C29D79CE5CE), SPH_C64(0x2368449096C00BB1), + SPH_C64(0xAB9BF1879DA503BA), SPH_C64(0xBC23ECB1A458058E), + SPH_C64(0x9A58DF01BB401ECC), SPH_C64(0xA070E868A85F143D), + SPH_C64(0x4FF188307DF2239E), SPH_C64(0x14D565B41A641183), + SPH_C64(0xEE13337452701602), SPH_C64(0x950E3DCF3F285E09), + SPH_C64(0x59930254B9C80953), SPH_C64(0x3BF299408930DA6D), + SPH_C64(0xA955943F53691387), SPH_C64(0xA15EDECAA9CB8784), + SPH_C64(0x29142127352BE9A0), SPH_C64(0x76F0371FFF4E7AFB), + SPH_C64(0x0239F450274F2228), SPH_C64(0xBB073AF01D5E868B), + SPH_C64(0xBFC80571C10E96C1), SPH_C64(0xD267088568222E23), + SPH_C64(0x9671A3D48E80B5B0), SPH_C64(0x55B5D38AE193BB81), + SPH_C64(0x693AE2D0A18B04B8), SPH_C64(0x5C48B4ECADD5335F), + SPH_C64(0xFD743B194916A1CA), SPH_C64(0x2577018134BE98C4), + SPH_C64(0xE77987E83C54A4AD), SPH_C64(0x28E11014DA33E1B9), + SPH_C64(0x270CC59E226AA213), SPH_C64(0x71495F756D1A5F60), + SPH_C64(0x9BE853FB60AFEF77), SPH_C64(0xADC786A7F7443DBF), + SPH_C64(0x0904456173B29A82), SPH_C64(0x58BC7A66C232BD5E), + SPH_C64(0xF306558C673AC8B2), SPH_C64(0x41F639C6B6C9772A), + SPH_C64(0x216DEFE99FDA35DA), SPH_C64(0x11640CC71C7BE615), + SPH_C64(0x93C43694565C5527), SPH_C64(0xEA038E6246777839), + SPH_C64(0xF9ABF3CE5A3E2469), SPH_C64(0x741E768D0FD312D2), + SPH_C64(0x0144B883CED652C6), SPH_C64(0xC20B5A5BA33F8552), + SPH_C64(0x1AE69633C3435A9D), SPH_C64(0x97A28CA4088CFDEC), + SPH_C64(0x8824A43C1E96F420), SPH_C64(0x37612FA66EEEA746), + SPH_C64(0x6B4CB165F9CF0E5A), SPH_C64(0x43AA1C06A0ABFB4A), + SPH_C64(0x7F4DC26FF162796B), SPH_C64(0x6CBACC8E54ED9B0F), + SPH_C64(0xA6B7FFEFD2BB253E), SPH_C64(0x2E25BC95B0A29D4F), + SPH_C64(0x86D6A58BDEF1388C), SPH_C64(0xDED74AC576B6F054), + SPH_C64(0x8030BDBC2B45805D), SPH_C64(0x3C81AF70E94D9289), + SPH_C64(0x3EFF6DDA9E3100DB), 
SPH_C64(0xB38DC39FDFCC8847), + SPH_C64(0x123885528D17B87E), SPH_C64(0xF2DA0ED240B1B642), + SPH_C64(0x44CEFADCD54BF9A9), SPH_C64(0x1312200E433C7EE6), + SPH_C64(0x9FFCC84F3A78C748), SPH_C64(0xF0CD1F72248576BB), + SPH_C64(0xEC6974053638CFE4), SPH_C64(0x2BA7B67C0CEC4E4C), + SPH_C64(0xAC2F4DF3E5CE32ED), SPH_C64(0xCB33D14326EA4C11), + SPH_C64(0xA4E9044CC77E58BC), SPH_C64(0x5F513293D934FCEF), + SPH_C64(0x5DC9645506E55444), SPH_C64(0x50DE418F317DE40A), + SPH_C64(0x388CB31A69DDE259), SPH_C64(0x2DB4A83455820A86), + SPH_C64(0x9010A91E84711AE9), SPH_C64(0x4DF7F0B7B1498371), + SPH_C64(0xD62A2EABC0977179), SPH_C64(0x22FAC097AA8D5C0E) +}; + +static const uint64_t cpu_T3[256] = { + SPH_C64(0xF49FCC2FF1DAF39B), SPH_C64(0x487FD5C66FF29281), + SPH_C64(0xE8A30667FCDCA83F), SPH_C64(0x2C9B4BE3D2FCCE63), + SPH_C64(0xDA3FF74B93FBBBC2), SPH_C64(0x2FA165D2FE70BA66), + SPH_C64(0xA103E279970E93D4), SPH_C64(0xBECDEC77B0E45E71), + SPH_C64(0xCFB41E723985E497), SPH_C64(0xB70AAA025EF75017), + SPH_C64(0xD42309F03840B8E0), SPH_C64(0x8EFC1AD035898579), + SPH_C64(0x96C6920BE2B2ABC5), SPH_C64(0x66AF4163375A9172), + SPH_C64(0x2174ABDCCA7127FB), SPH_C64(0xB33CCEA64A72FF41), + SPH_C64(0xF04A4933083066A5), SPH_C64(0x8D970ACDD7289AF5), + SPH_C64(0x8F96E8E031C8C25E), SPH_C64(0xF3FEC02276875D47), + SPH_C64(0xEC7BF310056190DD), SPH_C64(0xF5ADB0AEBB0F1491), + SPH_C64(0x9B50F8850FD58892), SPH_C64(0x4975488358B74DE8), + SPH_C64(0xA3354FF691531C61), SPH_C64(0x0702BBE481D2C6EE), + SPH_C64(0x89FB24057DEDED98), SPH_C64(0xAC3075138596E902), + SPH_C64(0x1D2D3580172772ED), SPH_C64(0xEB738FC28E6BC30D), + SPH_C64(0x5854EF8F63044326), SPH_C64(0x9E5C52325ADD3BBE), + SPH_C64(0x90AA53CF325C4623), SPH_C64(0xC1D24D51349DD067), + SPH_C64(0x2051CFEEA69EA624), SPH_C64(0x13220F0A862E7E4F), + SPH_C64(0xCE39399404E04864), SPH_C64(0xD9C42CA47086FCB7), + SPH_C64(0x685AD2238A03E7CC), SPH_C64(0x066484B2AB2FF1DB), + SPH_C64(0xFE9D5D70EFBF79EC), SPH_C64(0x5B13B9DD9C481854), + SPH_C64(0x15F0D475ED1509AD), SPH_C64(0x0BEBCD060EC79851), + 
SPH_C64(0xD58C6791183AB7F8), SPH_C64(0xD1187C5052F3EEE4), + SPH_C64(0xC95D1192E54E82FF), SPH_C64(0x86EEA14CB9AC6CA2), + SPH_C64(0x3485BEB153677D5D), SPH_C64(0xDD191D781F8C492A), + SPH_C64(0xF60866BAA784EBF9), SPH_C64(0x518F643BA2D08C74), + SPH_C64(0x8852E956E1087C22), SPH_C64(0xA768CB8DC410AE8D), + SPH_C64(0x38047726BFEC8E1A), SPH_C64(0xA67738B4CD3B45AA), + SPH_C64(0xAD16691CEC0DDE19), SPH_C64(0xC6D4319380462E07), + SPH_C64(0xC5A5876D0BA61938), SPH_C64(0x16B9FA1FA58FD840), + SPH_C64(0x188AB1173CA74F18), SPH_C64(0xABDA2F98C99C021F), + SPH_C64(0x3E0580AB134AE816), SPH_C64(0x5F3B05B773645ABB), + SPH_C64(0x2501A2BE5575F2F6), SPH_C64(0x1B2F74004E7E8BA9), + SPH_C64(0x1CD7580371E8D953), SPH_C64(0x7F6ED89562764E30), + SPH_C64(0xB15926FF596F003D), SPH_C64(0x9F65293DA8C5D6B9), + SPH_C64(0x6ECEF04DD690F84C), SPH_C64(0x4782275FFF33AF88), + SPH_C64(0xE41433083F820801), SPH_C64(0xFD0DFE409A1AF9B5), + SPH_C64(0x4325A3342CDB396B), SPH_C64(0x8AE77E62B301B252), + SPH_C64(0xC36F9E9F6655615A), SPH_C64(0x85455A2D92D32C09), + SPH_C64(0xF2C7DEA949477485), SPH_C64(0x63CFB4C133A39EBA), + SPH_C64(0x83B040CC6EBC5462), SPH_C64(0x3B9454C8FDB326B0), + SPH_C64(0x56F56A9E87FFD78C), SPH_C64(0x2DC2940D99F42BC6), + SPH_C64(0x98F7DF096B096E2D), SPH_C64(0x19A6E01E3AD852BF), + SPH_C64(0x42A99CCBDBD4B40B), SPH_C64(0xA59998AF45E9C559), + SPH_C64(0x366295E807D93186), SPH_C64(0x6B48181BFAA1F773), + SPH_C64(0x1FEC57E2157A0A1D), SPH_C64(0x4667446AF6201AD5), + SPH_C64(0xE615EBCACFB0F075), SPH_C64(0xB8F31F4F68290778), + SPH_C64(0x22713ED6CE22D11E), SPH_C64(0x3057C1A72EC3C93B), + SPH_C64(0xCB46ACC37C3F1F2F), SPH_C64(0xDBB893FD02AAF50E), + SPH_C64(0x331FD92E600B9FCF), SPH_C64(0xA498F96148EA3AD6), + SPH_C64(0xA8D8426E8B6A83EA), SPH_C64(0xA089B274B7735CDC), + SPH_C64(0x87F6B3731E524A11), SPH_C64(0x118808E5CBC96749), + SPH_C64(0x9906E4C7B19BD394), SPH_C64(0xAFED7F7E9B24A20C), + SPH_C64(0x6509EADEEB3644A7), SPH_C64(0x6C1EF1D3E8EF0EDE), + SPH_C64(0xB9C97D43E9798FB4), SPH_C64(0xA2F2D784740C28A3), + 
SPH_C64(0x7B8496476197566F), SPH_C64(0x7A5BE3E6B65F069D), + SPH_C64(0xF96330ED78BE6F10), SPH_C64(0xEEE60DE77A076A15), + SPH_C64(0x2B4BEE4AA08B9BD0), SPH_C64(0x6A56A63EC7B8894E), + SPH_C64(0x02121359BA34FEF4), SPH_C64(0x4CBF99F8283703FC), + SPH_C64(0x398071350CAF30C8), SPH_C64(0xD0A77A89F017687A), + SPH_C64(0xF1C1A9EB9E423569), SPH_C64(0x8C7976282DEE8199), + SPH_C64(0x5D1737A5DD1F7ABD), SPH_C64(0x4F53433C09A9FA80), + SPH_C64(0xFA8B0C53DF7CA1D9), SPH_C64(0x3FD9DCBC886CCB77), + SPH_C64(0xC040917CA91B4720), SPH_C64(0x7DD00142F9D1DCDF), + SPH_C64(0x8476FC1D4F387B58), SPH_C64(0x23F8E7C5F3316503), + SPH_C64(0x032A2244E7E37339), SPH_C64(0x5C87A5D750F5A74B), + SPH_C64(0x082B4CC43698992E), SPH_C64(0xDF917BECB858F63C), + SPH_C64(0x3270B8FC5BF86DDA), SPH_C64(0x10AE72BB29B5DD76), + SPH_C64(0x576AC94E7700362B), SPH_C64(0x1AD112DAC61EFB8F), + SPH_C64(0x691BC30EC5FAA427), SPH_C64(0xFF246311CC327143), + SPH_C64(0x3142368E30E53206), SPH_C64(0x71380E31E02CA396), + SPH_C64(0x958D5C960AAD76F1), SPH_C64(0xF8D6F430C16DA536), + SPH_C64(0xC8FFD13F1BE7E1D2), SPH_C64(0x7578AE66004DDBE1), + SPH_C64(0x05833F01067BE646), SPH_C64(0xBB34B5AD3BFE586D), + SPH_C64(0x095F34C9A12B97F0), SPH_C64(0x247AB64525D60CA8), + SPH_C64(0xDCDBC6F3017477D1), SPH_C64(0x4A2E14D4DECAD24D), + SPH_C64(0xBDB5E6D9BE0A1EEB), SPH_C64(0x2A7E70F7794301AB), + SPH_C64(0xDEF42D8A270540FD), SPH_C64(0x01078EC0A34C22C1), + SPH_C64(0xE5DE511AF4C16387), SPH_C64(0x7EBB3A52BD9A330A), + SPH_C64(0x77697857AA7D6435), SPH_C64(0x004E831603AE4C32), + SPH_C64(0xE7A21020AD78E312), SPH_C64(0x9D41A70C6AB420F2), + SPH_C64(0x28E06C18EA1141E6), SPH_C64(0xD2B28CBD984F6B28), + SPH_C64(0x26B75F6C446E9D83), SPH_C64(0xBA47568C4D418D7F), + SPH_C64(0xD80BADBFE6183D8E), SPH_C64(0x0E206D7F5F166044), + SPH_C64(0xE258A43911CBCA3E), SPH_C64(0x723A1746B21DC0BC), + SPH_C64(0xC7CAA854F5D7CDD3), SPH_C64(0x7CAC32883D261D9C), + SPH_C64(0x7690C26423BA942C), SPH_C64(0x17E55524478042B8), + SPH_C64(0xE0BE477656A2389F), SPH_C64(0x4D289B5E67AB2DA0), + 
SPH_C64(0x44862B9C8FBBFD31), SPH_C64(0xB47CC8049D141365), + SPH_C64(0x822C1B362B91C793), SPH_C64(0x4EB14655FB13DFD8), + SPH_C64(0x1ECBBA0714E2A97B), SPH_C64(0x6143459D5CDE5F14), + SPH_C64(0x53A8FBF1D5F0AC89), SPH_C64(0x97EA04D81C5E5B00), + SPH_C64(0x622181A8D4FDB3F3), SPH_C64(0xE9BCD341572A1208), + SPH_C64(0x1411258643CCE58A), SPH_C64(0x9144C5FEA4C6E0A4), + SPH_C64(0x0D33D06565CF620F), SPH_C64(0x54A48D489F219CA1), + SPH_C64(0xC43E5EAC6D63C821), SPH_C64(0xA9728B3A72770DAF), + SPH_C64(0xD7934E7B20DF87EF), SPH_C64(0xE35503B61A3E86E5), + SPH_C64(0xCAE321FBC819D504), SPH_C64(0x129A50B3AC60BFA6), + SPH_C64(0xCD5E68EA7E9FB6C3), SPH_C64(0xB01C90199483B1C7), + SPH_C64(0x3DE93CD5C295376C), SPH_C64(0xAED52EDF2AB9AD13), + SPH_C64(0x2E60F512C0A07884), SPH_C64(0xBC3D86A3E36210C9), + SPH_C64(0x35269D9B163951CE), SPH_C64(0x0C7D6E2AD0CDB5FA), + SPH_C64(0x59E86297D87F5733), SPH_C64(0x298EF221898DB0E7), + SPH_C64(0x55000029D1A5AA7E), SPH_C64(0x8BC08AE1B5061B45), + SPH_C64(0xC2C31C2B6C92703A), SPH_C64(0x94CC596BAF25EF42), + SPH_C64(0x0A1D73DB22540456), SPH_C64(0x04B6A0F9D9C4179A), + SPH_C64(0xEFFDAFA2AE3D3C60), SPH_C64(0xF7C8075BB49496C4), + SPH_C64(0x9CC5C7141D1CD4E3), SPH_C64(0x78BD1638218E5534), + SPH_C64(0xB2F11568F850246A), SPH_C64(0xEDFABCFA9502BC29), + SPH_C64(0x796CE5F2DA23051B), SPH_C64(0xAAE128B0DC93537C), + SPH_C64(0x3A493DA0EE4B29AE), SPH_C64(0xB5DF6B2C416895D7), + SPH_C64(0xFCABBD25122D7F37), SPH_C64(0x70810B58105DC4B1), + SPH_C64(0xE10FDD37F7882A90), SPH_C64(0x524DCAB5518A3F5C), + SPH_C64(0x3C9E85878451255B), SPH_C64(0x4029828119BD34E2), + SPH_C64(0x74A05B6F5D3CECCB), SPH_C64(0xB610021542E13ECA), + SPH_C64(0x0FF979D12F59E2AC), SPH_C64(0x6037DA27E4F9CC50), + SPH_C64(0x5E92975A0DF1847D), SPH_C64(0xD66DE190D3E623FE), + SPH_C64(0x5032D6B87B568048), SPH_C64(0x9A36B7CE8235216E), + SPH_C64(0x80272A7A24F64B4A), SPH_C64(0x93EFED8B8C6916F7), + SPH_C64(0x37DDBFF44CCE1555), SPH_C64(0x4B95DB5D4B99BD25), + SPH_C64(0x92D3FDA169812FC0), SPH_C64(0xFB1A4A9A90660BB6), + 
SPH_C64(0x730C196946A4B9B2), SPH_C64(0x81E289AA7F49DA68), + SPH_C64(0x64669A0F83B1A05F), SPH_C64(0x27B3FF7D9644F48B), + SPH_C64(0xCC6B615C8DB675B3), SPH_C64(0x674F20B9BCEBBE95), + SPH_C64(0x6F31238275655982), SPH_C64(0x5AE488713E45CF05), + SPH_C64(0xBF619F9954C21157), SPH_C64(0xEABAC46040A8EAE9), + SPH_C64(0x454C6FE9F2C0C1CD), SPH_C64(0x419CF6496412691C), + SPH_C64(0xD3DC3BEF265B0F70), SPH_C64(0x6D0E60F5C3578A9E) +}; + +static const uint64_t cpu_T4[256] = { + SPH_C64(0x5B0E608526323C55), SPH_C64(0x1A46C1A9FA1B59F5), + SPH_C64(0xA9E245A17C4C8FFA), SPH_C64(0x65CA5159DB2955D7), + SPH_C64(0x05DB0A76CE35AFC2), SPH_C64(0x81EAC77EA9113D45), + SPH_C64(0x528EF88AB6AC0A0D), SPH_C64(0xA09EA253597BE3FF), + SPH_C64(0x430DDFB3AC48CD56), SPH_C64(0xC4B3A67AF45CE46F), + SPH_C64(0x4ECECFD8FBE2D05E), SPH_C64(0x3EF56F10B39935F0), + SPH_C64(0x0B22D6829CD619C6), SPH_C64(0x17FD460A74DF2069), + SPH_C64(0x6CF8CC8E8510ED40), SPH_C64(0xD6C824BF3A6ECAA7), + SPH_C64(0x61243D581A817049), SPH_C64(0x048BACB6BBC163A2), + SPH_C64(0xD9A38AC27D44CC32), SPH_C64(0x7FDDFF5BAAF410AB), + SPH_C64(0xAD6D495AA804824B), SPH_C64(0xE1A6A74F2D8C9F94), + SPH_C64(0xD4F7851235DEE8E3), SPH_C64(0xFD4B7F886540D893), + SPH_C64(0x247C20042AA4BFDA), SPH_C64(0x096EA1C517D1327C), + SPH_C64(0xD56966B4361A6685), SPH_C64(0x277DA5C31221057D), + SPH_C64(0x94D59893A43ACFF7), SPH_C64(0x64F0C51CCDC02281), + SPH_C64(0x3D33BCC4FF6189DB), SPH_C64(0xE005CB184CE66AF1), + SPH_C64(0xFF5CCD1D1DB99BEA), SPH_C64(0xB0B854A7FE42980F), + SPH_C64(0x7BD46A6A718D4B9F), SPH_C64(0xD10FA8CC22A5FD8C), + SPH_C64(0xD31484952BE4BD31), SPH_C64(0xC7FA975FCB243847), + SPH_C64(0x4886ED1E5846C407), SPH_C64(0x28CDDB791EB70B04), + SPH_C64(0xC2B00BE2F573417F), SPH_C64(0x5C9590452180F877), + SPH_C64(0x7A6BDDFFF370EB00), SPH_C64(0xCE509E38D6D9D6A4), + SPH_C64(0xEBEB0F00647FA702), SPH_C64(0x1DCC06CF76606F06), + SPH_C64(0xE4D9F28BA286FF0A), SPH_C64(0xD85A305DC918C262), + SPH_C64(0x475B1D8732225F54), SPH_C64(0x2D4FB51668CCB5FE), + SPH_C64(0xA679B9D9D72BBA20), 
SPH_C64(0x53841C0D912D43A5), + SPH_C64(0x3B7EAA48BF12A4E8), SPH_C64(0x781E0E47F22F1DDF), + SPH_C64(0xEFF20CE60AB50973), SPH_C64(0x20D261D19DFFB742), + SPH_C64(0x16A12B03062A2E39), SPH_C64(0x1960EB2239650495), + SPH_C64(0x251C16FED50EB8B8), SPH_C64(0x9AC0C330F826016E), + SPH_C64(0xED152665953E7671), SPH_C64(0x02D63194A6369570), + SPH_C64(0x5074F08394B1C987), SPH_C64(0x70BA598C90B25CE1), + SPH_C64(0x794A15810B9742F6), SPH_C64(0x0D5925E9FCAF8C6C), + SPH_C64(0x3067716CD868744E), SPH_C64(0x910AB077E8D7731B), + SPH_C64(0x6A61BBDB5AC42F61), SPH_C64(0x93513EFBF0851567), + SPH_C64(0xF494724B9E83E9D5), SPH_C64(0xE887E1985C09648D), + SPH_C64(0x34B1D3C675370CFD), SPH_C64(0xDC35E433BC0D255D), + SPH_C64(0xD0AAB84234131BE0), SPH_C64(0x08042A50B48B7EAF), + SPH_C64(0x9997C4EE44A3AB35), SPH_C64(0x829A7B49201799D0), + SPH_C64(0x263B8307B7C54441), SPH_C64(0x752F95F4FD6A6CA6), + SPH_C64(0x927217402C08C6E5), SPH_C64(0x2A8AB754A795D9EE), + SPH_C64(0xA442F7552F72943D), SPH_C64(0x2C31334E19781208), + SPH_C64(0x4FA98D7CEAEE6291), SPH_C64(0x55C3862F665DB309), + SPH_C64(0xBD0610175D53B1F3), SPH_C64(0x46FE6CB840413F27), + SPH_C64(0x3FE03792DF0CFA59), SPH_C64(0xCFE700372EB85E8F), + SPH_C64(0xA7BE29E7ADBCE118), SPH_C64(0xE544EE5CDE8431DD), + SPH_C64(0x8A781B1B41F1873E), SPH_C64(0xA5C94C78A0D2F0E7), + SPH_C64(0x39412E2877B60728), SPH_C64(0xA1265EF3AFC9A62C), + SPH_C64(0xBCC2770C6A2506C5), SPH_C64(0x3AB66DD5DCE1CE12), + SPH_C64(0xE65499D04A675B37), SPH_C64(0x7D8F523481BFD216), + SPH_C64(0x0F6F64FCEC15F389), SPH_C64(0x74EFBE618B5B13C8), + SPH_C64(0xACDC82B714273E1D), SPH_C64(0xDD40BFE003199D17), + SPH_C64(0x37E99257E7E061F8), SPH_C64(0xFA52626904775AAA), + SPH_C64(0x8BBBF63A463D56F9), SPH_C64(0xF0013F1543A26E64), + SPH_C64(0xA8307E9F879EC898), SPH_C64(0xCC4C27A4150177CC), + SPH_C64(0x1B432F2CCA1D3348), SPH_C64(0xDE1D1F8F9F6FA013), + SPH_C64(0x606602A047A7DDD6), SPH_C64(0xD237AB64CC1CB2C7), + SPH_C64(0x9B938E7225FCD1D3), SPH_C64(0xEC4E03708E0FF476), + SPH_C64(0xFEB2FBDA3D03C12D), 
SPH_C64(0xAE0BCED2EE43889A), + SPH_C64(0x22CB8923EBFB4F43), SPH_C64(0x69360D013CF7396D), + SPH_C64(0x855E3602D2D4E022), SPH_C64(0x073805BAD01F784C), + SPH_C64(0x33E17A133852F546), SPH_C64(0xDF4874058AC7B638), + SPH_C64(0xBA92B29C678AA14A), SPH_C64(0x0CE89FC76CFAADCD), + SPH_C64(0x5F9D4E0908339E34), SPH_C64(0xF1AFE9291F5923B9), + SPH_C64(0x6E3480F60F4A265F), SPH_C64(0xEEBF3A2AB29B841C), + SPH_C64(0xE21938A88F91B4AD), SPH_C64(0x57DFEFF845C6D3C3), + SPH_C64(0x2F006B0BF62CAAF2), SPH_C64(0x62F479EF6F75EE78), + SPH_C64(0x11A55AD41C8916A9), SPH_C64(0xF229D29084FED453), + SPH_C64(0x42F1C27B16B000E6), SPH_C64(0x2B1F76749823C074), + SPH_C64(0x4B76ECA3C2745360), SPH_C64(0x8C98F463B91691BD), + SPH_C64(0x14BCC93CF1ADE66A), SPH_C64(0x8885213E6D458397), + SPH_C64(0x8E177DF0274D4711), SPH_C64(0xB49B73B5503F2951), + SPH_C64(0x10168168C3F96B6B), SPH_C64(0x0E3D963B63CAB0AE), + SPH_C64(0x8DFC4B5655A1DB14), SPH_C64(0xF789F1356E14DE5C), + SPH_C64(0x683E68AF4E51DAC1), SPH_C64(0xC9A84F9D8D4B0FD9), + SPH_C64(0x3691E03F52A0F9D1), SPH_C64(0x5ED86E46E1878E80), + SPH_C64(0x3C711A0E99D07150), SPH_C64(0x5A0865B20C4E9310), + SPH_C64(0x56FBFC1FE4F0682E), SPH_C64(0xEA8D5DE3105EDF9B), + SPH_C64(0x71ABFDB12379187A), SPH_C64(0x2EB99DE1BEE77B9C), + SPH_C64(0x21ECC0EA33CF4523), SPH_C64(0x59A4D7521805C7A1), + SPH_C64(0x3896F5EB56AE7C72), SPH_C64(0xAA638F3DB18F75DC), + SPH_C64(0x9F39358DABE9808E), SPH_C64(0xB7DEFA91C00B72AC), + SPH_C64(0x6B5541FD62492D92), SPH_C64(0x6DC6DEE8F92E4D5B), + SPH_C64(0x353F57ABC4BEEA7E), SPH_C64(0x735769D6DA5690CE), + SPH_C64(0x0A234AA642391484), SPH_C64(0xF6F9508028F80D9D), + SPH_C64(0xB8E319A27AB3F215), SPH_C64(0x31AD9C1151341A4D), + SPH_C64(0x773C22A57BEF5805), SPH_C64(0x45C7561A07968633), + SPH_C64(0xF913DA9E249DBE36), SPH_C64(0xDA652D9B78A64C68), + SPH_C64(0x4C27A97F3BC334EF), SPH_C64(0x76621220E66B17F4), + SPH_C64(0x967743899ACD7D0B), SPH_C64(0xF3EE5BCAE0ED6782), + SPH_C64(0x409F753600C879FC), SPH_C64(0x06D09A39B5926DB6), + SPH_C64(0x6F83AEB0317AC588), 
SPH_C64(0x01E6CA4A86381F21), + SPH_C64(0x66FF3462D19F3025), SPH_C64(0x72207C24DDFD3BFB), + SPH_C64(0x4AF6B6D3E2ECE2EB), SPH_C64(0x9C994DBEC7EA08DE), + SPH_C64(0x49ACE597B09A8BC4), SPH_C64(0xB38C4766CF0797BA), + SPH_C64(0x131B9373C57C2A75), SPH_C64(0xB1822CCE61931E58), + SPH_C64(0x9D7555B909BA1C0C), SPH_C64(0x127FAFDD937D11D2), + SPH_C64(0x29DA3BADC66D92E4), SPH_C64(0xA2C1D57154C2ECBC), + SPH_C64(0x58C5134D82F6FE24), SPH_C64(0x1C3AE3515B62274F), + SPH_C64(0xE907C82E01CB8126), SPH_C64(0xF8ED091913E37FCB), + SPH_C64(0x3249D8F9C80046C9), SPH_C64(0x80CF9BEDE388FB63), + SPH_C64(0x1881539A116CF19E), SPH_C64(0x5103F3F76BD52457), + SPH_C64(0x15B7E6F5AE47F7A8), SPH_C64(0xDBD7C6DED47E9CCF), + SPH_C64(0x44E55C410228BB1A), SPH_C64(0xB647D4255EDB4E99), + SPH_C64(0x5D11882BB8AAFC30), SPH_C64(0xF5098BBB29D3212A), + SPH_C64(0x8FB5EA14E90296B3), SPH_C64(0x677B942157DD025A), + SPH_C64(0xFB58E7C0A390ACB5), SPH_C64(0x89D3674C83BD4A01), + SPH_C64(0x9E2DA4DF4BF3B93B), SPH_C64(0xFCC41E328CAB4829), + SPH_C64(0x03F38C96BA582C52), SPH_C64(0xCAD1BDBD7FD85DB2), + SPH_C64(0xBBB442C16082AE83), SPH_C64(0xB95FE86BA5DA9AB0), + SPH_C64(0xB22E04673771A93F), SPH_C64(0x845358C9493152D8), + SPH_C64(0xBE2A488697B4541E), SPH_C64(0x95A2DC2DD38E6966), + SPH_C64(0xC02C11AC923C852B), SPH_C64(0x2388B1990DF2A87B), + SPH_C64(0x7C8008FA1B4F37BE), SPH_C64(0x1F70D0C84D54E503), + SPH_C64(0x5490ADEC7ECE57D4), SPH_C64(0x002B3C27D9063A3A), + SPH_C64(0x7EAEA3848030A2BF), SPH_C64(0xC602326DED2003C0), + SPH_C64(0x83A7287D69A94086), SPH_C64(0xC57A5FCB30F57A8A), + SPH_C64(0xB56844E479EBE779), SPH_C64(0xA373B40F05DCBCE9), + SPH_C64(0xD71A786E88570EE2), SPH_C64(0x879CBACDBDE8F6A0), + SPH_C64(0x976AD1BCC164A32F), SPH_C64(0xAB21E25E9666D78B), + SPH_C64(0x901063AAE5E5C33C), SPH_C64(0x9818B34448698D90), + SPH_C64(0xE36487AE3E1E8ABB), SPH_C64(0xAFBDF931893BDCB4), + SPH_C64(0x6345A0DC5FBBD519), SPH_C64(0x8628FE269B9465CA), + SPH_C64(0x1E5D01603F9C51EC), SPH_C64(0x4DE44006A15049B7), + SPH_C64(0xBF6C70E5F776CBB1), 
SPH_C64(0x411218F2EF552BED), + SPH_C64(0xCB0C0708705A36A3), SPH_C64(0xE74D14754F986044), + SPH_C64(0xCD56D9430EA8280E), SPH_C64(0xC12591D7535F5065), + SPH_C64(0xC83223F1720AEF96), SPH_C64(0xC3A0396F7363A51F) +};

/*
 * One pass: eight ROUND steps over the message words X0..X7. The roles of the
 * three state words rotate after every step. `mul` is the multiplier macro
 * applied at the end of each step (MUL5, MUL7 or MUL9).
 * ROUND is resolved when PASS is expanded, so the device and host code below
 * can each supply their own table lookup.
 */
#define PASS(a, b, c, mul) { \
		ROUND(a, b, c, X0, mul); \
		ROUND(b, c, a, X1, mul); \
		ROUND(c, a, b, X2, mul); \
		ROUND(a, b, c, X3, mul); \
		ROUND(b, c, a, X4, mul); \
		ROUND(c, a, b, X5, mul); \
		ROUND(a, b, c, X6, mul); \
		ROUND(b, c, a, X7, mul); \
	}

/* Multipliers used by the three passes of TIGER_ROUND_BODY. */
#define MUL5(x) SPH_T64((x) * SPH_C64(5))
#define MUL7(x) SPH_T64((x) * SPH_C64(7))
#define MUL9(x) SPH_T64((x) * SPH_C64(9))

/* Rewrites the message words X0..X7 in place; run between two passes. */
#define KSCHED { \
		X0 = SPH_T64(X0 - (X7 ^ SPH_C64(0xA5A5A5A5A5A5A5A5))); \
		X1 ^= X0; \
		X2 = SPH_T64(X2 + X1); \
		X3 = SPH_T64(X3 - (X2 ^ (~X1 << 19))); \
		X4 ^= X3; \
		X5 = SPH_T64(X5 + X4); \
		X6 = SPH_T64(X6 - (X5 ^ (~X4 >> 23))); \
		X7 ^= X6; \
		X0 = SPH_T64(X0 + X7); \
		X1 = SPH_T64(X1 - (X0 ^ (~X7 << 19))); \
		X2 ^= X1; \
		X3 = SPH_T64(X3 + X2); \
		X4 = SPH_T64(X4 - (X3 ^ (~X2 >> 23))); \
		X5 ^= X4; \
		X6 = SPH_T64(X6 + X5); \
		X7 = SPH_T64(X7 - (X6 ^ SPH_C64(0x0123456789ABCDEF))); \
	}

/*
 * Compresses one 64-byte block.
 *   in : eight 64-bit message words (read only)
 *   r  : three 64-bit state words, updated in place
 * Runs three passes (multipliers 5, 7, 9) with KSCHED between them, then
 * folds the working state back into r with xor / subtract / add.
 */
#define TIGER_ROUND_BODY(in, r) { \
		uint64_t A, B, C; \
		uint64_t X0, X1, X2, X3, X4, X5, X6, X7; \
 \
		A = (r)[0]; \
		B = (r)[1]; \
		C = (r)[2]; \
 \
		X0 = (in[0]); \
		X1 = (in[1]); \
		X2 = (in[2]); \
		X3 = (in[3]); \
		X4 = (in[4]); \
		X5 = (in[5]); \
		X6 = (in[6]); \
		X7 = (in[7]); \
		PASS(A, B, C, MUL5); \
		KSCHED; \
		PASS(C, A, B, MUL7); \
		KSCHED; \
		PASS(B, C, A, MUL9); \
 \
		(r)[0] ^= A; \
		(r)[1] = SPH_T64(B - (r)[1]); \
		(r)[2] = SPH_T64(C + (r)[2]); \
	}

/*
 * Device flavour of ROUND. The four 256-entry tables are read from the
 * kernel's `sharedMem` staging buffer, laid out as
 * [0..255]=T1, [256..511]=T2, [512..767]=T3, [768..1023]=T4.
 */
#define ROUND(a, b, c, x, mul) { \
		c ^= x; \
		a = SPH_T64(a - (sharedMem[c & 0xFF] ^ sharedMem[((c >> 16) & 0xFF)+256] \
			^ sharedMem[((c >> 32) & 0xFF)+512] ^ sharedMem[((c >> 48) & 0xFF)+768])); \
		b = SPH_T64(b + (sharedMem[((c >> 8) & 0xFF)+768] ^ sharedMem[((c >> 24) & 0xFF)+512] \
			^ sharedMem[((c >> 40) & 0xFF)+256] ^ sharedMem[(c >> 56) & 0xFF])); \
		b = mul(b); \
	}

/*
 * Finishes the hash of a 122-byte message for `threads` consecutive nonces.
 *
 * The first 64-byte block is absorbed on the host by tiger192_setBlock_120(),
 * which leaves the resulting state in `bufo`. Each thread then compresses:
 *   - block 2: words 8..15 of c_PaddedMessage80 (message bytes 64..127), with
 *     its own nonce patched into 32-bit word 13 (message bytes 116..119);
 *   - block 3: all zero except the last word, 0x3d0 = 976 = 122 * 8.
 *
 * threads     : number of nonces to process; threads past this do no hashing
 * startNounce : nonce of global thread 0; thread t uses startNounce + t
 * outputHash  : receives 3 words per thread, word-major:
 *               outputHash[i * threads + t] is word i of thread t
 *
 * Launch: 1-D grid and block of any size covering `threads`. Uses 8 KiB of
 * static shared memory and no dynamic shared memory.
 */
__global__ void m7_tiger192_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash)
{
	__shared__ uint64_t sharedMem[1024];

	// Block-stride copy so that all 256 entries of every table are staged
	// whatever the block size is (the previous `threadIdx.x < 256` guard left
	// entries unwritten for blocks smaller than 256 threads).
	for (int i = threadIdx.x; i < 256; i += blockDim.x)
	{
		sharedMem[i]       = T1[i];
		sharedMem[i + 256] = T2[i];
		sharedMem[i + 512] = T3[i];
		sharedMem[i + 768] = T4[i];
	}
	// Every thread of the block reaches this barrier: it sits before the
	// bounds check on purpose.
	__syncthreads();

	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		uint32_t nounce = startNounce + thread;

		uint64_t buf[3];
		uint64_t in2[8], in3[8];

		// Second message block, shared by all threads apart from the nonce.
		#pragma unroll 8
		for (int i = 0; i < 8; i++) { in2[i] = c_PaddedMessage80[i + 8]; }
		uint32_t *Mess = (uint32_t*)in2;
		Mess[13] = nounce;

		// Third block: zero fill followed by the message length in bits.
		#pragma unroll 7
		for (int i = 0; i < 7; i++) { in3[i] = 0; }
		in3[7] = 0x3d0;

		// Resume from the state the host computed over the first block.
		#pragma unroll 3
		for (int i = 0; i < 3; i++) { buf[i] = bufo[i]; }

		TIGER_ROUND_BODY(in2, buf);
		TIGER_ROUND_BODY(in3, buf);

		#pragma unroll 3
		for (int i = 0; i < 3; i++) { outputHash[i * threads + thread] = buf[i]; }
	}
}

/*
 * Uploads the initial state (III -> gpu_III) and the four host lookup tables
 * (cpu_T1..cpu_T4 -> T1..T4) to the device. `thr_id` and `threads` are not
 * used here.
 * NOTE(review): the cudaMemcpyToSymbol results are not checked, as in the
 * original code.
 */
void tiger192_cpu_init(int thr_id, int threads)
{
	cudaMemcpyToSymbol(gpu_III, III, sizeof(III), 0, cudaMemcpyHostToDevice);

	cudaMemcpyToSymbol(T1, cpu_T1, sizeof(cpu_T1), 0, cudaMemcpyHostToDevice);
	cudaMemcpyToSymbol(T2, cpu_T2, sizeof(cpu_T2), 0, cudaMemcpyHostToDevice);
	cudaMemcpyToSymbol(T3, cpu_T3, sizeof(cpu_T3), 0, cudaMemcpyHostToDevice);
	cudaMemcpyToSymbol(T4, cpu_T4, sizeof(cpu_T4), 0, cudaMemcpyHostToDevice);
}

/*
 * Launches m7_tiger192_gpu_hash_120 over `threads` nonces starting at
 * `startNounce`, then calls MyStreamSynchronize(NULL, order, thr_id).
 *
 * d_outputHash must be a device buffer of at least 3 * threads 64-bit words.
 */
__host__ void m7_tiger192_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
{
	// The original comment (translated from German) read: "alignment with
	// mixtab size, DO NOT CHANGE". Value kept as is.
	const int threadsperblock = 640;

	// The kernel's staging buffer is static, so no dynamic shared memory.
	size_t shared_size = 0;

	// Skip the launch for an empty range: a zero-sized grid is an invalid
	// launch configuration.
	if (threads > 0)
	{
		// Round up so the tail of the range is hashed when `threads` is not a
		// multiple of the block size; the kernel bounds-checks the surplus.
		dim3 grid((threads + threadsperblock - 1) / threadsperblock);
		dim3 block(threadsperblock);

		m7_tiger192_gpu_hash_120<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash);
	}

	MyStreamSynchronize(NULL, order, thr_id);
}

/*
 * Host flavour of the round macros, used by tiger192_setBlock_120 below. The
 * multipliers drop SPH_T64 and ROUND reads the host tables cpu_T1..cpu_T4
 * directly.
 */
#undef ROUND
#undef MUL5
#undef MUL7
#undef MUL9
#define MUL5(x) ((x) * SPH_C64(5))
#define MUL7(x) ((x) * SPH_C64(7))
#define MUL9(x) ((x) * SPH_C64(9))

#define ROUND(a, b, c, x, mul) { \
		c ^= x; \
		a = SPH_T64(a - (cpu_T1[c & 0xFF] ^ cpu_T2[(c >> 16) & 0xFF] \
			^ cpu_T3[(c >> 32) & 0xFF] ^ cpu_T4[(c >> 48) & 0xFF])); \
		b = SPH_T64(b + (cpu_T4[(c >> 8) & 0xFF] ^ cpu_T3[(c >> 24) & 0xFF] \
			^ cpu_T2[(c >> 40) & 0xFF] ^ cpu_T1[(c >> 56) & 0xFF])); \
		b = mul(b); \
	}

/*
 * Prepares the per-job constants for m7_tiger192_gpu_hash_120.
 *
 * pdata must point to at least 122 readable bytes. The function
 *   1. uploads the message, a 0x01 byte and five zero bytes (128 bytes in
 *      total) to c_PaddedMessage80;
 *   2. compresses the first 64 bytes on the host, starting from III, and
 *      uploads the resulting three state words to `bufo`.
 * NOTE(review): the cudaMemcpyToSymbol results are not checked, as in the
 * original code.
 */
__host__ void tiger192_setBlock_120(void *pdata)
{
	unsigned char PaddedMessage[128];

	memcpy(PaddedMessage, pdata, 122);
	PaddedMessage[122] = 0x01;
	// Needed: the kernel reads words 8..15 of c_PaddedMessage80, i.e. bytes
	// 64..127, so bytes 123..127 must not be left as stack garbage.
	memset(PaddedMessage + 123, 0, 5);
	cudaMemcpyToSymbol(c_PaddedMessage80, PaddedMessage, 16 * sizeof(uint64_t), 0, cudaMemcpyHostToDevice);

	uint64_t in[8], buf[3];
	// Same bytes as pdata[0..63]; copied with memcpy so that no alignment is
	// required of the caller's buffer.
	memcpy(in, PaddedMessage, sizeof(in));
	for (int i = 0; i < 3; i++) { buf[i] = III[i]; }

	TIGER_ROUND_BODY(in, buf)
	cudaMemcpyToSymbol(bufo, buf, 3 * sizeof(uint64_t), 0, cudaMemcpyHostToDevice);
}
\ No newline at end of file diff --git a/x13/cuda_whirlpool512.cu b/x13/cuda_whirlpool512.cu new file mode 100644 index 0000000000..1a3d87c4f1 --- /dev/null +++ b/x13/cuda_whirlpool512.cu @@ -0,0 +1,2907 @@ +/* + * Built on cbuchner1's implementation, actual hashing code + * based on sphlib 3.0 + * + */ +/* + * 
Whirlpool kernel implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 djm34 + * + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * ===========================(LICENSE END)============================= + * + * @author djm34 + */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + + +#include +#include +#include + + +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); + +#define USE_SHARED 1 + +#define SPH_C64(x) ((uint64_t)(x ## ULL)) +#define SPH_C32(x) ((uint32_t)(x ## U)) +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) + +#include "cuda_helper.h" + +// aus heavy.cu + + + __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding) + __constant__ uint32_t pTarget[8]; + __constant__ uint64_t stateo[8]; +uint32_t *d_wnounce[8]; +uint32_t *d_WNonce[8]; + + +static __constant__ uint64_t T0[256]; +static __constant__ uint64_t T1[256]; +static __constant__ uint64_t T2[256]; +static __constant__ uint64_t T3[256]; +static __constant__ uint64_t T4[256]; +static __constant__ uint64_t T5[256]; +static __constant__ uint64_t T6[256]; +static __constant__ uint64_t T7[256]; +static const uint64_t old1_T0[256] = { + SPH_C64(0x78D8C07818281818), SPH_C64(0xAF2605AF23652323), + SPH_C64(0xF9B87EF9C657C6C6), SPH_C64(0x6FFB136FE825E8E8), + SPH_C64(0xA1CB4CA187948787), SPH_C64(0x6211A962B8D5B8B8), + SPH_C64(0x0509080501030101), SPH_C64(0x6E0D426E4FD14F4F), + SPH_C64(0xEE9BADEE365A3636), SPH_C64(0x04FF5904A6F7A6A6), + SPH_C64(0xBD0CDEBDD26BD2D2), SPH_C64(0x060EFB06F502F5F5), + SPH_C64(0x8096EF80798B7979), SPH_C64(0xCE305FCE6FB16F6F), + SPH_C64(0xEF6DFCEF91AE9191), SPH_C64(0x07F8AA0752F65252), + SPH_C64(0xFD4727FD60A06060), SPH_C64(0x76358976BCD9BCBC), + SPH_C64(0xCD37ACCD9BB09B9B), SPH_C64(0x8C8A048C8E8F8E8E), + SPH_C64(0x15D27115A3F8A3A3), SPH_C64(0x3C6C603C0C140C0C), + SPH_C64(0x8A84FF8A7B8D7B7B), SPH_C64(0xE180B5E1355F3535), + SPH_C64(0x69F5E8691D271D1D), SPH_C64(0x47B35347E03DE0E0), + SPH_C64(0xAC21F6ACD764D7D7), SPH_C64(0xED9C5EEDC25BC2C2), + SPH_C64(0x96436D962E722E2E), SPH_C64(0x7A29627A4BDD4B4B), + 
SPH_C64(0x215DA321FE1FFEFE), SPH_C64(0x16D5821657F95757), + SPH_C64(0x41BDA841153F1515), SPH_C64(0xB6E89FB677997777), + SPH_C64(0xEB92A5EB37593737), SPH_C64(0x569E7B56E532E5E5), + SPH_C64(0xD9138CD99FBC9F9F), SPH_C64(0x1723D317F00DF0F0), + SPH_C64(0x7F206A7F4ADE4A4A), SPH_C64(0x95449E95DA73DADA), + SPH_C64(0x25A2FA2558E85858), SPH_C64(0xCACF06CAC946C9C9), + SPH_C64(0x8D7C558D297B2929), SPH_C64(0x225A50220A1E0A0A), + SPH_C64(0x4F50E14FB1CEB1B1), SPH_C64(0x1AC9691AA0FDA0A0), + SPH_C64(0xDA147FDA6BBD6B6B), SPH_C64(0xABD95CAB85928585), + SPH_C64(0x733C8173BDDABDBD), SPH_C64(0x348FD2345DE75D5D), + SPH_C64(0x5090805010301010), SPH_C64(0x0307F303F401F4F4), + SPH_C64(0xC0DD16C0CB40CBCB), SPH_C64(0xC6D3EDC63E423E3E), + SPH_C64(0x112D2811050F0505), SPH_C64(0xE6781FE667A96767), + SPH_C64(0x53977353E431E4E4), SPH_C64(0xBB0225BB27692727), + SPH_C64(0x5873325841C34141), SPH_C64(0x9DA72C9D8B808B8B), + SPH_C64(0x01F65101A7F4A7A7), SPH_C64(0x94B2CF947D877D7D), + SPH_C64(0xFB49DCFB95A29595), SPH_C64(0x9F568E9FD875D8D8), + SPH_C64(0x30708B30FB10FBFB), SPH_C64(0x71CD2371EE2FEEEE), + SPH_C64(0x91BBC7917C847C7C), SPH_C64(0xE37117E366AA6666), + SPH_C64(0x8E7BA68EDD7ADDDD), SPH_C64(0x4BAFB84B17391717), + SPH_C64(0x4645024647C94747), SPH_C64(0xDC1A84DC9EBF9E9E), + SPH_C64(0xC5D41EC5CA43CACA), SPH_C64(0x995875992D772D2D), + SPH_C64(0x792E9179BFDCBFBF), SPH_C64(0x1B3F381B07090707), + SPH_C64(0x23AC0123ADEAADAD), SPH_C64(0x2FB0EA2F5AEE5A5A), + SPH_C64(0xB5EF6CB583988383), SPH_C64(0xFFB685FF33553333), + SPH_C64(0xF25C3FF263A56363), SPH_C64(0x0A12100A02060202), + SPH_C64(0x38933938AAE3AAAA), SPH_C64(0xA8DEAFA871937171), + SPH_C64(0xCFC60ECFC845C8C8), SPH_C64(0x7DD1C87D192B1919), + SPH_C64(0x703B727049DB4949), SPH_C64(0x9A5F869AD976D9D9), + SPH_C64(0x1D31C31DF20BF2F2), SPH_C64(0x48A84B48E338E3E3), + SPH_C64(0x2AB9E22A5BED5B5B), SPH_C64(0x92BC349288858888), + SPH_C64(0xC83EA4C89AB39A9A), SPH_C64(0xBE0B2DBE266A2626), + SPH_C64(0xFABF8DFA32563232), SPH_C64(0x4A59E94AB0CDB0B0), + 
SPH_C64(0x6AF21B6AE926E9E9), SPH_C64(0x337778330F110F0F), + SPH_C64(0xA633E6A6D562D5D5), SPH_C64(0xBAF474BA809D8080), + SPH_C64(0x7C27997CBEDFBEBE), SPH_C64(0xDEEB26DECD4ACDCD), + SPH_C64(0xE489BDE4345C3434), SPH_C64(0x75327A7548D84848), + SPH_C64(0x2454AB24FF1CFFFF), SPH_C64(0x8F8DF78F7A8E7A7A), + SPH_C64(0xEA64F4EA90AD9090), SPH_C64(0x3E9DC23E5FE15F5F), + SPH_C64(0xA03D1DA020602020), SPH_C64(0xD50F67D568B86868), + SPH_C64(0x72CAD0721A2E1A1A), SPH_C64(0x2CB7192CAEEFAEAE), + SPH_C64(0x5E7DC95EB4C1B4B4), SPH_C64(0x19CE9A1954FC5454), + SPH_C64(0xE57FECE593A89393), SPH_C64(0xAA2F0DAA22662222), + SPH_C64(0xE96307E964AC6464), SPH_C64(0x122ADB12F10EF1F1), + SPH_C64(0xA2CCBFA273957373), SPH_C64(0x5A82905A12361212), + SPH_C64(0x5D7A3A5D40C04040), SPH_C64(0x2848402808180808), + SPH_C64(0xE89556E8C358C3C3), SPH_C64(0x7BDF337BEC29ECEC), + SPH_C64(0x904D9690DB70DBDB), SPH_C64(0x1FC0611FA1FEA1A1), + SPH_C64(0x83911C838D8A8D8D), SPH_C64(0xC9C8F5C93D473D3D), + SPH_C64(0xF15BCCF197A49797), SPH_C64(0x0000000000000000), + SPH_C64(0xD4F936D4CF4CCFCF), SPH_C64(0x876E45872B7D2B2B), + SPH_C64(0xB3E197B3769A7676), SPH_C64(0xB0E664B0829B8282), + SPH_C64(0xA928FEA9D667D6D6), SPH_C64(0x77C3D8771B2D1B1B), + SPH_C64(0x5B74C15BB5C2B5B5), SPH_C64(0x29BE1129AFECAFAF), + SPH_C64(0xDF1D77DF6ABE6A6A), SPH_C64(0x0DEABA0D50F05050), + SPH_C64(0x4C57124C45CF4545), SPH_C64(0x1838CB18F308F3F3), + SPH_C64(0xF0AD9DF030503030), SPH_C64(0x74C42B74EF2CEFEF), + SPH_C64(0xC3DAE5C33F413F3F), SPH_C64(0x1CC7921C55FF5555), + SPH_C64(0x10DB7910A2FBA2A2), SPH_C64(0x65E90365EA23EAEA), + SPH_C64(0xEC6A0FEC65AF6565), SPH_C64(0x6803B968BAD3BABA), + SPH_C64(0x934A65932F712F2F), SPH_C64(0xE78E4EE7C05DC0C0), + SPH_C64(0x8160BE81DE7FDEDE), SPH_C64(0x6CFCE06C1C241C1C), + SPH_C64(0x2E46BB2EFD1AFDFD), SPH_C64(0x641F52644DD74D4D), + SPH_C64(0xE076E4E092AB9292), SPH_C64(0xBCFA8FBC759F7575), + SPH_C64(0x1E36301E060A0606), SPH_C64(0x98AE24988A838A8A), + SPH_C64(0x404BF940B2CBB2B2), SPH_C64(0x59856359E637E6E6), + 
SPH_C64(0x367E70360E120E0E), SPH_C64(0x63E7F8631F211F1F), + SPH_C64(0xF75537F762A66262), SPH_C64(0xA33AEEA3D461D4D4), + SPH_C64(0x32812932A8E5A8A8), SPH_C64(0xF452C4F496A79696), + SPH_C64(0x3A629B3AF916F9F9), SPH_C64(0xF6A366F6C552C5C5), + SPH_C64(0xB11035B1256F2525), SPH_C64(0x20ABF22059EB5959), + SPH_C64(0xAED054AE84918484), SPH_C64(0xA7C5B7A772967272), + SPH_C64(0xDDECD5DD394B3939), SPH_C64(0x61165A614CD44C4C), + SPH_C64(0x3B94CA3B5EE25E5E), SPH_C64(0x859FE78578887878), + SPH_C64(0xD8E5DDD838483838), SPH_C64(0x869814868C898C8C), + SPH_C64(0xB217C6B2D16ED1D1), SPH_C64(0x0BE4410BA5F2A5A5), + SPH_C64(0x4DA1434DE23BE2E2), SPH_C64(0xF84E2FF861A36161), + SPH_C64(0x4542F145B3C8B3B3), SPH_C64(0xA53415A521632121), + SPH_C64(0xD60894D69CB99C9C), SPH_C64(0x66EEF0661E221E1E), + SPH_C64(0x5261225243C54343), SPH_C64(0xFCB176FCC754C7C7), + SPH_C64(0x2B4FB32BFC19FCFC), SPH_C64(0x14242014040C0404), + SPH_C64(0x08E3B20851F35151), SPH_C64(0xC725BCC799B69999), + SPH_C64(0xC4224FC46DB76D6D), SPH_C64(0x396568390D170D0D), + SPH_C64(0x35798335FA13FAFA), SPH_C64(0x8469B684DF7CDFDF), + SPH_C64(0x9BA9D79B7E827E7E), SPH_C64(0xB4193DB4246C2424), + SPH_C64(0xD7FEC5D73B4D3B3B), SPH_C64(0x3D9A313DABE0ABAB), + SPH_C64(0xD1F03ED1CE4FCECE), SPH_C64(0x5599885511331111), + SPH_C64(0x89830C898F8C8F8F), SPH_C64(0x6B044A6B4ED24E4E), + SPH_C64(0x5166D151B7C4B7B7), SPH_C64(0x60E00B60EB20EBEB), + SPH_C64(0xCCC1FDCC3C443C3C), SPH_C64(0xBFFD7CBF819E8181), + SPH_C64(0xFE40D4FE94A19494), SPH_C64(0x0C1CEB0CF704F7F7), + SPH_C64(0x6718A167B9D6B9B9), SPH_C64(0x5F8B985F13351313), + SPH_C64(0x9C517D9C2C742C2C), SPH_C64(0xB805D6B8D368D3D3), + SPH_C64(0x5C8C6B5CE734E7E7), SPH_C64(0xCB3957CB6EB26E6E), + SPH_C64(0xF3AA6EF3C451C4C4), SPH_C64(0x0F1B180F03050303), + SPH_C64(0x13DC8A1356FA5656), SPH_C64(0x495E1A4944CC4444), + SPH_C64(0x9EA0DF9E7F817F7F), SPH_C64(0x37882137A9E6A9A9), + SPH_C64(0x82674D822A7E2A2A), SPH_C64(0x6D0AB16DBBD0BBBB), + SPH_C64(0xE28746E2C15EC1C1), SPH_C64(0x02F1A20253F55353), + 
SPH_C64(0x8B72AE8BDC79DCDC), SPH_C64(0x275358270B1D0B0B), + SPH_C64(0xD3019CD39DBA9D9D), SPH_C64(0xC12B47C16CB46C6C), + SPH_C64(0xF5A495F531533131), SPH_C64(0xB9F387B9749C7474), + SPH_C64(0x0915E309F607F6F6), SPH_C64(0x434C0A4346CA4646), + SPH_C64(0x26A50926ACE9ACAC), SPH_C64(0x97B53C9789868989), + SPH_C64(0x44B4A044143C1414), SPH_C64(0x42BA5B42E13EE1E1), + SPH_C64(0x4EA6B04E163A1616), SPH_C64(0xD2F7CDD23A4E3A3A), + SPH_C64(0xD0066FD069BB6969), SPH_C64(0x2D41482D091B0909), + SPH_C64(0xADD7A7AD70907070), SPH_C64(0x546FD954B6C7B6B6), + SPH_C64(0xB71ECEB7D06DD0D0), SPH_C64(0x7ED63B7EED2AEDED), + SPH_C64(0xDBE22EDBCC49CCCC), SPH_C64(0x57682A5742C64242), + SPH_C64(0xC22CB4C298B59898), SPH_C64(0x0EED490EA4F1A4A4), + SPH_C64(0x88755D8828782828), SPH_C64(0x3186DA315CE45C5C), + SPH_C64(0x3F6B933FF815F8F8), SPH_C64(0xA4C244A486978686) +}; + + + +static const uint64_t old1_T1[256] = { + SPH_C64(0xD8C0781828181878), SPH_C64(0x2605AF23652323AF), + SPH_C64(0xB87EF9C657C6C6F9), SPH_C64(0xFB136FE825E8E86F), + SPH_C64(0xCB4CA187948787A1), SPH_C64(0x11A962B8D5B8B862), + SPH_C64(0x0908050103010105), SPH_C64(0x0D426E4FD14F4F6E), + SPH_C64(0x9BADEE365A3636EE), SPH_C64(0xFF5904A6F7A6A604), + SPH_C64(0x0CDEBDD26BD2D2BD), SPH_C64(0x0EFB06F502F5F506), + SPH_C64(0x96EF80798B797980), SPH_C64(0x305FCE6FB16F6FCE), + SPH_C64(0x6DFCEF91AE9191EF), SPH_C64(0xF8AA0752F6525207), + SPH_C64(0x4727FD60A06060FD), SPH_C64(0x358976BCD9BCBC76), + SPH_C64(0x37ACCD9BB09B9BCD), SPH_C64(0x8A048C8E8F8E8E8C), + SPH_C64(0xD27115A3F8A3A315), SPH_C64(0x6C603C0C140C0C3C), + SPH_C64(0x84FF8A7B8D7B7B8A), SPH_C64(0x80B5E1355F3535E1), + SPH_C64(0xF5E8691D271D1D69), SPH_C64(0xB35347E03DE0E047), + SPH_C64(0x21F6ACD764D7D7AC), SPH_C64(0x9C5EEDC25BC2C2ED), + SPH_C64(0x436D962E722E2E96), SPH_C64(0x29627A4BDD4B4B7A), + SPH_C64(0x5DA321FE1FFEFE21), SPH_C64(0xD5821657F9575716), + SPH_C64(0xBDA841153F151541), SPH_C64(0xE89FB677997777B6), + SPH_C64(0x92A5EB37593737EB), SPH_C64(0x9E7B56E532E5E556), + SPH_C64(0x138CD99FBC9F9FD9), 
SPH_C64(0x23D317F00DF0F017), + SPH_C64(0x206A7F4ADE4A4A7F), SPH_C64(0x449E95DA73DADA95), + SPH_C64(0xA2FA2558E8585825), SPH_C64(0xCF06CAC946C9C9CA), + SPH_C64(0x7C558D297B29298D), SPH_C64(0x5A50220A1E0A0A22), + SPH_C64(0x50E14FB1CEB1B14F), SPH_C64(0xC9691AA0FDA0A01A), + SPH_C64(0x147FDA6BBD6B6BDA), SPH_C64(0xD95CAB85928585AB), + SPH_C64(0x3C8173BDDABDBD73), SPH_C64(0x8FD2345DE75D5D34), + SPH_C64(0x9080501030101050), SPH_C64(0x07F303F401F4F403), + SPH_C64(0xDD16C0CB40CBCBC0), SPH_C64(0xD3EDC63E423E3EC6), + SPH_C64(0x2D2811050F050511), SPH_C64(0x781FE667A96767E6), + SPH_C64(0x977353E431E4E453), SPH_C64(0x0225BB27692727BB), + SPH_C64(0x73325841C3414158), SPH_C64(0xA72C9D8B808B8B9D), + SPH_C64(0xF65101A7F4A7A701), SPH_C64(0xB2CF947D877D7D94), + SPH_C64(0x49DCFB95A29595FB), SPH_C64(0x568E9FD875D8D89F), + SPH_C64(0x708B30FB10FBFB30), SPH_C64(0xCD2371EE2FEEEE71), + SPH_C64(0xBBC7917C847C7C91), SPH_C64(0x7117E366AA6666E3), + SPH_C64(0x7BA68EDD7ADDDD8E), SPH_C64(0xAFB84B173917174B), + SPH_C64(0x45024647C9474746), SPH_C64(0x1A84DC9EBF9E9EDC), + SPH_C64(0xD41EC5CA43CACAC5), SPH_C64(0x5875992D772D2D99), + SPH_C64(0x2E9179BFDCBFBF79), SPH_C64(0x3F381B070907071B), + SPH_C64(0xAC0123ADEAADAD23), SPH_C64(0xB0EA2F5AEE5A5A2F), + SPH_C64(0xEF6CB583988383B5), SPH_C64(0xB685FF33553333FF), + SPH_C64(0x5C3FF263A56363F2), SPH_C64(0x12100A020602020A), + SPH_C64(0x933938AAE3AAAA38), SPH_C64(0xDEAFA871937171A8), + SPH_C64(0xC60ECFC845C8C8CF), SPH_C64(0xD1C87D192B19197D), + SPH_C64(0x3B727049DB494970), SPH_C64(0x5F869AD976D9D99A), + SPH_C64(0x31C31DF20BF2F21D), SPH_C64(0xA84B48E338E3E348), + SPH_C64(0xB9E22A5BED5B5B2A), SPH_C64(0xBC34928885888892), + SPH_C64(0x3EA4C89AB39A9AC8), SPH_C64(0x0B2DBE266A2626BE), + SPH_C64(0xBF8DFA32563232FA), SPH_C64(0x59E94AB0CDB0B04A), + SPH_C64(0xF21B6AE926E9E96A), SPH_C64(0x7778330F110F0F33), + SPH_C64(0x33E6A6D562D5D5A6), SPH_C64(0xF474BA809D8080BA), + SPH_C64(0x27997CBEDFBEBE7C), SPH_C64(0xEB26DECD4ACDCDDE), + SPH_C64(0x89BDE4345C3434E4), 
SPH_C64(0x327A7548D8484875), + SPH_C64(0x54AB24FF1CFFFF24), SPH_C64(0x8DF78F7A8E7A7A8F), + SPH_C64(0x64F4EA90AD9090EA), SPH_C64(0x9DC23E5FE15F5F3E), + SPH_C64(0x3D1DA020602020A0), SPH_C64(0x0F67D568B86868D5), + SPH_C64(0xCAD0721A2E1A1A72), SPH_C64(0xB7192CAEEFAEAE2C), + SPH_C64(0x7DC95EB4C1B4B45E), SPH_C64(0xCE9A1954FC545419), + SPH_C64(0x7FECE593A89393E5), SPH_C64(0x2F0DAA22662222AA), + SPH_C64(0x6307E964AC6464E9), SPH_C64(0x2ADB12F10EF1F112), + SPH_C64(0xCCBFA273957373A2), SPH_C64(0x82905A123612125A), + SPH_C64(0x7A3A5D40C040405D), SPH_C64(0x4840280818080828), + SPH_C64(0x9556E8C358C3C3E8), SPH_C64(0xDF337BEC29ECEC7B), + SPH_C64(0x4D9690DB70DBDB90), SPH_C64(0xC0611FA1FEA1A11F), + SPH_C64(0x911C838D8A8D8D83), SPH_C64(0xC8F5C93D473D3DC9), + SPH_C64(0x5BCCF197A49797F1), SPH_C64(0x0000000000000000), + SPH_C64(0xF936D4CF4CCFCFD4), SPH_C64(0x6E45872B7D2B2B87), + SPH_C64(0xE197B3769A7676B3), SPH_C64(0xE664B0829B8282B0), + SPH_C64(0x28FEA9D667D6D6A9), SPH_C64(0xC3D8771B2D1B1B77), + SPH_C64(0x74C15BB5C2B5B55B), SPH_C64(0xBE1129AFECAFAF29), + SPH_C64(0x1D77DF6ABE6A6ADF), SPH_C64(0xEABA0D50F050500D), + SPH_C64(0x57124C45CF45454C), SPH_C64(0x38CB18F308F3F318), + SPH_C64(0xAD9DF030503030F0), SPH_C64(0xC42B74EF2CEFEF74), + SPH_C64(0xDAE5C33F413F3FC3), SPH_C64(0xC7921C55FF55551C), + SPH_C64(0xDB7910A2FBA2A210), SPH_C64(0xE90365EA23EAEA65), + SPH_C64(0x6A0FEC65AF6565EC), SPH_C64(0x03B968BAD3BABA68), + SPH_C64(0x4A65932F712F2F93), SPH_C64(0x8E4EE7C05DC0C0E7), + SPH_C64(0x60BE81DE7FDEDE81), SPH_C64(0xFCE06C1C241C1C6C), + SPH_C64(0x46BB2EFD1AFDFD2E), SPH_C64(0x1F52644DD74D4D64), + SPH_C64(0x76E4E092AB9292E0), SPH_C64(0xFA8FBC759F7575BC), + SPH_C64(0x36301E060A06061E), SPH_C64(0xAE24988A838A8A98), + SPH_C64(0x4BF940B2CBB2B240), SPH_C64(0x856359E637E6E659), + SPH_C64(0x7E70360E120E0E36), SPH_C64(0xE7F8631F211F1F63), + SPH_C64(0x5537F762A66262F7), SPH_C64(0x3AEEA3D461D4D4A3), + SPH_C64(0x812932A8E5A8A832), SPH_C64(0x52C4F496A79696F4), + SPH_C64(0x629B3AF916F9F93A), 
SPH_C64(0xA366F6C552C5C5F6), + SPH_C64(0x1035B1256F2525B1), SPH_C64(0xABF22059EB595920), + SPH_C64(0xD054AE84918484AE), SPH_C64(0xC5B7A772967272A7), + SPH_C64(0xECD5DD394B3939DD), SPH_C64(0x165A614CD44C4C61), + SPH_C64(0x94CA3B5EE25E5E3B), SPH_C64(0x9FE7857888787885), + SPH_C64(0xE5DDD838483838D8), SPH_C64(0x9814868C898C8C86), + SPH_C64(0x17C6B2D16ED1D1B2), SPH_C64(0xE4410BA5F2A5A50B), + SPH_C64(0xA1434DE23BE2E24D), SPH_C64(0x4E2FF861A36161F8), + SPH_C64(0x42F145B3C8B3B345), SPH_C64(0x3415A521632121A5), + SPH_C64(0x0894D69CB99C9CD6), SPH_C64(0xEEF0661E221E1E66), + SPH_C64(0x61225243C5434352), SPH_C64(0xB176FCC754C7C7FC), + SPH_C64(0x4FB32BFC19FCFC2B), SPH_C64(0x242014040C040414), + SPH_C64(0xE3B20851F3515108), SPH_C64(0x25BCC799B69999C7), + SPH_C64(0x224FC46DB76D6DC4), SPH_C64(0x6568390D170D0D39), + SPH_C64(0x798335FA13FAFA35), SPH_C64(0x69B684DF7CDFDF84), + SPH_C64(0xA9D79B7E827E7E9B), SPH_C64(0x193DB4246C2424B4), + SPH_C64(0xFEC5D73B4D3B3BD7), SPH_C64(0x9A313DABE0ABAB3D), + SPH_C64(0xF03ED1CE4FCECED1), SPH_C64(0x9988551133111155), + SPH_C64(0x830C898F8C8F8F89), SPH_C64(0x044A6B4ED24E4E6B), + SPH_C64(0x66D151B7C4B7B751), SPH_C64(0xE00B60EB20EBEB60), + SPH_C64(0xC1FDCC3C443C3CCC), SPH_C64(0xFD7CBF819E8181BF), + SPH_C64(0x40D4FE94A19494FE), SPH_C64(0x1CEB0CF704F7F70C), + SPH_C64(0x18A167B9D6B9B967), SPH_C64(0x8B985F133513135F), + SPH_C64(0x517D9C2C742C2C9C), SPH_C64(0x05D6B8D368D3D3B8), + SPH_C64(0x8C6B5CE734E7E75C), SPH_C64(0x3957CB6EB26E6ECB), + SPH_C64(0xAA6EF3C451C4C4F3), SPH_C64(0x1B180F030503030F), + SPH_C64(0xDC8A1356FA565613), SPH_C64(0x5E1A4944CC444449), + SPH_C64(0xA0DF9E7F817F7F9E), SPH_C64(0x882137A9E6A9A937), + SPH_C64(0x674D822A7E2A2A82), SPH_C64(0x0AB16DBBD0BBBB6D), + SPH_C64(0x8746E2C15EC1C1E2), SPH_C64(0xF1A20253F5535302), + SPH_C64(0x72AE8BDC79DCDC8B), SPH_C64(0x5358270B1D0B0B27), + SPH_C64(0x019CD39DBA9D9DD3), SPH_C64(0x2B47C16CB46C6CC1), + SPH_C64(0xA495F531533131F5), SPH_C64(0xF387B9749C7474B9), + SPH_C64(0x15E309F607F6F609), 
SPH_C64(0x4C0A4346CA464643), + SPH_C64(0xA50926ACE9ACAC26), SPH_C64(0xB53C978986898997), + SPH_C64(0xB4A044143C141444), SPH_C64(0xBA5B42E13EE1E142), + SPH_C64(0xA6B04E163A16164E), SPH_C64(0xF7CDD23A4E3A3AD2), + SPH_C64(0x066FD069BB6969D0), SPH_C64(0x41482D091B09092D), + SPH_C64(0xD7A7AD70907070AD), SPH_C64(0x6FD954B6C7B6B654), + SPH_C64(0x1ECEB7D06DD0D0B7), SPH_C64(0xD63B7EED2AEDED7E), + SPH_C64(0xE22EDBCC49CCCCDB), SPH_C64(0x682A5742C6424257), + SPH_C64(0x2CB4C298B59898C2), SPH_C64(0xED490EA4F1A4A40E), + SPH_C64(0x755D882878282888), SPH_C64(0x86DA315CE45C5C31), + SPH_C64(0x6B933FF815F8F83F), SPH_C64(0xC244A486978686A4) +}; + +static const uint64_t old1_T2[256] = { + SPH_C64(0xC0781828181878D8), SPH_C64(0x05AF23652323AF26), + SPH_C64(0x7EF9C657C6C6F9B8), SPH_C64(0x136FE825E8E86FFB), + SPH_C64(0x4CA187948787A1CB), SPH_C64(0xA962B8D5B8B86211), + SPH_C64(0x0805010301010509), SPH_C64(0x426E4FD14F4F6E0D), + SPH_C64(0xADEE365A3636EE9B), SPH_C64(0x5904A6F7A6A604FF), + SPH_C64(0xDEBDD26BD2D2BD0C), SPH_C64(0xFB06F502F5F5060E), + SPH_C64(0xEF80798B79798096), SPH_C64(0x5FCE6FB16F6FCE30), + SPH_C64(0xFCEF91AE9191EF6D), SPH_C64(0xAA0752F6525207F8), + SPH_C64(0x27FD60A06060FD47), SPH_C64(0x8976BCD9BCBC7635), + SPH_C64(0xACCD9BB09B9BCD37), SPH_C64(0x048C8E8F8E8E8C8A), + SPH_C64(0x7115A3F8A3A315D2), SPH_C64(0x603C0C140C0C3C6C), + SPH_C64(0xFF8A7B8D7B7B8A84), SPH_C64(0xB5E1355F3535E180), + SPH_C64(0xE8691D271D1D69F5), SPH_C64(0x5347E03DE0E047B3), + SPH_C64(0xF6ACD764D7D7AC21), SPH_C64(0x5EEDC25BC2C2ED9C), + SPH_C64(0x6D962E722E2E9643), SPH_C64(0x627A4BDD4B4B7A29), + SPH_C64(0xA321FE1FFEFE215D), SPH_C64(0x821657F9575716D5), + SPH_C64(0xA841153F151541BD), SPH_C64(0x9FB677997777B6E8), + SPH_C64(0xA5EB37593737EB92), SPH_C64(0x7B56E532E5E5569E), + SPH_C64(0x8CD99FBC9F9FD913), SPH_C64(0xD317F00DF0F01723), + SPH_C64(0x6A7F4ADE4A4A7F20), SPH_C64(0x9E95DA73DADA9544), + SPH_C64(0xFA2558E8585825A2), SPH_C64(0x06CAC946C9C9CACF), + SPH_C64(0x558D297B29298D7C), SPH_C64(0x50220A1E0A0A225A), + 
SPH_C64(0xE14FB1CEB1B14F50), SPH_C64(0x691AA0FDA0A01AC9), + SPH_C64(0x7FDA6BBD6B6BDA14), SPH_C64(0x5CAB85928585ABD9), + SPH_C64(0x8173BDDABDBD733C), SPH_C64(0xD2345DE75D5D348F), + SPH_C64(0x8050103010105090), SPH_C64(0xF303F401F4F40307), + SPH_C64(0x16C0CB40CBCBC0DD), SPH_C64(0xEDC63E423E3EC6D3), + SPH_C64(0x2811050F0505112D), SPH_C64(0x1FE667A96767E678), + SPH_C64(0x7353E431E4E45397), SPH_C64(0x25BB27692727BB02), + SPH_C64(0x325841C341415873), SPH_C64(0x2C9D8B808B8B9DA7), + SPH_C64(0x5101A7F4A7A701F6), SPH_C64(0xCF947D877D7D94B2), + SPH_C64(0xDCFB95A29595FB49), SPH_C64(0x8E9FD875D8D89F56), + SPH_C64(0x8B30FB10FBFB3070), SPH_C64(0x2371EE2FEEEE71CD), + SPH_C64(0xC7917C847C7C91BB), SPH_C64(0x17E366AA6666E371), + SPH_C64(0xA68EDD7ADDDD8E7B), SPH_C64(0xB84B173917174BAF), + SPH_C64(0x024647C947474645), SPH_C64(0x84DC9EBF9E9EDC1A), + SPH_C64(0x1EC5CA43CACAC5D4), SPH_C64(0x75992D772D2D9958), + SPH_C64(0x9179BFDCBFBF792E), SPH_C64(0x381B070907071B3F), + SPH_C64(0x0123ADEAADAD23AC), SPH_C64(0xEA2F5AEE5A5A2FB0), + SPH_C64(0x6CB583988383B5EF), SPH_C64(0x85FF33553333FFB6), + SPH_C64(0x3FF263A56363F25C), SPH_C64(0x100A020602020A12), + SPH_C64(0x3938AAE3AAAA3893), SPH_C64(0xAFA871937171A8DE), + SPH_C64(0x0ECFC845C8C8CFC6), SPH_C64(0xC87D192B19197DD1), + SPH_C64(0x727049DB4949703B), SPH_C64(0x869AD976D9D99A5F), + SPH_C64(0xC31DF20BF2F21D31), SPH_C64(0x4B48E338E3E348A8), + SPH_C64(0xE22A5BED5B5B2AB9), SPH_C64(0x34928885888892BC), + SPH_C64(0xA4C89AB39A9AC83E), SPH_C64(0x2DBE266A2626BE0B), + SPH_C64(0x8DFA32563232FABF), SPH_C64(0xE94AB0CDB0B04A59), + SPH_C64(0x1B6AE926E9E96AF2), SPH_C64(0x78330F110F0F3377), + SPH_C64(0xE6A6D562D5D5A633), SPH_C64(0x74BA809D8080BAF4), + SPH_C64(0x997CBEDFBEBE7C27), SPH_C64(0x26DECD4ACDCDDEEB), + SPH_C64(0xBDE4345C3434E489), SPH_C64(0x7A7548D848487532), + SPH_C64(0xAB24FF1CFFFF2454), SPH_C64(0xF78F7A8E7A7A8F8D), + SPH_C64(0xF4EA90AD9090EA64), SPH_C64(0xC23E5FE15F5F3E9D), + SPH_C64(0x1DA020602020A03D), SPH_C64(0x67D568B86868D50F), + 
SPH_C64(0xD0721A2E1A1A72CA), SPH_C64(0x192CAEEFAEAE2CB7), + SPH_C64(0xC95EB4C1B4B45E7D), SPH_C64(0x9A1954FC545419CE), + SPH_C64(0xECE593A89393E57F), SPH_C64(0x0DAA22662222AA2F), + SPH_C64(0x07E964AC6464E963), SPH_C64(0xDB12F10EF1F1122A), + SPH_C64(0xBFA273957373A2CC), SPH_C64(0x905A123612125A82), + SPH_C64(0x3A5D40C040405D7A), SPH_C64(0x4028081808082848), + SPH_C64(0x56E8C358C3C3E895), SPH_C64(0x337BEC29ECEC7BDF), + SPH_C64(0x9690DB70DBDB904D), SPH_C64(0x611FA1FEA1A11FC0), + SPH_C64(0x1C838D8A8D8D8391), SPH_C64(0xF5C93D473D3DC9C8), + SPH_C64(0xCCF197A49797F15B), SPH_C64(0x0000000000000000), + SPH_C64(0x36D4CF4CCFCFD4F9), SPH_C64(0x45872B7D2B2B876E), + SPH_C64(0x97B3769A7676B3E1), SPH_C64(0x64B0829B8282B0E6), + SPH_C64(0xFEA9D667D6D6A928), SPH_C64(0xD8771B2D1B1B77C3), + SPH_C64(0xC15BB5C2B5B55B74), SPH_C64(0x1129AFECAFAF29BE), + SPH_C64(0x77DF6ABE6A6ADF1D), SPH_C64(0xBA0D50F050500DEA), + SPH_C64(0x124C45CF45454C57), SPH_C64(0xCB18F308F3F31838), + SPH_C64(0x9DF030503030F0AD), SPH_C64(0x2B74EF2CEFEF74C4), + SPH_C64(0xE5C33F413F3FC3DA), SPH_C64(0x921C55FF55551CC7), + SPH_C64(0x7910A2FBA2A210DB), SPH_C64(0x0365EA23EAEA65E9), + SPH_C64(0x0FEC65AF6565EC6A), SPH_C64(0xB968BAD3BABA6803), + SPH_C64(0x65932F712F2F934A), SPH_C64(0x4EE7C05DC0C0E78E), + SPH_C64(0xBE81DE7FDEDE8160), SPH_C64(0xE06C1C241C1C6CFC), + SPH_C64(0xBB2EFD1AFDFD2E46), SPH_C64(0x52644DD74D4D641F), + SPH_C64(0xE4E092AB9292E076), SPH_C64(0x8FBC759F7575BCFA), + SPH_C64(0x301E060A06061E36), SPH_C64(0x24988A838A8A98AE), + SPH_C64(0xF940B2CBB2B2404B), SPH_C64(0x6359E637E6E65985), + SPH_C64(0x70360E120E0E367E), SPH_C64(0xF8631F211F1F63E7), + SPH_C64(0x37F762A66262F755), SPH_C64(0xEEA3D461D4D4A33A), + SPH_C64(0x2932A8E5A8A83281), SPH_C64(0xC4F496A79696F452), + SPH_C64(0x9B3AF916F9F93A62), SPH_C64(0x66F6C552C5C5F6A3), + SPH_C64(0x35B1256F2525B110), SPH_C64(0xF22059EB595920AB), + SPH_C64(0x54AE84918484AED0), SPH_C64(0xB7A772967272A7C5), + SPH_C64(0xD5DD394B3939DDEC), SPH_C64(0x5A614CD44C4C6116), + 
SPH_C64(0xCA3B5EE25E5E3B94), SPH_C64(0xE78578887878859F), + SPH_C64(0xDDD838483838D8E5), SPH_C64(0x14868C898C8C8698), + SPH_C64(0xC6B2D16ED1D1B217), SPH_C64(0x410BA5F2A5A50BE4), + SPH_C64(0x434DE23BE2E24DA1), SPH_C64(0x2FF861A36161F84E), + SPH_C64(0xF145B3C8B3B34542), SPH_C64(0x15A521632121A534), + SPH_C64(0x94D69CB99C9CD608), SPH_C64(0xF0661E221E1E66EE), + SPH_C64(0x225243C543435261), SPH_C64(0x76FCC754C7C7FCB1), + SPH_C64(0xB32BFC19FCFC2B4F), SPH_C64(0x2014040C04041424), + SPH_C64(0xB20851F3515108E3), SPH_C64(0xBCC799B69999C725), + SPH_C64(0x4FC46DB76D6DC422), SPH_C64(0x68390D170D0D3965), + SPH_C64(0x8335FA13FAFA3579), SPH_C64(0xB684DF7CDFDF8469), + SPH_C64(0xD79B7E827E7E9BA9), SPH_C64(0x3DB4246C2424B419), + SPH_C64(0xC5D73B4D3B3BD7FE), SPH_C64(0x313DABE0ABAB3D9A), + SPH_C64(0x3ED1CE4FCECED1F0), SPH_C64(0x8855113311115599), + SPH_C64(0x0C898F8C8F8F8983), SPH_C64(0x4A6B4ED24E4E6B04), + SPH_C64(0xD151B7C4B7B75166), SPH_C64(0x0B60EB20EBEB60E0), + SPH_C64(0xFDCC3C443C3CCCC1), SPH_C64(0x7CBF819E8181BFFD), + SPH_C64(0xD4FE94A19494FE40), SPH_C64(0xEB0CF704F7F70C1C), + SPH_C64(0xA167B9D6B9B96718), SPH_C64(0x985F133513135F8B), + SPH_C64(0x7D9C2C742C2C9C51), SPH_C64(0xD6B8D368D3D3B805), + SPH_C64(0x6B5CE734E7E75C8C), SPH_C64(0x57CB6EB26E6ECB39), + SPH_C64(0x6EF3C451C4C4F3AA), SPH_C64(0x180F030503030F1B), + SPH_C64(0x8A1356FA565613DC), SPH_C64(0x1A4944CC4444495E), + SPH_C64(0xDF9E7F817F7F9EA0), SPH_C64(0x2137A9E6A9A93788), + SPH_C64(0x4D822A7E2A2A8267), SPH_C64(0xB16DBBD0BBBB6D0A), + SPH_C64(0x46E2C15EC1C1E287), SPH_C64(0xA20253F5535302F1), + SPH_C64(0xAE8BDC79DCDC8B72), SPH_C64(0x58270B1D0B0B2753), + SPH_C64(0x9CD39DBA9D9DD301), SPH_C64(0x47C16CB46C6CC12B), + SPH_C64(0x95F531533131F5A4), SPH_C64(0x87B9749C7474B9F3), + SPH_C64(0xE309F607F6F60915), SPH_C64(0x0A4346CA4646434C), + SPH_C64(0x0926ACE9ACAC26A5), SPH_C64(0x3C978986898997B5), + SPH_C64(0xA044143C141444B4), SPH_C64(0x5B42E13EE1E142BA), + SPH_C64(0xB04E163A16164EA6), SPH_C64(0xCDD23A4E3A3AD2F7), + 
SPH_C64(0x6FD069BB6969D006), SPH_C64(0x482D091B09092D41), + SPH_C64(0xA7AD70907070ADD7), SPH_C64(0xD954B6C7B6B6546F), + SPH_C64(0xCEB7D06DD0D0B71E), SPH_C64(0x3B7EED2AEDED7ED6), + SPH_C64(0x2EDBCC49CCCCDBE2), SPH_C64(0x2A5742C642425768), + SPH_C64(0xB4C298B59898C22C), SPH_C64(0x490EA4F1A4A40EED), + SPH_C64(0x5D88287828288875), SPH_C64(0xDA315CE45C5C3186), + SPH_C64(0x933FF815F8F83F6B), SPH_C64(0x44A486978686A4C2) +}; + +static const uint64_t old1_T3[256] = { + SPH_C64(0x781828181878D8C0), SPH_C64(0xAF23652323AF2605), + SPH_C64(0xF9C657C6C6F9B87E), SPH_C64(0x6FE825E8E86FFB13), + SPH_C64(0xA187948787A1CB4C), SPH_C64(0x62B8D5B8B86211A9), + SPH_C64(0x0501030101050908), SPH_C64(0x6E4FD14F4F6E0D42), + SPH_C64(0xEE365A3636EE9BAD), SPH_C64(0x04A6F7A6A604FF59), + SPH_C64(0xBDD26BD2D2BD0CDE), SPH_C64(0x06F502F5F5060EFB), + SPH_C64(0x80798B79798096EF), SPH_C64(0xCE6FB16F6FCE305F), + SPH_C64(0xEF91AE9191EF6DFC), SPH_C64(0x0752F6525207F8AA), + SPH_C64(0xFD60A06060FD4727), SPH_C64(0x76BCD9BCBC763589), + SPH_C64(0xCD9BB09B9BCD37AC), SPH_C64(0x8C8E8F8E8E8C8A04), + SPH_C64(0x15A3F8A3A315D271), SPH_C64(0x3C0C140C0C3C6C60), + SPH_C64(0x8A7B8D7B7B8A84FF), SPH_C64(0xE1355F3535E180B5), + SPH_C64(0x691D271D1D69F5E8), SPH_C64(0x47E03DE0E047B353), + SPH_C64(0xACD764D7D7AC21F6), SPH_C64(0xEDC25BC2C2ED9C5E), + SPH_C64(0x962E722E2E96436D), SPH_C64(0x7A4BDD4B4B7A2962), + SPH_C64(0x21FE1FFEFE215DA3), SPH_C64(0x1657F9575716D582), + SPH_C64(0x41153F151541BDA8), SPH_C64(0xB677997777B6E89F), + SPH_C64(0xEB37593737EB92A5), SPH_C64(0x56E532E5E5569E7B), + SPH_C64(0xD99FBC9F9FD9138C), SPH_C64(0x17F00DF0F01723D3), + SPH_C64(0x7F4ADE4A4A7F206A), SPH_C64(0x95DA73DADA95449E), + SPH_C64(0x2558E8585825A2FA), SPH_C64(0xCAC946C9C9CACF06), + SPH_C64(0x8D297B29298D7C55), SPH_C64(0x220A1E0A0A225A50), + SPH_C64(0x4FB1CEB1B14F50E1), SPH_C64(0x1AA0FDA0A01AC969), + SPH_C64(0xDA6BBD6B6BDA147F), SPH_C64(0xAB85928585ABD95C), + SPH_C64(0x73BDDABDBD733C81), SPH_C64(0x345DE75D5D348FD2), + SPH_C64(0x5010301010509080), 
SPH_C64(0x03F401F4F40307F3), + SPH_C64(0xC0CB40CBCBC0DD16), SPH_C64(0xC63E423E3EC6D3ED), + SPH_C64(0x11050F0505112D28), SPH_C64(0xE667A96767E6781F), + SPH_C64(0x53E431E4E4539773), SPH_C64(0xBB27692727BB0225), + SPH_C64(0x5841C34141587332), SPH_C64(0x9D8B808B8B9DA72C), + SPH_C64(0x01A7F4A7A701F651), SPH_C64(0x947D877D7D94B2CF), + SPH_C64(0xFB95A29595FB49DC), SPH_C64(0x9FD875D8D89F568E), + SPH_C64(0x30FB10FBFB30708B), SPH_C64(0x71EE2FEEEE71CD23), + SPH_C64(0x917C847C7C91BBC7), SPH_C64(0xE366AA6666E37117), + SPH_C64(0x8EDD7ADDDD8E7BA6), SPH_C64(0x4B173917174BAFB8), + SPH_C64(0x4647C94747464502), SPH_C64(0xDC9EBF9E9EDC1A84), + SPH_C64(0xC5CA43CACAC5D41E), SPH_C64(0x992D772D2D995875), + SPH_C64(0x79BFDCBFBF792E91), SPH_C64(0x1B070907071B3F38), + SPH_C64(0x23ADEAADAD23AC01), SPH_C64(0x2F5AEE5A5A2FB0EA), + SPH_C64(0xB583988383B5EF6C), SPH_C64(0xFF33553333FFB685), + SPH_C64(0xF263A56363F25C3F), SPH_C64(0x0A020602020A1210), + SPH_C64(0x38AAE3AAAA389339), SPH_C64(0xA871937171A8DEAF), + SPH_C64(0xCFC845C8C8CFC60E), SPH_C64(0x7D192B19197DD1C8), + SPH_C64(0x7049DB4949703B72), SPH_C64(0x9AD976D9D99A5F86), + SPH_C64(0x1DF20BF2F21D31C3), SPH_C64(0x48E338E3E348A84B), + SPH_C64(0x2A5BED5B5B2AB9E2), SPH_C64(0x928885888892BC34), + SPH_C64(0xC89AB39A9AC83EA4), SPH_C64(0xBE266A2626BE0B2D), + SPH_C64(0xFA32563232FABF8D), SPH_C64(0x4AB0CDB0B04A59E9), + SPH_C64(0x6AE926E9E96AF21B), SPH_C64(0x330F110F0F337778), + SPH_C64(0xA6D562D5D5A633E6), SPH_C64(0xBA809D8080BAF474), + SPH_C64(0x7CBEDFBEBE7C2799), SPH_C64(0xDECD4ACDCDDEEB26), + SPH_C64(0xE4345C3434E489BD), SPH_C64(0x7548D8484875327A), + SPH_C64(0x24FF1CFFFF2454AB), SPH_C64(0x8F7A8E7A7A8F8DF7), + SPH_C64(0xEA90AD9090EA64F4), SPH_C64(0x3E5FE15F5F3E9DC2), + SPH_C64(0xA020602020A03D1D), SPH_C64(0xD568B86868D50F67), + SPH_C64(0x721A2E1A1A72CAD0), SPH_C64(0x2CAEEFAEAE2CB719), + SPH_C64(0x5EB4C1B4B45E7DC9), SPH_C64(0x1954FC545419CE9A), + SPH_C64(0xE593A89393E57FEC), SPH_C64(0xAA22662222AA2F0D), + SPH_C64(0xE964AC6464E96307), 
SPH_C64(0x12F10EF1F1122ADB), + SPH_C64(0xA273957373A2CCBF), SPH_C64(0x5A123612125A8290), + SPH_C64(0x5D40C040405D7A3A), SPH_C64(0x2808180808284840), + SPH_C64(0xE8C358C3C3E89556), SPH_C64(0x7BEC29ECEC7BDF33), + SPH_C64(0x90DB70DBDB904D96), SPH_C64(0x1FA1FEA1A11FC061), + SPH_C64(0x838D8A8D8D83911C), SPH_C64(0xC93D473D3DC9C8F5), + SPH_C64(0xF197A49797F15BCC), SPH_C64(0x0000000000000000), + SPH_C64(0xD4CF4CCFCFD4F936), SPH_C64(0x872B7D2B2B876E45), + SPH_C64(0xB3769A7676B3E197), SPH_C64(0xB0829B8282B0E664), + SPH_C64(0xA9D667D6D6A928FE), SPH_C64(0x771B2D1B1B77C3D8), + SPH_C64(0x5BB5C2B5B55B74C1), SPH_C64(0x29AFECAFAF29BE11), + SPH_C64(0xDF6ABE6A6ADF1D77), SPH_C64(0x0D50F050500DEABA), + SPH_C64(0x4C45CF45454C5712), SPH_C64(0x18F308F3F31838CB), + SPH_C64(0xF030503030F0AD9D), SPH_C64(0x74EF2CEFEF74C42B), + SPH_C64(0xC33F413F3FC3DAE5), SPH_C64(0x1C55FF55551CC792), + SPH_C64(0x10A2FBA2A210DB79), SPH_C64(0x65EA23EAEA65E903), + SPH_C64(0xEC65AF6565EC6A0F), SPH_C64(0x68BAD3BABA6803B9), + SPH_C64(0x932F712F2F934A65), SPH_C64(0xE7C05DC0C0E78E4E), + SPH_C64(0x81DE7FDEDE8160BE), SPH_C64(0x6C1C241C1C6CFCE0), + SPH_C64(0x2EFD1AFDFD2E46BB), SPH_C64(0x644DD74D4D641F52), + SPH_C64(0xE092AB9292E076E4), SPH_C64(0xBC759F7575BCFA8F), + SPH_C64(0x1E060A06061E3630), SPH_C64(0x988A838A8A98AE24), + SPH_C64(0x40B2CBB2B2404BF9), SPH_C64(0x59E637E6E6598563), + SPH_C64(0x360E120E0E367E70), SPH_C64(0x631F211F1F63E7F8), + SPH_C64(0xF762A66262F75537), SPH_C64(0xA3D461D4D4A33AEE), + SPH_C64(0x32A8E5A8A8328129), SPH_C64(0xF496A79696F452C4), + SPH_C64(0x3AF916F9F93A629B), SPH_C64(0xF6C552C5C5F6A366), + SPH_C64(0xB1256F2525B11035), SPH_C64(0x2059EB595920ABF2), + SPH_C64(0xAE84918484AED054), SPH_C64(0xA772967272A7C5B7), + SPH_C64(0xDD394B3939DDECD5), SPH_C64(0x614CD44C4C61165A), + SPH_C64(0x3B5EE25E5E3B94CA), SPH_C64(0x8578887878859FE7), + SPH_C64(0xD838483838D8E5DD), SPH_C64(0x868C898C8C869814), + SPH_C64(0xB2D16ED1D1B217C6), SPH_C64(0x0BA5F2A5A50BE441), + SPH_C64(0x4DE23BE2E24DA143), 
SPH_C64(0xF861A36161F84E2F), + SPH_C64(0x45B3C8B3B34542F1), SPH_C64(0xA521632121A53415), + SPH_C64(0xD69CB99C9CD60894), SPH_C64(0x661E221E1E66EEF0), + SPH_C64(0x5243C54343526122), SPH_C64(0xFCC754C7C7FCB176), + SPH_C64(0x2BFC19FCFC2B4FB3), SPH_C64(0x14040C0404142420), + SPH_C64(0x0851F3515108E3B2), SPH_C64(0xC799B69999C725BC), + SPH_C64(0xC46DB76D6DC4224F), SPH_C64(0x390D170D0D396568), + SPH_C64(0x35FA13FAFA357983), SPH_C64(0x84DF7CDFDF8469B6), + SPH_C64(0x9B7E827E7E9BA9D7), SPH_C64(0xB4246C2424B4193D), + SPH_C64(0xD73B4D3B3BD7FEC5), SPH_C64(0x3DABE0ABAB3D9A31), + SPH_C64(0xD1CE4FCECED1F03E), SPH_C64(0x5511331111559988), + SPH_C64(0x898F8C8F8F89830C), SPH_C64(0x6B4ED24E4E6B044A), + SPH_C64(0x51B7C4B7B75166D1), SPH_C64(0x60EB20EBEB60E00B), + SPH_C64(0xCC3C443C3CCCC1FD), SPH_C64(0xBF819E8181BFFD7C), + SPH_C64(0xFE94A19494FE40D4), SPH_C64(0x0CF704F7F70C1CEB), + SPH_C64(0x67B9D6B9B96718A1), SPH_C64(0x5F133513135F8B98), + SPH_C64(0x9C2C742C2C9C517D), SPH_C64(0xB8D368D3D3B805D6), + SPH_C64(0x5CE734E7E75C8C6B), SPH_C64(0xCB6EB26E6ECB3957), + SPH_C64(0xF3C451C4C4F3AA6E), SPH_C64(0x0F030503030F1B18), + SPH_C64(0x1356FA565613DC8A), SPH_C64(0x4944CC4444495E1A), + SPH_C64(0x9E7F817F7F9EA0DF), SPH_C64(0x37A9E6A9A9378821), + SPH_C64(0x822A7E2A2A82674D), SPH_C64(0x6DBBD0BBBB6D0AB1), + SPH_C64(0xE2C15EC1C1E28746), SPH_C64(0x0253F5535302F1A2), + SPH_C64(0x8BDC79DCDC8B72AE), SPH_C64(0x270B1D0B0B275358), + SPH_C64(0xD39DBA9D9DD3019C), SPH_C64(0xC16CB46C6CC12B47), + SPH_C64(0xF531533131F5A495), SPH_C64(0xB9749C7474B9F387), + SPH_C64(0x09F607F6F60915E3), SPH_C64(0x4346CA4646434C0A), + SPH_C64(0x26ACE9ACAC26A509), SPH_C64(0x978986898997B53C), + SPH_C64(0x44143C141444B4A0), SPH_C64(0x42E13EE1E142BA5B), + SPH_C64(0x4E163A16164EA6B0), SPH_C64(0xD23A4E3A3AD2F7CD), + SPH_C64(0xD069BB6969D0066F), SPH_C64(0x2D091B09092D4148), + SPH_C64(0xAD70907070ADD7A7), SPH_C64(0x54B6C7B6B6546FD9), + SPH_C64(0xB7D06DD0D0B71ECE), SPH_C64(0x7EED2AEDED7ED63B), + SPH_C64(0xDBCC49CCCCDBE22E), 
SPH_C64(0x5742C6424257682A), + SPH_C64(0xC298B59898C22CB4), SPH_C64(0x0EA4F1A4A40EED49), + SPH_C64(0x882878282888755D), SPH_C64(0x315CE45C5C3186DA), + SPH_C64(0x3FF815F8F83F6B93), SPH_C64(0xA486978686A4C244) +}; + +static const uint64_t old1_T4[256] = { + SPH_C64(0x1828181878D8C078), SPH_C64(0x23652323AF2605AF), + SPH_C64(0xC657C6C6F9B87EF9), SPH_C64(0xE825E8E86FFB136F), + SPH_C64(0x87948787A1CB4CA1), SPH_C64(0xB8D5B8B86211A962), + SPH_C64(0x0103010105090805), SPH_C64(0x4FD14F4F6E0D426E), + SPH_C64(0x365A3636EE9BADEE), SPH_C64(0xA6F7A6A604FF5904), + SPH_C64(0xD26BD2D2BD0CDEBD), SPH_C64(0xF502F5F5060EFB06), + SPH_C64(0x798B79798096EF80), SPH_C64(0x6FB16F6FCE305FCE), + SPH_C64(0x91AE9191EF6DFCEF), SPH_C64(0x52F6525207F8AA07), + SPH_C64(0x60A06060FD4727FD), SPH_C64(0xBCD9BCBC76358976), + SPH_C64(0x9BB09B9BCD37ACCD), SPH_C64(0x8E8F8E8E8C8A048C), + SPH_C64(0xA3F8A3A315D27115), SPH_C64(0x0C140C0C3C6C603C), + SPH_C64(0x7B8D7B7B8A84FF8A), SPH_C64(0x355F3535E180B5E1), + SPH_C64(0x1D271D1D69F5E869), SPH_C64(0xE03DE0E047B35347), + SPH_C64(0xD764D7D7AC21F6AC), SPH_C64(0xC25BC2C2ED9C5EED), + SPH_C64(0x2E722E2E96436D96), SPH_C64(0x4BDD4B4B7A29627A), + SPH_C64(0xFE1FFEFE215DA321), SPH_C64(0x57F9575716D58216), + SPH_C64(0x153F151541BDA841), SPH_C64(0x77997777B6E89FB6), + SPH_C64(0x37593737EB92A5EB), SPH_C64(0xE532E5E5569E7B56), + SPH_C64(0x9FBC9F9FD9138CD9), SPH_C64(0xF00DF0F01723D317), + SPH_C64(0x4ADE4A4A7F206A7F), SPH_C64(0xDA73DADA95449E95), + SPH_C64(0x58E8585825A2FA25), SPH_C64(0xC946C9C9CACF06CA), + SPH_C64(0x297B29298D7C558D), SPH_C64(0x0A1E0A0A225A5022), + SPH_C64(0xB1CEB1B14F50E14F), SPH_C64(0xA0FDA0A01AC9691A), + SPH_C64(0x6BBD6B6BDA147FDA), SPH_C64(0x85928585ABD95CAB), + SPH_C64(0xBDDABDBD733C8173), SPH_C64(0x5DE75D5D348FD234), + SPH_C64(0x1030101050908050), SPH_C64(0xF401F4F40307F303), + SPH_C64(0xCB40CBCBC0DD16C0), SPH_C64(0x3E423E3EC6D3EDC6), + SPH_C64(0x050F0505112D2811), SPH_C64(0x67A96767E6781FE6), + SPH_C64(0xE431E4E453977353), SPH_C64(0x27692727BB0225BB), + 
SPH_C64(0x41C3414158733258), SPH_C64(0x8B808B8B9DA72C9D), + SPH_C64(0xA7F4A7A701F65101), SPH_C64(0x7D877D7D94B2CF94), + SPH_C64(0x95A29595FB49DCFB), SPH_C64(0xD875D8D89F568E9F), + SPH_C64(0xFB10FBFB30708B30), SPH_C64(0xEE2FEEEE71CD2371), + SPH_C64(0x7C847C7C91BBC791), SPH_C64(0x66AA6666E37117E3), + SPH_C64(0xDD7ADDDD8E7BA68E), SPH_C64(0x173917174BAFB84B), + SPH_C64(0x47C9474746450246), SPH_C64(0x9EBF9E9EDC1A84DC), + SPH_C64(0xCA43CACAC5D41EC5), SPH_C64(0x2D772D2D99587599), + SPH_C64(0xBFDCBFBF792E9179), SPH_C64(0x070907071B3F381B), + SPH_C64(0xADEAADAD23AC0123), SPH_C64(0x5AEE5A5A2FB0EA2F), + SPH_C64(0x83988383B5EF6CB5), SPH_C64(0x33553333FFB685FF), + SPH_C64(0x63A56363F25C3FF2), SPH_C64(0x020602020A12100A), + SPH_C64(0xAAE3AAAA38933938), SPH_C64(0x71937171A8DEAFA8), + SPH_C64(0xC845C8C8CFC60ECF), SPH_C64(0x192B19197DD1C87D), + SPH_C64(0x49DB4949703B7270), SPH_C64(0xD976D9D99A5F869A), + SPH_C64(0xF20BF2F21D31C31D), SPH_C64(0xE338E3E348A84B48), + SPH_C64(0x5BED5B5B2AB9E22A), SPH_C64(0x8885888892BC3492), + SPH_C64(0x9AB39A9AC83EA4C8), SPH_C64(0x266A2626BE0B2DBE), + SPH_C64(0x32563232FABF8DFA), SPH_C64(0xB0CDB0B04A59E94A), + SPH_C64(0xE926E9E96AF21B6A), SPH_C64(0x0F110F0F33777833), + SPH_C64(0xD562D5D5A633E6A6), SPH_C64(0x809D8080BAF474BA), + SPH_C64(0xBEDFBEBE7C27997C), SPH_C64(0xCD4ACDCDDEEB26DE), + SPH_C64(0x345C3434E489BDE4), SPH_C64(0x48D8484875327A75), + SPH_C64(0xFF1CFFFF2454AB24), SPH_C64(0x7A8E7A7A8F8DF78F), + SPH_C64(0x90AD9090EA64F4EA), SPH_C64(0x5FE15F5F3E9DC23E), + SPH_C64(0x20602020A03D1DA0), SPH_C64(0x68B86868D50F67D5), + SPH_C64(0x1A2E1A1A72CAD072), SPH_C64(0xAEEFAEAE2CB7192C), + SPH_C64(0xB4C1B4B45E7DC95E), SPH_C64(0x54FC545419CE9A19), + SPH_C64(0x93A89393E57FECE5), SPH_C64(0x22662222AA2F0DAA), + SPH_C64(0x64AC6464E96307E9), SPH_C64(0xF10EF1F1122ADB12), + SPH_C64(0x73957373A2CCBFA2), SPH_C64(0x123612125A82905A), + SPH_C64(0x40C040405D7A3A5D), SPH_C64(0x0818080828484028), + SPH_C64(0xC358C3C3E89556E8), SPH_C64(0xEC29ECEC7BDF337B), + 
SPH_C64(0xDB70DBDB904D9690), SPH_C64(0xA1FEA1A11FC0611F), + SPH_C64(0x8D8A8D8D83911C83), SPH_C64(0x3D473D3DC9C8F5C9), + SPH_C64(0x97A49797F15BCCF1), SPH_C64(0x0000000000000000), + SPH_C64(0xCF4CCFCFD4F936D4), SPH_C64(0x2B7D2B2B876E4587), + SPH_C64(0x769A7676B3E197B3), SPH_C64(0x829B8282B0E664B0), + SPH_C64(0xD667D6D6A928FEA9), SPH_C64(0x1B2D1B1B77C3D877), + SPH_C64(0xB5C2B5B55B74C15B), SPH_C64(0xAFECAFAF29BE1129), + SPH_C64(0x6ABE6A6ADF1D77DF), SPH_C64(0x50F050500DEABA0D), + SPH_C64(0x45CF45454C57124C), SPH_C64(0xF308F3F31838CB18), + SPH_C64(0x30503030F0AD9DF0), SPH_C64(0xEF2CEFEF74C42B74), + SPH_C64(0x3F413F3FC3DAE5C3), SPH_C64(0x55FF55551CC7921C), + SPH_C64(0xA2FBA2A210DB7910), SPH_C64(0xEA23EAEA65E90365), + SPH_C64(0x65AF6565EC6A0FEC), SPH_C64(0xBAD3BABA6803B968), + SPH_C64(0x2F712F2F934A6593), SPH_C64(0xC05DC0C0E78E4EE7), + SPH_C64(0xDE7FDEDE8160BE81), SPH_C64(0x1C241C1C6CFCE06C), + SPH_C64(0xFD1AFDFD2E46BB2E), SPH_C64(0x4DD74D4D641F5264), + SPH_C64(0x92AB9292E076E4E0), SPH_C64(0x759F7575BCFA8FBC), + SPH_C64(0x060A06061E36301E), SPH_C64(0x8A838A8A98AE2498), + SPH_C64(0xB2CBB2B2404BF940), SPH_C64(0xE637E6E659856359), + SPH_C64(0x0E120E0E367E7036), SPH_C64(0x1F211F1F63E7F863), + SPH_C64(0x62A66262F75537F7), SPH_C64(0xD461D4D4A33AEEA3), + SPH_C64(0xA8E5A8A832812932), SPH_C64(0x96A79696F452C4F4), + SPH_C64(0xF916F9F93A629B3A), SPH_C64(0xC552C5C5F6A366F6), + SPH_C64(0x256F2525B11035B1), SPH_C64(0x59EB595920ABF220), + SPH_C64(0x84918484AED054AE), SPH_C64(0x72967272A7C5B7A7), + SPH_C64(0x394B3939DDECD5DD), SPH_C64(0x4CD44C4C61165A61), + SPH_C64(0x5EE25E5E3B94CA3B), SPH_C64(0x78887878859FE785), + SPH_C64(0x38483838D8E5DDD8), SPH_C64(0x8C898C8C86981486), + SPH_C64(0xD16ED1D1B217C6B2), SPH_C64(0xA5F2A5A50BE4410B), + SPH_C64(0xE23BE2E24DA1434D), SPH_C64(0x61A36161F84E2FF8), + SPH_C64(0xB3C8B3B34542F145), SPH_C64(0x21632121A53415A5), + SPH_C64(0x9CB99C9CD60894D6), SPH_C64(0x1E221E1E66EEF066), + SPH_C64(0x43C5434352612252), SPH_C64(0xC754C7C7FCB176FC), + 
SPH_C64(0xFC19FCFC2B4FB32B), SPH_C64(0x040C040414242014), + SPH_C64(0x51F3515108E3B208), SPH_C64(0x99B69999C725BCC7), + SPH_C64(0x6DB76D6DC4224FC4), SPH_C64(0x0D170D0D39656839), + SPH_C64(0xFA13FAFA35798335), SPH_C64(0xDF7CDFDF8469B684), + SPH_C64(0x7E827E7E9BA9D79B), SPH_C64(0x246C2424B4193DB4), + SPH_C64(0x3B4D3B3BD7FEC5D7), SPH_C64(0xABE0ABAB3D9A313D), + SPH_C64(0xCE4FCECED1F03ED1), SPH_C64(0x1133111155998855), + SPH_C64(0x8F8C8F8F89830C89), SPH_C64(0x4ED24E4E6B044A6B), + SPH_C64(0xB7C4B7B75166D151), SPH_C64(0xEB20EBEB60E00B60), + SPH_C64(0x3C443C3CCCC1FDCC), SPH_C64(0x819E8181BFFD7CBF), + SPH_C64(0x94A19494FE40D4FE), SPH_C64(0xF704F7F70C1CEB0C), + SPH_C64(0xB9D6B9B96718A167), SPH_C64(0x133513135F8B985F), + SPH_C64(0x2C742C2C9C517D9C), SPH_C64(0xD368D3D3B805D6B8), + SPH_C64(0xE734E7E75C8C6B5C), SPH_C64(0x6EB26E6ECB3957CB), + SPH_C64(0xC451C4C4F3AA6EF3), SPH_C64(0x030503030F1B180F), + SPH_C64(0x56FA565613DC8A13), SPH_C64(0x44CC4444495E1A49), + SPH_C64(0x7F817F7F9EA0DF9E), SPH_C64(0xA9E6A9A937882137), + SPH_C64(0x2A7E2A2A82674D82), SPH_C64(0xBBD0BBBB6D0AB16D), + SPH_C64(0xC15EC1C1E28746E2), SPH_C64(0x53F5535302F1A202), + SPH_C64(0xDC79DCDC8B72AE8B), SPH_C64(0x0B1D0B0B27535827), + SPH_C64(0x9DBA9D9DD3019CD3), SPH_C64(0x6CB46C6CC12B47C1), + SPH_C64(0x31533131F5A495F5), SPH_C64(0x749C7474B9F387B9), + SPH_C64(0xF607F6F60915E309), SPH_C64(0x46CA4646434C0A43), + SPH_C64(0xACE9ACAC26A50926), SPH_C64(0x8986898997B53C97), + SPH_C64(0x143C141444B4A044), SPH_C64(0xE13EE1E142BA5B42), + SPH_C64(0x163A16164EA6B04E), SPH_C64(0x3A4E3A3AD2F7CDD2), + SPH_C64(0x69BB6969D0066FD0), SPH_C64(0x091B09092D41482D), + SPH_C64(0x70907070ADD7A7AD), SPH_C64(0xB6C7B6B6546FD954), + SPH_C64(0xD06DD0D0B71ECEB7), SPH_C64(0xED2AEDED7ED63B7E), + SPH_C64(0xCC49CCCCDBE22EDB), SPH_C64(0x42C6424257682A57), + SPH_C64(0x98B59898C22CB4C2), SPH_C64(0xA4F1A4A40EED490E), + SPH_C64(0x2878282888755D88), SPH_C64(0x5CE45C5C3186DA31), + SPH_C64(0xF815F8F83F6B933F), SPH_C64(0x86978686A4C244A4) +}; + +static const 
uint64_t old1_T5[256] = { + SPH_C64(0x28181878D8C07818), SPH_C64(0x652323AF2605AF23), + SPH_C64(0x57C6C6F9B87EF9C6), SPH_C64(0x25E8E86FFB136FE8), + SPH_C64(0x948787A1CB4CA187), SPH_C64(0xD5B8B86211A962B8), + SPH_C64(0x0301010509080501), SPH_C64(0xD14F4F6E0D426E4F), + SPH_C64(0x5A3636EE9BADEE36), SPH_C64(0xF7A6A604FF5904A6), + SPH_C64(0x6BD2D2BD0CDEBDD2), SPH_C64(0x02F5F5060EFB06F5), + SPH_C64(0x8B79798096EF8079), SPH_C64(0xB16F6FCE305FCE6F), + SPH_C64(0xAE9191EF6DFCEF91), SPH_C64(0xF6525207F8AA0752), + SPH_C64(0xA06060FD4727FD60), SPH_C64(0xD9BCBC76358976BC), + SPH_C64(0xB09B9BCD37ACCD9B), SPH_C64(0x8F8E8E8C8A048C8E), + SPH_C64(0xF8A3A315D27115A3), SPH_C64(0x140C0C3C6C603C0C), + SPH_C64(0x8D7B7B8A84FF8A7B), SPH_C64(0x5F3535E180B5E135), + SPH_C64(0x271D1D69F5E8691D), SPH_C64(0x3DE0E047B35347E0), + SPH_C64(0x64D7D7AC21F6ACD7), SPH_C64(0x5BC2C2ED9C5EEDC2), + SPH_C64(0x722E2E96436D962E), SPH_C64(0xDD4B4B7A29627A4B), + SPH_C64(0x1FFEFE215DA321FE), SPH_C64(0xF9575716D5821657), + SPH_C64(0x3F151541BDA84115), SPH_C64(0x997777B6E89FB677), + SPH_C64(0x593737EB92A5EB37), SPH_C64(0x32E5E5569E7B56E5), + SPH_C64(0xBC9F9FD9138CD99F), SPH_C64(0x0DF0F01723D317F0), + SPH_C64(0xDE4A4A7F206A7F4A), SPH_C64(0x73DADA95449E95DA), + SPH_C64(0xE8585825A2FA2558), SPH_C64(0x46C9C9CACF06CAC9), + SPH_C64(0x7B29298D7C558D29), SPH_C64(0x1E0A0A225A50220A), + SPH_C64(0xCEB1B14F50E14FB1), SPH_C64(0xFDA0A01AC9691AA0), + SPH_C64(0xBD6B6BDA147FDA6B), SPH_C64(0x928585ABD95CAB85), + SPH_C64(0xDABDBD733C8173BD), SPH_C64(0xE75D5D348FD2345D), + SPH_C64(0x3010105090805010), SPH_C64(0x01F4F40307F303F4), + SPH_C64(0x40CBCBC0DD16C0CB), SPH_C64(0x423E3EC6D3EDC63E), + SPH_C64(0x0F0505112D281105), SPH_C64(0xA96767E6781FE667), + SPH_C64(0x31E4E453977353E4), SPH_C64(0x692727BB0225BB27), + SPH_C64(0xC341415873325841), SPH_C64(0x808B8B9DA72C9D8B), + SPH_C64(0xF4A7A701F65101A7), SPH_C64(0x877D7D94B2CF947D), + SPH_C64(0xA29595FB49DCFB95), SPH_C64(0x75D8D89F568E9FD8), + SPH_C64(0x10FBFB30708B30FB), 
SPH_C64(0x2FEEEE71CD2371EE), + SPH_C64(0x847C7C91BBC7917C), SPH_C64(0xAA6666E37117E366), + SPH_C64(0x7ADDDD8E7BA68EDD), SPH_C64(0x3917174BAFB84B17), + SPH_C64(0xC947474645024647), SPH_C64(0xBF9E9EDC1A84DC9E), + SPH_C64(0x43CACAC5D41EC5CA), SPH_C64(0x772D2D995875992D), + SPH_C64(0xDCBFBF792E9179BF), SPH_C64(0x0907071B3F381B07), + SPH_C64(0xEAADAD23AC0123AD), SPH_C64(0xEE5A5A2FB0EA2F5A), + SPH_C64(0x988383B5EF6CB583), SPH_C64(0x553333FFB685FF33), + SPH_C64(0xA56363F25C3FF263), SPH_C64(0x0602020A12100A02), + SPH_C64(0xE3AAAA38933938AA), SPH_C64(0x937171A8DEAFA871), + SPH_C64(0x45C8C8CFC60ECFC8), SPH_C64(0x2B19197DD1C87D19), + SPH_C64(0xDB4949703B727049), SPH_C64(0x76D9D99A5F869AD9), + SPH_C64(0x0BF2F21D31C31DF2), SPH_C64(0x38E3E348A84B48E3), + SPH_C64(0xED5B5B2AB9E22A5B), SPH_C64(0x85888892BC349288), + SPH_C64(0xB39A9AC83EA4C89A), SPH_C64(0x6A2626BE0B2DBE26), + SPH_C64(0x563232FABF8DFA32), SPH_C64(0xCDB0B04A59E94AB0), + SPH_C64(0x26E9E96AF21B6AE9), SPH_C64(0x110F0F337778330F), + SPH_C64(0x62D5D5A633E6A6D5), SPH_C64(0x9D8080BAF474BA80), + SPH_C64(0xDFBEBE7C27997CBE), SPH_C64(0x4ACDCDDEEB26DECD), + SPH_C64(0x5C3434E489BDE434), SPH_C64(0xD8484875327A7548), + SPH_C64(0x1CFFFF2454AB24FF), SPH_C64(0x8E7A7A8F8DF78F7A), + SPH_C64(0xAD9090EA64F4EA90), SPH_C64(0xE15F5F3E9DC23E5F), + SPH_C64(0x602020A03D1DA020), SPH_C64(0xB86868D50F67D568), + SPH_C64(0x2E1A1A72CAD0721A), SPH_C64(0xEFAEAE2CB7192CAE), + SPH_C64(0xC1B4B45E7DC95EB4), SPH_C64(0xFC545419CE9A1954), + SPH_C64(0xA89393E57FECE593), SPH_C64(0x662222AA2F0DAA22), + SPH_C64(0xAC6464E96307E964), SPH_C64(0x0EF1F1122ADB12F1), + SPH_C64(0x957373A2CCBFA273), SPH_C64(0x3612125A82905A12), + SPH_C64(0xC040405D7A3A5D40), SPH_C64(0x1808082848402808), + SPH_C64(0x58C3C3E89556E8C3), SPH_C64(0x29ECEC7BDF337BEC), + SPH_C64(0x70DBDB904D9690DB), SPH_C64(0xFEA1A11FC0611FA1), + SPH_C64(0x8A8D8D83911C838D), SPH_C64(0x473D3DC9C8F5C93D), + SPH_C64(0xA49797F15BCCF197), SPH_C64(0x0000000000000000), + SPH_C64(0x4CCFCFD4F936D4CF), 
SPH_C64(0x7D2B2B876E45872B), + SPH_C64(0x9A7676B3E197B376), SPH_C64(0x9B8282B0E664B082), + SPH_C64(0x67D6D6A928FEA9D6), SPH_C64(0x2D1B1B77C3D8771B), + SPH_C64(0xC2B5B55B74C15BB5), SPH_C64(0xECAFAF29BE1129AF), + SPH_C64(0xBE6A6ADF1D77DF6A), SPH_C64(0xF050500DEABA0D50), + SPH_C64(0xCF45454C57124C45), SPH_C64(0x08F3F31838CB18F3), + SPH_C64(0x503030F0AD9DF030), SPH_C64(0x2CEFEF74C42B74EF), + SPH_C64(0x413F3FC3DAE5C33F), SPH_C64(0xFF55551CC7921C55), + SPH_C64(0xFBA2A210DB7910A2), SPH_C64(0x23EAEA65E90365EA), + SPH_C64(0xAF6565EC6A0FEC65), SPH_C64(0xD3BABA6803B968BA), + SPH_C64(0x712F2F934A65932F), SPH_C64(0x5DC0C0E78E4EE7C0), + SPH_C64(0x7FDEDE8160BE81DE), SPH_C64(0x241C1C6CFCE06C1C), + SPH_C64(0x1AFDFD2E46BB2EFD), SPH_C64(0xD74D4D641F52644D), + SPH_C64(0xAB9292E076E4E092), SPH_C64(0x9F7575BCFA8FBC75), + SPH_C64(0x0A06061E36301E06), SPH_C64(0x838A8A98AE24988A), + SPH_C64(0xCBB2B2404BF940B2), SPH_C64(0x37E6E659856359E6), + SPH_C64(0x120E0E367E70360E), SPH_C64(0x211F1F63E7F8631F), + SPH_C64(0xA66262F75537F762), SPH_C64(0x61D4D4A33AEEA3D4), + SPH_C64(0xE5A8A832812932A8), SPH_C64(0xA79696F452C4F496), + SPH_C64(0x16F9F93A629B3AF9), SPH_C64(0x52C5C5F6A366F6C5), + SPH_C64(0x6F2525B11035B125), SPH_C64(0xEB595920ABF22059), + SPH_C64(0x918484AED054AE84), SPH_C64(0x967272A7C5B7A772), + SPH_C64(0x4B3939DDECD5DD39), SPH_C64(0xD44C4C61165A614C), + SPH_C64(0xE25E5E3B94CA3B5E), SPH_C64(0x887878859FE78578), + SPH_C64(0x483838D8E5DDD838), SPH_C64(0x898C8C869814868C), + SPH_C64(0x6ED1D1B217C6B2D1), SPH_C64(0xF2A5A50BE4410BA5), + SPH_C64(0x3BE2E24DA1434DE2), SPH_C64(0xA36161F84E2FF861), + SPH_C64(0xC8B3B34542F145B3), SPH_C64(0x632121A53415A521), + SPH_C64(0xB99C9CD60894D69C), SPH_C64(0x221E1E66EEF0661E), + SPH_C64(0xC543435261225243), SPH_C64(0x54C7C7FCB176FCC7), + SPH_C64(0x19FCFC2B4FB32BFC), SPH_C64(0x0C04041424201404), + SPH_C64(0xF3515108E3B20851), SPH_C64(0xB69999C725BCC799), + SPH_C64(0xB76D6DC4224FC46D), SPH_C64(0x170D0D396568390D), + SPH_C64(0x13FAFA35798335FA), 
SPH_C64(0x7CDFDF8469B684DF), + SPH_C64(0x827E7E9BA9D79B7E), SPH_C64(0x6C2424B4193DB424), + SPH_C64(0x4D3B3BD7FEC5D73B), SPH_C64(0xE0ABAB3D9A313DAB), + SPH_C64(0x4FCECED1F03ED1CE), SPH_C64(0x3311115599885511), + SPH_C64(0x8C8F8F89830C898F), SPH_C64(0xD24E4E6B044A6B4E), + SPH_C64(0xC4B7B75166D151B7), SPH_C64(0x20EBEB60E00B60EB), + SPH_C64(0x443C3CCCC1FDCC3C), SPH_C64(0x9E8181BFFD7CBF81), + SPH_C64(0xA19494FE40D4FE94), SPH_C64(0x04F7F70C1CEB0CF7), + SPH_C64(0xD6B9B96718A167B9), SPH_C64(0x3513135F8B985F13), + SPH_C64(0x742C2C9C517D9C2C), SPH_C64(0x68D3D3B805D6B8D3), + SPH_C64(0x34E7E75C8C6B5CE7), SPH_C64(0xB26E6ECB3957CB6E), + SPH_C64(0x51C4C4F3AA6EF3C4), SPH_C64(0x0503030F1B180F03), + SPH_C64(0xFA565613DC8A1356), SPH_C64(0xCC4444495E1A4944), + SPH_C64(0x817F7F9EA0DF9E7F), SPH_C64(0xE6A9A937882137A9), + SPH_C64(0x7E2A2A82674D822A), SPH_C64(0xD0BBBB6D0AB16DBB), + SPH_C64(0x5EC1C1E28746E2C1), SPH_C64(0xF5535302F1A20253), + SPH_C64(0x79DCDC8B72AE8BDC), SPH_C64(0x1D0B0B275358270B), + SPH_C64(0xBA9D9DD3019CD39D), SPH_C64(0xB46C6CC12B47C16C), + SPH_C64(0x533131F5A495F531), SPH_C64(0x9C7474B9F387B974), + SPH_C64(0x07F6F60915E309F6), SPH_C64(0xCA4646434C0A4346), + SPH_C64(0xE9ACAC26A50926AC), SPH_C64(0x86898997B53C9789), + SPH_C64(0x3C141444B4A04414), SPH_C64(0x3EE1E142BA5B42E1), + SPH_C64(0x3A16164EA6B04E16), SPH_C64(0x4E3A3AD2F7CDD23A), + SPH_C64(0xBB6969D0066FD069), SPH_C64(0x1B09092D41482D09), + SPH_C64(0x907070ADD7A7AD70), SPH_C64(0xC7B6B6546FD954B6), + SPH_C64(0x6DD0D0B71ECEB7D0), SPH_C64(0x2AEDED7ED63B7EED), + SPH_C64(0x49CCCCDBE22EDBCC), SPH_C64(0xC6424257682A5742), + SPH_C64(0xB59898C22CB4C298), SPH_C64(0xF1A4A40EED490EA4), + SPH_C64(0x78282888755D8828), SPH_C64(0xE45C5C3186DA315C), + SPH_C64(0x15F8F83F6B933FF8), SPH_C64(0x978686A4C244A486) +}; + +static const uint64_t old1_T6[256] = { + SPH_C64(0x181878D8C0781828), SPH_C64(0x2323AF2605AF2365), + SPH_C64(0xC6C6F9B87EF9C657), SPH_C64(0xE8E86FFB136FE825), + SPH_C64(0x8787A1CB4CA18794), SPH_C64(0xB8B86211A962B8D5), + 
SPH_C64(0x0101050908050103), SPH_C64(0x4F4F6E0D426E4FD1), + SPH_C64(0x3636EE9BADEE365A), SPH_C64(0xA6A604FF5904A6F7), + SPH_C64(0xD2D2BD0CDEBDD26B), SPH_C64(0xF5F5060EFB06F502), + SPH_C64(0x79798096EF80798B), SPH_C64(0x6F6FCE305FCE6FB1), + SPH_C64(0x9191EF6DFCEF91AE), SPH_C64(0x525207F8AA0752F6), + SPH_C64(0x6060FD4727FD60A0), SPH_C64(0xBCBC76358976BCD9), + SPH_C64(0x9B9BCD37ACCD9BB0), SPH_C64(0x8E8E8C8A048C8E8F), + SPH_C64(0xA3A315D27115A3F8), SPH_C64(0x0C0C3C6C603C0C14), + SPH_C64(0x7B7B8A84FF8A7B8D), SPH_C64(0x3535E180B5E1355F), + SPH_C64(0x1D1D69F5E8691D27), SPH_C64(0xE0E047B35347E03D), + SPH_C64(0xD7D7AC21F6ACD764), SPH_C64(0xC2C2ED9C5EEDC25B), + SPH_C64(0x2E2E96436D962E72), SPH_C64(0x4B4B7A29627A4BDD), + SPH_C64(0xFEFE215DA321FE1F), SPH_C64(0x575716D5821657F9), + SPH_C64(0x151541BDA841153F), SPH_C64(0x7777B6E89FB67799), + SPH_C64(0x3737EB92A5EB3759), SPH_C64(0xE5E5569E7B56E532), + SPH_C64(0x9F9FD9138CD99FBC), SPH_C64(0xF0F01723D317F00D), + SPH_C64(0x4A4A7F206A7F4ADE), SPH_C64(0xDADA95449E95DA73), + SPH_C64(0x585825A2FA2558E8), SPH_C64(0xC9C9CACF06CAC946), + SPH_C64(0x29298D7C558D297B), SPH_C64(0x0A0A225A50220A1E), + SPH_C64(0xB1B14F50E14FB1CE), SPH_C64(0xA0A01AC9691AA0FD), + SPH_C64(0x6B6BDA147FDA6BBD), SPH_C64(0x8585ABD95CAB8592), + SPH_C64(0xBDBD733C8173BDDA), SPH_C64(0x5D5D348FD2345DE7), + SPH_C64(0x1010509080501030), SPH_C64(0xF4F40307F303F401), + SPH_C64(0xCBCBC0DD16C0CB40), SPH_C64(0x3E3EC6D3EDC63E42), + SPH_C64(0x0505112D2811050F), SPH_C64(0x6767E6781FE667A9), + SPH_C64(0xE4E453977353E431), SPH_C64(0x2727BB0225BB2769), + SPH_C64(0x41415873325841C3), SPH_C64(0x8B8B9DA72C9D8B80), + SPH_C64(0xA7A701F65101A7F4), SPH_C64(0x7D7D94B2CF947D87), + SPH_C64(0x9595FB49DCFB95A2), SPH_C64(0xD8D89F568E9FD875), + SPH_C64(0xFBFB30708B30FB10), SPH_C64(0xEEEE71CD2371EE2F), + SPH_C64(0x7C7C91BBC7917C84), SPH_C64(0x6666E37117E366AA), + SPH_C64(0xDDDD8E7BA68EDD7A), SPH_C64(0x17174BAFB84B1739), + SPH_C64(0x47474645024647C9), SPH_C64(0x9E9EDC1A84DC9EBF), + 
SPH_C64(0xCACAC5D41EC5CA43), SPH_C64(0x2D2D995875992D77), + SPH_C64(0xBFBF792E9179BFDC), SPH_C64(0x07071B3F381B0709), + SPH_C64(0xADAD23AC0123ADEA), SPH_C64(0x5A5A2FB0EA2F5AEE), + SPH_C64(0x8383B5EF6CB58398), SPH_C64(0x3333FFB685FF3355), + SPH_C64(0x6363F25C3FF263A5), SPH_C64(0x02020A12100A0206), + SPH_C64(0xAAAA38933938AAE3), SPH_C64(0x7171A8DEAFA87193), + SPH_C64(0xC8C8CFC60ECFC845), SPH_C64(0x19197DD1C87D192B), + SPH_C64(0x4949703B727049DB), SPH_C64(0xD9D99A5F869AD976), + SPH_C64(0xF2F21D31C31DF20B), SPH_C64(0xE3E348A84B48E338), + SPH_C64(0x5B5B2AB9E22A5BED), SPH_C64(0x888892BC34928885), + SPH_C64(0x9A9AC83EA4C89AB3), SPH_C64(0x2626BE0B2DBE266A), + SPH_C64(0x3232FABF8DFA3256), SPH_C64(0xB0B04A59E94AB0CD), + SPH_C64(0xE9E96AF21B6AE926), SPH_C64(0x0F0F337778330F11), + SPH_C64(0xD5D5A633E6A6D562), SPH_C64(0x8080BAF474BA809D), + SPH_C64(0xBEBE7C27997CBEDF), SPH_C64(0xCDCDDEEB26DECD4A), + SPH_C64(0x3434E489BDE4345C), SPH_C64(0x484875327A7548D8), + SPH_C64(0xFFFF2454AB24FF1C), SPH_C64(0x7A7A8F8DF78F7A8E), + SPH_C64(0x9090EA64F4EA90AD), SPH_C64(0x5F5F3E9DC23E5FE1), + SPH_C64(0x2020A03D1DA02060), SPH_C64(0x6868D50F67D568B8), + SPH_C64(0x1A1A72CAD0721A2E), SPH_C64(0xAEAE2CB7192CAEEF), + SPH_C64(0xB4B45E7DC95EB4C1), SPH_C64(0x545419CE9A1954FC), + SPH_C64(0x9393E57FECE593A8), SPH_C64(0x2222AA2F0DAA2266), + SPH_C64(0x6464E96307E964AC), SPH_C64(0xF1F1122ADB12F10E), + SPH_C64(0x7373A2CCBFA27395), SPH_C64(0x12125A82905A1236), + SPH_C64(0x40405D7A3A5D40C0), SPH_C64(0x0808284840280818), + SPH_C64(0xC3C3E89556E8C358), SPH_C64(0xECEC7BDF337BEC29), + SPH_C64(0xDBDB904D9690DB70), SPH_C64(0xA1A11FC0611FA1FE), + SPH_C64(0x8D8D83911C838D8A), SPH_C64(0x3D3DC9C8F5C93D47), + SPH_C64(0x9797F15BCCF197A4), SPH_C64(0x0000000000000000), + SPH_C64(0xCFCFD4F936D4CF4C), SPH_C64(0x2B2B876E45872B7D), + SPH_C64(0x7676B3E197B3769A), SPH_C64(0x8282B0E664B0829B), + SPH_C64(0xD6D6A928FEA9D667), SPH_C64(0x1B1B77C3D8771B2D), + SPH_C64(0xB5B55B74C15BB5C2), SPH_C64(0xAFAF29BE1129AFEC), + 
SPH_C64(0x6A6ADF1D77DF6ABE), SPH_C64(0x50500DEABA0D50F0), + SPH_C64(0x45454C57124C45CF), SPH_C64(0xF3F31838CB18F308), + SPH_C64(0x3030F0AD9DF03050), SPH_C64(0xEFEF74C42B74EF2C), + SPH_C64(0x3F3FC3DAE5C33F41), SPH_C64(0x55551CC7921C55FF), + SPH_C64(0xA2A210DB7910A2FB), SPH_C64(0xEAEA65E90365EA23), + SPH_C64(0x6565EC6A0FEC65AF), SPH_C64(0xBABA6803B968BAD3), + SPH_C64(0x2F2F934A65932F71), SPH_C64(0xC0C0E78E4EE7C05D), + SPH_C64(0xDEDE8160BE81DE7F), SPH_C64(0x1C1C6CFCE06C1C24), + SPH_C64(0xFDFD2E46BB2EFD1A), SPH_C64(0x4D4D641F52644DD7), + SPH_C64(0x9292E076E4E092AB), SPH_C64(0x7575BCFA8FBC759F), + SPH_C64(0x06061E36301E060A), SPH_C64(0x8A8A98AE24988A83), + SPH_C64(0xB2B2404BF940B2CB), SPH_C64(0xE6E659856359E637), + SPH_C64(0x0E0E367E70360E12), SPH_C64(0x1F1F63E7F8631F21), + SPH_C64(0x6262F75537F762A6), SPH_C64(0xD4D4A33AEEA3D461), + SPH_C64(0xA8A832812932A8E5), SPH_C64(0x9696F452C4F496A7), + SPH_C64(0xF9F93A629B3AF916), SPH_C64(0xC5C5F6A366F6C552), + SPH_C64(0x2525B11035B1256F), SPH_C64(0x595920ABF22059EB), + SPH_C64(0x8484AED054AE8491), SPH_C64(0x7272A7C5B7A77296), + SPH_C64(0x3939DDECD5DD394B), SPH_C64(0x4C4C61165A614CD4), + SPH_C64(0x5E5E3B94CA3B5EE2), SPH_C64(0x7878859FE7857888), + SPH_C64(0x3838D8E5DDD83848), SPH_C64(0x8C8C869814868C89), + SPH_C64(0xD1D1B217C6B2D16E), SPH_C64(0xA5A50BE4410BA5F2), + SPH_C64(0xE2E24DA1434DE23B), SPH_C64(0x6161F84E2FF861A3), + SPH_C64(0xB3B34542F145B3C8), SPH_C64(0x2121A53415A52163), + SPH_C64(0x9C9CD60894D69CB9), SPH_C64(0x1E1E66EEF0661E22), + SPH_C64(0x43435261225243C5), SPH_C64(0xC7C7FCB176FCC754), + SPH_C64(0xFCFC2B4FB32BFC19), SPH_C64(0x040414242014040C), + SPH_C64(0x515108E3B20851F3), SPH_C64(0x9999C725BCC799B6), + SPH_C64(0x6D6DC4224FC46DB7), SPH_C64(0x0D0D396568390D17), + SPH_C64(0xFAFA35798335FA13), SPH_C64(0xDFDF8469B684DF7C), + SPH_C64(0x7E7E9BA9D79B7E82), SPH_C64(0x2424B4193DB4246C), + SPH_C64(0x3B3BD7FEC5D73B4D), SPH_C64(0xABAB3D9A313DABE0), + SPH_C64(0xCECED1F03ED1CE4F), SPH_C64(0x1111559988551133), + 
SPH_C64(0x8F8F89830C898F8C), SPH_C64(0x4E4E6B044A6B4ED2), + SPH_C64(0xB7B75166D151B7C4), SPH_C64(0xEBEB60E00B60EB20), + SPH_C64(0x3C3CCCC1FDCC3C44), SPH_C64(0x8181BFFD7CBF819E), + SPH_C64(0x9494FE40D4FE94A1), SPH_C64(0xF7F70C1CEB0CF704), + SPH_C64(0xB9B96718A167B9D6), SPH_C64(0x13135F8B985F1335), + SPH_C64(0x2C2C9C517D9C2C74), SPH_C64(0xD3D3B805D6B8D368), + SPH_C64(0xE7E75C8C6B5CE734), SPH_C64(0x6E6ECB3957CB6EB2), + SPH_C64(0xC4C4F3AA6EF3C451), SPH_C64(0x03030F1B180F0305), + SPH_C64(0x565613DC8A1356FA), SPH_C64(0x4444495E1A4944CC), + SPH_C64(0x7F7F9EA0DF9E7F81), SPH_C64(0xA9A937882137A9E6), + SPH_C64(0x2A2A82674D822A7E), SPH_C64(0xBBBB6D0AB16DBBD0), + SPH_C64(0xC1C1E28746E2C15E), SPH_C64(0x535302F1A20253F5), + SPH_C64(0xDCDC8B72AE8BDC79), SPH_C64(0x0B0B275358270B1D), + SPH_C64(0x9D9DD3019CD39DBA), SPH_C64(0x6C6CC12B47C16CB4), + SPH_C64(0x3131F5A495F53153), SPH_C64(0x7474B9F387B9749C), + SPH_C64(0xF6F60915E309F607), SPH_C64(0x4646434C0A4346CA), + SPH_C64(0xACAC26A50926ACE9), SPH_C64(0x898997B53C978986), + SPH_C64(0x141444B4A044143C), SPH_C64(0xE1E142BA5B42E13E), + SPH_C64(0x16164EA6B04E163A), SPH_C64(0x3A3AD2F7CDD23A4E), + SPH_C64(0x6969D0066FD069BB), SPH_C64(0x09092D41482D091B), + SPH_C64(0x7070ADD7A7AD7090), SPH_C64(0xB6B6546FD954B6C7), + SPH_C64(0xD0D0B71ECEB7D06D), SPH_C64(0xEDED7ED63B7EED2A), + SPH_C64(0xCCCCDBE22EDBCC49), SPH_C64(0x424257682A5742C6), + SPH_C64(0x9898C22CB4C298B5), SPH_C64(0xA4A40EED490EA4F1), + SPH_C64(0x282888755D882878), SPH_C64(0x5C5C3186DA315CE4), + SPH_C64(0xF8F83F6B933FF815), SPH_C64(0x8686A4C244A48697) +}; + +static const uint64_t old1_T7[256] = { + SPH_C64(0x1878D8C078182818), SPH_C64(0x23AF2605AF236523), + SPH_C64(0xC6F9B87EF9C657C6), SPH_C64(0xE86FFB136FE825E8), + SPH_C64(0x87A1CB4CA1879487), SPH_C64(0xB86211A962B8D5B8), + SPH_C64(0x0105090805010301), SPH_C64(0x4F6E0D426E4FD14F), + SPH_C64(0x36EE9BADEE365A36), SPH_C64(0xA604FF5904A6F7A6), + SPH_C64(0xD2BD0CDEBDD26BD2), SPH_C64(0xF5060EFB06F502F5), + SPH_C64(0x798096EF80798B79), 
SPH_C64(0x6FCE305FCE6FB16F), + SPH_C64(0x91EF6DFCEF91AE91), SPH_C64(0x5207F8AA0752F652), + SPH_C64(0x60FD4727FD60A060), SPH_C64(0xBC76358976BCD9BC), + SPH_C64(0x9BCD37ACCD9BB09B), SPH_C64(0x8E8C8A048C8E8F8E), + SPH_C64(0xA315D27115A3F8A3), SPH_C64(0x0C3C6C603C0C140C), + SPH_C64(0x7B8A84FF8A7B8D7B), SPH_C64(0x35E180B5E1355F35), + SPH_C64(0x1D69F5E8691D271D), SPH_C64(0xE047B35347E03DE0), + SPH_C64(0xD7AC21F6ACD764D7), SPH_C64(0xC2ED9C5EEDC25BC2), + SPH_C64(0x2E96436D962E722E), SPH_C64(0x4B7A29627A4BDD4B), + SPH_C64(0xFE215DA321FE1FFE), SPH_C64(0x5716D5821657F957), + SPH_C64(0x1541BDA841153F15), SPH_C64(0x77B6E89FB6779977), + SPH_C64(0x37EB92A5EB375937), SPH_C64(0xE5569E7B56E532E5), + SPH_C64(0x9FD9138CD99FBC9F), SPH_C64(0xF01723D317F00DF0), + SPH_C64(0x4A7F206A7F4ADE4A), SPH_C64(0xDA95449E95DA73DA), + SPH_C64(0x5825A2FA2558E858), SPH_C64(0xC9CACF06CAC946C9), + SPH_C64(0x298D7C558D297B29), SPH_C64(0x0A225A50220A1E0A), + SPH_C64(0xB14F50E14FB1CEB1), SPH_C64(0xA01AC9691AA0FDA0), + SPH_C64(0x6BDA147FDA6BBD6B), SPH_C64(0x85ABD95CAB859285), + SPH_C64(0xBD733C8173BDDABD), SPH_C64(0x5D348FD2345DE75D), + SPH_C64(0x1050908050103010), SPH_C64(0xF40307F303F401F4), + SPH_C64(0xCBC0DD16C0CB40CB), SPH_C64(0x3EC6D3EDC63E423E), + SPH_C64(0x05112D2811050F05), SPH_C64(0x67E6781FE667A967), + SPH_C64(0xE453977353E431E4), SPH_C64(0x27BB0225BB276927), + SPH_C64(0x415873325841C341), SPH_C64(0x8B9DA72C9D8B808B), + SPH_C64(0xA701F65101A7F4A7), SPH_C64(0x7D94B2CF947D877D), + SPH_C64(0x95FB49DCFB95A295), SPH_C64(0xD89F568E9FD875D8), + SPH_C64(0xFB30708B30FB10FB), SPH_C64(0xEE71CD2371EE2FEE), + SPH_C64(0x7C91BBC7917C847C), SPH_C64(0x66E37117E366AA66), + SPH_C64(0xDD8E7BA68EDD7ADD), SPH_C64(0x174BAFB84B173917), + SPH_C64(0x474645024647C947), SPH_C64(0x9EDC1A84DC9EBF9E), + SPH_C64(0xCAC5D41EC5CA43CA), SPH_C64(0x2D995875992D772D), + SPH_C64(0xBF792E9179BFDCBF), SPH_C64(0x071B3F381B070907), + SPH_C64(0xAD23AC0123ADEAAD), SPH_C64(0x5A2FB0EA2F5AEE5A), + SPH_C64(0x83B5EF6CB5839883), 
SPH_C64(0x33FFB685FF335533), + SPH_C64(0x63F25C3FF263A563), SPH_C64(0x020A12100A020602), + SPH_C64(0xAA38933938AAE3AA), SPH_C64(0x71A8DEAFA8719371), + SPH_C64(0xC8CFC60ECFC845C8), SPH_C64(0x197DD1C87D192B19), + SPH_C64(0x49703B727049DB49), SPH_C64(0xD99A5F869AD976D9), + SPH_C64(0xF21D31C31DF20BF2), SPH_C64(0xE348A84B48E338E3), + SPH_C64(0x5B2AB9E22A5BED5B), SPH_C64(0x8892BC3492888588), + SPH_C64(0x9AC83EA4C89AB39A), SPH_C64(0x26BE0B2DBE266A26), + SPH_C64(0x32FABF8DFA325632), SPH_C64(0xB04A59E94AB0CDB0), + SPH_C64(0xE96AF21B6AE926E9), SPH_C64(0x0F337778330F110F), + SPH_C64(0xD5A633E6A6D562D5), SPH_C64(0x80BAF474BA809D80), + SPH_C64(0xBE7C27997CBEDFBE), SPH_C64(0xCDDEEB26DECD4ACD), + SPH_C64(0x34E489BDE4345C34), SPH_C64(0x4875327A7548D848), + SPH_C64(0xFF2454AB24FF1CFF), SPH_C64(0x7A8F8DF78F7A8E7A), + SPH_C64(0x90EA64F4EA90AD90), SPH_C64(0x5F3E9DC23E5FE15F), + SPH_C64(0x20A03D1DA0206020), SPH_C64(0x68D50F67D568B868), + SPH_C64(0x1A72CAD0721A2E1A), SPH_C64(0xAE2CB7192CAEEFAE), + SPH_C64(0xB45E7DC95EB4C1B4), SPH_C64(0x5419CE9A1954FC54), + SPH_C64(0x93E57FECE593A893), SPH_C64(0x22AA2F0DAA226622), + SPH_C64(0x64E96307E964AC64), SPH_C64(0xF1122ADB12F10EF1), + SPH_C64(0x73A2CCBFA2739573), SPH_C64(0x125A82905A123612), + SPH_C64(0x405D7A3A5D40C040), SPH_C64(0x0828484028081808), + SPH_C64(0xC3E89556E8C358C3), SPH_C64(0xEC7BDF337BEC29EC), + SPH_C64(0xDB904D9690DB70DB), SPH_C64(0xA11FC0611FA1FEA1), + SPH_C64(0x8D83911C838D8A8D), SPH_C64(0x3DC9C8F5C93D473D), + SPH_C64(0x97F15BCCF197A497), SPH_C64(0x0000000000000000), + SPH_C64(0xCFD4F936D4CF4CCF), SPH_C64(0x2B876E45872B7D2B), + SPH_C64(0x76B3E197B3769A76), SPH_C64(0x82B0E664B0829B82), + SPH_C64(0xD6A928FEA9D667D6), SPH_C64(0x1B77C3D8771B2D1B), + SPH_C64(0xB55B74C15BB5C2B5), SPH_C64(0xAF29BE1129AFECAF), + SPH_C64(0x6ADF1D77DF6ABE6A), SPH_C64(0x500DEABA0D50F050), + SPH_C64(0x454C57124C45CF45), SPH_C64(0xF31838CB18F308F3), + SPH_C64(0x30F0AD9DF0305030), SPH_C64(0xEF74C42B74EF2CEF), + SPH_C64(0x3FC3DAE5C33F413F), 
SPH_C64(0x551CC7921C55FF55), + SPH_C64(0xA210DB7910A2FBA2), SPH_C64(0xEA65E90365EA23EA), + SPH_C64(0x65EC6A0FEC65AF65), SPH_C64(0xBA6803B968BAD3BA), + SPH_C64(0x2F934A65932F712F), SPH_C64(0xC0E78E4EE7C05DC0), + SPH_C64(0xDE8160BE81DE7FDE), SPH_C64(0x1C6CFCE06C1C241C), + SPH_C64(0xFD2E46BB2EFD1AFD), SPH_C64(0x4D641F52644DD74D), + SPH_C64(0x92E076E4E092AB92), SPH_C64(0x75BCFA8FBC759F75), + SPH_C64(0x061E36301E060A06), SPH_C64(0x8A98AE24988A838A), + SPH_C64(0xB2404BF940B2CBB2), SPH_C64(0xE659856359E637E6), + SPH_C64(0x0E367E70360E120E), SPH_C64(0x1F63E7F8631F211F), + SPH_C64(0x62F75537F762A662), SPH_C64(0xD4A33AEEA3D461D4), + SPH_C64(0xA832812932A8E5A8), SPH_C64(0x96F452C4F496A796), + SPH_C64(0xF93A629B3AF916F9), SPH_C64(0xC5F6A366F6C552C5), + SPH_C64(0x25B11035B1256F25), SPH_C64(0x5920ABF22059EB59), + SPH_C64(0x84AED054AE849184), SPH_C64(0x72A7C5B7A7729672), + SPH_C64(0x39DDECD5DD394B39), SPH_C64(0x4C61165A614CD44C), + SPH_C64(0x5E3B94CA3B5EE25E), SPH_C64(0x78859FE785788878), + SPH_C64(0x38D8E5DDD8384838), SPH_C64(0x8C869814868C898C), + SPH_C64(0xD1B217C6B2D16ED1), SPH_C64(0xA50BE4410BA5F2A5), + SPH_C64(0xE24DA1434DE23BE2), SPH_C64(0x61F84E2FF861A361), + SPH_C64(0xB34542F145B3C8B3), SPH_C64(0x21A53415A5216321), + SPH_C64(0x9CD60894D69CB99C), SPH_C64(0x1E66EEF0661E221E), + SPH_C64(0x435261225243C543), SPH_C64(0xC7FCB176FCC754C7), + SPH_C64(0xFC2B4FB32BFC19FC), SPH_C64(0x0414242014040C04), + SPH_C64(0x5108E3B20851F351), SPH_C64(0x99C725BCC799B699), + SPH_C64(0x6DC4224FC46DB76D), SPH_C64(0x0D396568390D170D), + SPH_C64(0xFA35798335FA13FA), SPH_C64(0xDF8469B684DF7CDF), + SPH_C64(0x7E9BA9D79B7E827E), SPH_C64(0x24B4193DB4246C24), + SPH_C64(0x3BD7FEC5D73B4D3B), SPH_C64(0xAB3D9A313DABE0AB), + SPH_C64(0xCED1F03ED1CE4FCE), SPH_C64(0x1155998855113311), + SPH_C64(0x8F89830C898F8C8F), SPH_C64(0x4E6B044A6B4ED24E), + SPH_C64(0xB75166D151B7C4B7), SPH_C64(0xEB60E00B60EB20EB), + SPH_C64(0x3CCCC1FDCC3C443C), SPH_C64(0x81BFFD7CBF819E81), + SPH_C64(0x94FE40D4FE94A194), 
SPH_C64(0xF70C1CEB0CF704F7), + SPH_C64(0xB96718A167B9D6B9), SPH_C64(0x135F8B985F133513), + SPH_C64(0x2C9C517D9C2C742C), SPH_C64(0xD3B805D6B8D368D3), + SPH_C64(0xE75C8C6B5CE734E7), SPH_C64(0x6ECB3957CB6EB26E), + SPH_C64(0xC4F3AA6EF3C451C4), SPH_C64(0x030F1B180F030503), + SPH_C64(0x5613DC8A1356FA56), SPH_C64(0x44495E1A4944CC44), + SPH_C64(0x7F9EA0DF9E7F817F), SPH_C64(0xA937882137A9E6A9), + SPH_C64(0x2A82674D822A7E2A), SPH_C64(0xBB6D0AB16DBBD0BB), + SPH_C64(0xC1E28746E2C15EC1), SPH_C64(0x5302F1A20253F553), + SPH_C64(0xDC8B72AE8BDC79DC), SPH_C64(0x0B275358270B1D0B), + SPH_C64(0x9DD3019CD39DBA9D), SPH_C64(0x6CC12B47C16CB46C), + SPH_C64(0x31F5A495F5315331), SPH_C64(0x74B9F387B9749C74), + SPH_C64(0xF60915E309F607F6), SPH_C64(0x46434C0A4346CA46), + SPH_C64(0xAC26A50926ACE9AC), SPH_C64(0x8997B53C97898689), + SPH_C64(0x1444B4A044143C14), SPH_C64(0xE142BA5B42E13EE1), + SPH_C64(0x164EA6B04E163A16), SPH_C64(0x3AD2F7CDD23A4E3A), + SPH_C64(0x69D0066FD069BB69), SPH_C64(0x092D41482D091B09), + SPH_C64(0x70ADD7A7AD709070), SPH_C64(0xB6546FD954B6C7B6), + SPH_C64(0xD0B71ECEB7D06DD0), SPH_C64(0xED7ED63B7EED2AED), + SPH_C64(0xCCDBE22EDBCC49CC), SPH_C64(0x4257682A5742C642), + SPH_C64(0x98C22CB4C298B598), SPH_C64(0xA40EED490EA4F1A4), + SPH_C64(0x2888755D88287828), SPH_C64(0x5C3186DA315CE45C), + SPH_C64(0xF83F6B933FF815F8), SPH_C64(0x86A4C244A4869786) +}; + + + +static const uint64_t old1_RC[10] = { + SPH_C64(0x4F01B887E8C62318), + SPH_C64(0x52916F79F5D2A636), + SPH_C64(0x357B0CA38E9BBC60), + SPH_C64(0x57FE4B2EC2D7E01D), + SPH_C64(0xDA4AF09FE5377715), + SPH_C64(0x856BA0B10A29C958), + SPH_C64(0x67053ECBF4105DBD), + SPH_C64(0xD8957DA78B4127E4), + SPH_C64(0x9E4717DD667CEEFB), + SPH_C64(0x33835AAD07BF2DCA) +}; + + + + + +static const uint64_t plain_T0[256] = { + SPH_C64(0xD83078C018601818), SPH_C64(0x2646AF05238C2323), + SPH_C64(0xB891F97EC63FC6C6), SPH_C64(0xFBCD6F13E887E8E8), + SPH_C64(0xCB13A14C87268787), SPH_C64(0x116D62A9B8DAB8B8), + SPH_C64(0x0902050801040101), 
SPH_C64(0x0D9E6E424F214F4F), + SPH_C64(0x9B6CEEAD36D83636), SPH_C64(0xFF510459A6A2A6A6), + SPH_C64(0x0CB9BDDED26FD2D2), SPH_C64(0x0EF706FBF5F3F5F5), + SPH_C64(0x96F280EF79F97979), SPH_C64(0x30DECE5F6FA16F6F), + SPH_C64(0x6D3FEFFC917E9191), SPH_C64(0xF8A407AA52555252), + SPH_C64(0x47C0FD27609D6060), SPH_C64(0x35657689BCCABCBC), + SPH_C64(0x372BCDAC9B569B9B), SPH_C64(0x8A018C048E028E8E), + SPH_C64(0xD25B1571A3B6A3A3), SPH_C64(0x6C183C600C300C0C), + SPH_C64(0x84F68AFF7BF17B7B), SPH_C64(0x806AE1B535D43535), + SPH_C64(0xF53A69E81D741D1D), SPH_C64(0xB3DD4753E0A7E0E0), + SPH_C64(0x21B3ACF6D77BD7D7), SPH_C64(0x9C99ED5EC22FC2C2), + SPH_C64(0x435C966D2EB82E2E), SPH_C64(0x29967A624B314B4B), + SPH_C64(0x5DE121A3FEDFFEFE), SPH_C64(0xD5AE168257415757), + SPH_C64(0xBD2A41A815541515), SPH_C64(0xE8EEB69F77C17777), + SPH_C64(0x926EEBA537DC3737), SPH_C64(0x9ED7567BE5B3E5E5), + SPH_C64(0x1323D98C9F469F9F), SPH_C64(0x23FD17D3F0E7F0F0), + SPH_C64(0x20947F6A4A354A4A), SPH_C64(0x44A9959EDA4FDADA), + SPH_C64(0xA2B025FA587D5858), SPH_C64(0xCF8FCA06C903C9C9), + SPH_C64(0x7C528D5529A42929), SPH_C64(0x5A1422500A280A0A), + SPH_C64(0x507F4FE1B1FEB1B1), SPH_C64(0xC95D1A69A0BAA0A0), + SPH_C64(0x14D6DA7F6BB16B6B), SPH_C64(0xD917AB5C852E8585), + SPH_C64(0x3C677381BDCEBDBD), SPH_C64(0x8FBA34D25D695D5D), + SPH_C64(0x9020508010401010), SPH_C64(0x07F503F3F4F7F4F4), + SPH_C64(0xDD8BC016CB0BCBCB), SPH_C64(0xD37CC6ED3EF83E3E), + SPH_C64(0x2D0A112805140505), SPH_C64(0x78CEE61F67816767), + SPH_C64(0x97D55373E4B7E4E4), SPH_C64(0x024EBB25279C2727), + SPH_C64(0x7382583241194141), SPH_C64(0xA70B9D2C8B168B8B), + SPH_C64(0xF6530151A7A6A7A7), SPH_C64(0xB2FA94CF7DE97D7D), + SPH_C64(0x4937FBDC956E9595), SPH_C64(0x56AD9F8ED847D8D8), + SPH_C64(0x70EB308BFBCBFBFB), SPH_C64(0xCDC17123EE9FEEEE), + SPH_C64(0xBBF891C77CED7C7C), SPH_C64(0x71CCE31766856666), + SPH_C64(0x7BA78EA6DD53DDDD), SPH_C64(0xAF2E4BB8175C1717), + SPH_C64(0x458E460247014747), SPH_C64(0x1A21DC849E429E9E), + SPH_C64(0xD489C51ECA0FCACA), 
SPH_C64(0x585A99752DB42D2D), + SPH_C64(0x2E637991BFC6BFBF), SPH_C64(0x3F0E1B38071C0707), + SPH_C64(0xAC472301AD8EADAD), SPH_C64(0xB0B42FEA5A755A5A), + SPH_C64(0xEF1BB56C83368383), SPH_C64(0xB666FF8533CC3333), + SPH_C64(0x5CC6F23F63916363), SPH_C64(0x12040A1002080202), + SPH_C64(0x93493839AA92AAAA), SPH_C64(0xDEE2A8AF71D97171), + SPH_C64(0xC68DCF0EC807C8C8), SPH_C64(0xD1327DC819641919), + SPH_C64(0x3B92707249394949), SPH_C64(0x5FAF9A86D943D9D9), + SPH_C64(0x31F91DC3F2EFF2F2), SPH_C64(0xA8DB484BE3ABE3E3), + SPH_C64(0xB9B62AE25B715B5B), SPH_C64(0xBC0D9234881A8888), + SPH_C64(0x3E29C8A49A529A9A), SPH_C64(0x0B4CBE2D26982626), + SPH_C64(0xBF64FA8D32C83232), SPH_C64(0x597D4AE9B0FAB0B0), + SPH_C64(0xF2CF6A1BE983E9E9), SPH_C64(0x771E33780F3C0F0F), + SPH_C64(0x33B7A6E6D573D5D5), SPH_C64(0xF41DBA74803A8080), + SPH_C64(0x27617C99BEC2BEBE), SPH_C64(0xEB87DE26CD13CDCD), + SPH_C64(0x8968E4BD34D03434), SPH_C64(0x3290757A483D4848), + SPH_C64(0x54E324ABFFDBFFFF), SPH_C64(0x8DF48FF77AF57A7A), + SPH_C64(0x643DEAF4907A9090), SPH_C64(0x9DBE3EC25F615F5F), + SPH_C64(0x3D40A01D20802020), SPH_C64(0x0FD0D56768BD6868), + SPH_C64(0xCA3472D01A681A1A), SPH_C64(0xB7412C19AE82AEAE), + SPH_C64(0x7D755EC9B4EAB4B4), SPH_C64(0xCEA8199A544D5454), + SPH_C64(0x7F3BE5EC93769393), SPH_C64(0x2F44AA0D22882222), + SPH_C64(0x63C8E907648D6464), SPH_C64(0x2AFF12DBF1E3F1F1), + SPH_C64(0xCCE6A2BF73D17373), SPH_C64(0x82245A9012481212), + SPH_C64(0x7A805D3A401D4040), SPH_C64(0x4810284008200808), + SPH_C64(0x959BE856C32BC3C3), SPH_C64(0xDFC57B33EC97ECEC), + SPH_C64(0x4DAB9096DB4BDBDB), SPH_C64(0xC05F1F61A1BEA1A1), + SPH_C64(0x9107831C8D0E8D8D), SPH_C64(0xC87AC9F53DF43D3D), + SPH_C64(0x5B33F1CC97669797), SPH_C64(0x0000000000000000), + SPH_C64(0xF983D436CF1BCFCF), SPH_C64(0x6E5687452BAC2B2B), + SPH_C64(0xE1ECB39776C57676), SPH_C64(0xE619B06482328282), + SPH_C64(0x28B1A9FED67FD6D6), SPH_C64(0xC33677D81B6C1B1B), + SPH_C64(0x74775BC1B5EEB5B5), SPH_C64(0xBE432911AF86AFAF), + SPH_C64(0x1DD4DF776AB56A6A), 
SPH_C64(0xEAA00DBA505D5050), + SPH_C64(0x578A4C1245094545), SPH_C64(0x38FB18CBF3EBF3F3), + SPH_C64(0xAD60F09D30C03030), SPH_C64(0xC4C3742BEF9BEFEF), + SPH_C64(0xDA7EC3E53FFC3F3F), SPH_C64(0xC7AA1C9255495555), + SPH_C64(0xDB591079A2B2A2A2), SPH_C64(0xE9C96503EA8FEAEA), + SPH_C64(0x6ACAEC0F65896565), SPH_C64(0x036968B9BAD2BABA), + SPH_C64(0x4A5E93652FBC2F2F), SPH_C64(0x8E9DE74EC027C0C0), + SPH_C64(0x60A181BEDE5FDEDE), SPH_C64(0xFC386CE01C701C1C), + SPH_C64(0x46E72EBBFDD3FDFD), SPH_C64(0x1F9A64524D294D4D), + SPH_C64(0x7639E0E492729292), SPH_C64(0xFAEABC8F75C97575), + SPH_C64(0x360C1E3006180606), SPH_C64(0xAE0998248A128A8A), + SPH_C64(0x4B7940F9B2F2B2B2), SPH_C64(0x85D15963E6BFE6E6), + SPH_C64(0x7E1C36700E380E0E), SPH_C64(0xE73E63F81F7C1F1F), + SPH_C64(0x55C4F73762956262), SPH_C64(0x3AB5A3EED477D4D4), + SPH_C64(0x814D3229A89AA8A8), SPH_C64(0x5231F4C496629696), + SPH_C64(0x62EF3A9BF9C3F9F9), SPH_C64(0xA397F666C533C5C5), + SPH_C64(0x104AB13525942525), SPH_C64(0xABB220F259795959), + SPH_C64(0xD015AE54842A8484), SPH_C64(0xC5E4A7B772D57272), + SPH_C64(0xEC72DDD539E43939), SPH_C64(0x1698615A4C2D4C4C), + SPH_C64(0x94BC3BCA5E655E5E), SPH_C64(0x9FF085E778FD7878), + SPH_C64(0xE570D8DD38E03838), SPH_C64(0x980586148C0A8C8C), + SPH_C64(0x17BFB2C6D163D1D1), SPH_C64(0xE4570B41A5AEA5A5), + SPH_C64(0xA1D94D43E2AFE2E2), SPH_C64(0x4EC2F82F61996161), + SPH_C64(0x427B45F1B3F6B3B3), SPH_C64(0x3442A51521842121), + SPH_C64(0x0825D6949C4A9C9C), SPH_C64(0xEE3C66F01E781E1E), + SPH_C64(0x6186522243114343), SPH_C64(0xB193FC76C73BC7C7), + SPH_C64(0x4FE52BB3FCD7FCFC), SPH_C64(0x2408142004100404), + SPH_C64(0xE3A208B251595151), SPH_C64(0x252FC7BC995E9999), + SPH_C64(0x22DAC44F6DA96D6D), SPH_C64(0x651A39680D340D0D), + SPH_C64(0x79E93583FACFFAFA), SPH_C64(0x69A384B6DF5BDFDF), + SPH_C64(0xA9FC9BD77EE57E7E), SPH_C64(0x1948B43D24902424), + SPH_C64(0xFE76D7C53BEC3B3B), SPH_C64(0x9A4B3D31AB96ABAB), + SPH_C64(0xF081D13ECE1FCECE), SPH_C64(0x9922558811441111), + SPH_C64(0x8303890C8F068F8F), 
SPH_C64(0x049C6B4A4E254E4E), + SPH_C64(0x667351D1B7E6B7B7), SPH_C64(0xE0CB600BEB8BEBEB), + SPH_C64(0xC178CCFD3CF03C3C), SPH_C64(0xFD1FBF7C813E8181), + SPH_C64(0x4035FED4946A9494), SPH_C64(0x1CF30CEBF7FBF7F7), + SPH_C64(0x186F67A1B9DEB9B9), SPH_C64(0x8B265F98134C1313), + SPH_C64(0x51589C7D2CB02C2C), SPH_C64(0x05BBB8D6D36BD3D3), + SPH_C64(0x8CD35C6BE7BBE7E7), SPH_C64(0x39DCCB576EA56E6E), + SPH_C64(0xAA95F36EC437C4C4), SPH_C64(0x1B060F18030C0303), + SPH_C64(0xDCAC138A56455656), SPH_C64(0x5E88491A440D4444), + SPH_C64(0xA0FE9EDF7FE17F7F), SPH_C64(0x884F3721A99EA9A9), + SPH_C64(0x6754824D2AA82A2A), SPH_C64(0x0A6B6DB1BBD6BBBB), + SPH_C64(0x879FE246C123C1C1), SPH_C64(0xF1A602A253515353), + SPH_C64(0x72A58BAEDC57DCDC), SPH_C64(0x531627580B2C0B0B), + SPH_C64(0x0127D39C9D4E9D9D), SPH_C64(0x2BD8C1476CAD6C6C), + SPH_C64(0xA462F59531C43131), SPH_C64(0xF3E8B98774CD7474), + SPH_C64(0x15F109E3F6FFF6F6), SPH_C64(0x4C8C430A46054646), + SPH_C64(0xA5452609AC8AACAC), SPH_C64(0xB50F973C891E8989), + SPH_C64(0xB42844A014501414), SPH_C64(0xBADF425BE1A3E1E1), + SPH_C64(0xA62C4EB016581616), SPH_C64(0xF774D2CD3AE83A3A), + SPH_C64(0x06D2D06F69B96969), SPH_C64(0x41122D4809240909), + SPH_C64(0xD7E0ADA770DD7070), SPH_C64(0x6F7154D9B6E2B6B6), + SPH_C64(0x1EBDB7CED067D0D0), SPH_C64(0xD6C77E3BED93EDED), + SPH_C64(0xE285DB2ECC17CCCC), SPH_C64(0x6884572A42154242), + SPH_C64(0x2C2DC2B4985A9898), SPH_C64(0xED550E49A4AAA4A4), + SPH_C64(0x7550885D28A02828), SPH_C64(0x86B831DA5C6D5C5C), + SPH_C64(0x6BED3F93F8C7F8F8), SPH_C64(0xC211A44486228686) +}; + +static const uint64_t plain_T1[256] = { + SPH_C64(0x3078C018601818D8), SPH_C64(0x46AF05238C232326), + SPH_C64(0x91F97EC63FC6C6B8), SPH_C64(0xCD6F13E887E8E8FB), + SPH_C64(0x13A14C87268787CB), SPH_C64(0x6D62A9B8DAB8B811), + SPH_C64(0x0205080104010109), SPH_C64(0x9E6E424F214F4F0D), + SPH_C64(0x6CEEAD36D836369B), SPH_C64(0x510459A6A2A6A6FF), + SPH_C64(0xB9BDDED26FD2D20C), SPH_C64(0xF706FBF5F3F5F50E), + SPH_C64(0xF280EF79F9797996), SPH_C64(0xDECE5F6FA16F6F30), + 
SPH_C64(0x3FEFFC917E91916D), SPH_C64(0xA407AA52555252F8), + SPH_C64(0xC0FD27609D606047), SPH_C64(0x657689BCCABCBC35), + SPH_C64(0x2BCDAC9B569B9B37), SPH_C64(0x018C048E028E8E8A), + SPH_C64(0x5B1571A3B6A3A3D2), SPH_C64(0x183C600C300C0C6C), + SPH_C64(0xF68AFF7BF17B7B84), SPH_C64(0x6AE1B535D4353580), + SPH_C64(0x3A69E81D741D1DF5), SPH_C64(0xDD4753E0A7E0E0B3), + SPH_C64(0xB3ACF6D77BD7D721), SPH_C64(0x99ED5EC22FC2C29C), + SPH_C64(0x5C966D2EB82E2E43), SPH_C64(0x967A624B314B4B29), + SPH_C64(0xE121A3FEDFFEFE5D), SPH_C64(0xAE168257415757D5), + SPH_C64(0x2A41A815541515BD), SPH_C64(0xEEB69F77C17777E8), + SPH_C64(0x6EEBA537DC373792), SPH_C64(0xD7567BE5B3E5E59E), + SPH_C64(0x23D98C9F469F9F13), SPH_C64(0xFD17D3F0E7F0F023), + SPH_C64(0x947F6A4A354A4A20), SPH_C64(0xA9959EDA4FDADA44), + SPH_C64(0xB025FA587D5858A2), SPH_C64(0x8FCA06C903C9C9CF), + SPH_C64(0x528D5529A429297C), SPH_C64(0x1422500A280A0A5A), + SPH_C64(0x7F4FE1B1FEB1B150), SPH_C64(0x5D1A69A0BAA0A0C9), + SPH_C64(0xD6DA7F6BB16B6B14), SPH_C64(0x17AB5C852E8585D9), + SPH_C64(0x677381BDCEBDBD3C), SPH_C64(0xBA34D25D695D5D8F), + SPH_C64(0x2050801040101090), SPH_C64(0xF503F3F4F7F4F407), + SPH_C64(0x8BC016CB0BCBCBDD), SPH_C64(0x7CC6ED3EF83E3ED3), + SPH_C64(0x0A1128051405052D), SPH_C64(0xCEE61F6781676778), + SPH_C64(0xD55373E4B7E4E497), SPH_C64(0x4EBB25279C272702), + SPH_C64(0x8258324119414173), SPH_C64(0x0B9D2C8B168B8BA7), + SPH_C64(0x530151A7A6A7A7F6), SPH_C64(0xFA94CF7DE97D7DB2), + SPH_C64(0x37FBDC956E959549), SPH_C64(0xAD9F8ED847D8D856), + SPH_C64(0xEB308BFBCBFBFB70), SPH_C64(0xC17123EE9FEEEECD), + SPH_C64(0xF891C77CED7C7CBB), SPH_C64(0xCCE3176685666671), + SPH_C64(0xA78EA6DD53DDDD7B), SPH_C64(0x2E4BB8175C1717AF), + SPH_C64(0x8E46024701474745), SPH_C64(0x21DC849E429E9E1A), + SPH_C64(0x89C51ECA0FCACAD4), SPH_C64(0x5A99752DB42D2D58), + SPH_C64(0x637991BFC6BFBF2E), SPH_C64(0x0E1B38071C07073F), + SPH_C64(0x472301AD8EADADAC), SPH_C64(0xB42FEA5A755A5AB0), + SPH_C64(0x1BB56C83368383EF), SPH_C64(0x66FF8533CC3333B6), + 
SPH_C64(0xC6F23F639163635C), SPH_C64(0x040A100208020212), + SPH_C64(0x493839AA92AAAA93), SPH_C64(0xE2A8AF71D97171DE), + SPH_C64(0x8DCF0EC807C8C8C6), SPH_C64(0x327DC819641919D1), + SPH_C64(0x927072493949493B), SPH_C64(0xAF9A86D943D9D95F), + SPH_C64(0xF91DC3F2EFF2F231), SPH_C64(0xDB484BE3ABE3E3A8), + SPH_C64(0xB62AE25B715B5BB9), SPH_C64(0x0D9234881A8888BC), + SPH_C64(0x29C8A49A529A9A3E), SPH_C64(0x4CBE2D269826260B), + SPH_C64(0x64FA8D32C83232BF), SPH_C64(0x7D4AE9B0FAB0B059), + SPH_C64(0xCF6A1BE983E9E9F2), SPH_C64(0x1E33780F3C0F0F77), + SPH_C64(0xB7A6E6D573D5D533), SPH_C64(0x1DBA74803A8080F4), + SPH_C64(0x617C99BEC2BEBE27), SPH_C64(0x87DE26CD13CDCDEB), + SPH_C64(0x68E4BD34D0343489), SPH_C64(0x90757A483D484832), + SPH_C64(0xE324ABFFDBFFFF54), SPH_C64(0xF48FF77AF57A7A8D), + SPH_C64(0x3DEAF4907A909064), SPH_C64(0xBE3EC25F615F5F9D), + SPH_C64(0x40A01D208020203D), SPH_C64(0xD0D56768BD68680F), + SPH_C64(0x3472D01A681A1ACA), SPH_C64(0x412C19AE82AEAEB7), + SPH_C64(0x755EC9B4EAB4B47D), SPH_C64(0xA8199A544D5454CE), + SPH_C64(0x3BE5EC937693937F), SPH_C64(0x44AA0D228822222F), + SPH_C64(0xC8E907648D646463), SPH_C64(0xFF12DBF1E3F1F12A), + SPH_C64(0xE6A2BF73D17373CC), SPH_C64(0x245A901248121282), + SPH_C64(0x805D3A401D40407A), SPH_C64(0x1028400820080848), + SPH_C64(0x9BE856C32BC3C395), SPH_C64(0xC57B33EC97ECECDF), + SPH_C64(0xAB9096DB4BDBDB4D), SPH_C64(0x5F1F61A1BEA1A1C0), + SPH_C64(0x07831C8D0E8D8D91), SPH_C64(0x7AC9F53DF43D3DC8), + SPH_C64(0x33F1CC976697975B), SPH_C64(0x0000000000000000), + SPH_C64(0x83D436CF1BCFCFF9), SPH_C64(0x5687452BAC2B2B6E), + SPH_C64(0xECB39776C57676E1), SPH_C64(0x19B06482328282E6), + SPH_C64(0xB1A9FED67FD6D628), SPH_C64(0x3677D81B6C1B1BC3), + SPH_C64(0x775BC1B5EEB5B574), SPH_C64(0x432911AF86AFAFBE), + SPH_C64(0xD4DF776AB56A6A1D), SPH_C64(0xA00DBA505D5050EA), + SPH_C64(0x8A4C124509454557), SPH_C64(0xFB18CBF3EBF3F338), + SPH_C64(0x60F09D30C03030AD), SPH_C64(0xC3742BEF9BEFEFC4), + SPH_C64(0x7EC3E53FFC3F3FDA), SPH_C64(0xAA1C9255495555C7), + 
SPH_C64(0x591079A2B2A2A2DB), SPH_C64(0xC96503EA8FEAEAE9), + SPH_C64(0xCAEC0F658965656A), SPH_C64(0x6968B9BAD2BABA03), + SPH_C64(0x5E93652FBC2F2F4A), SPH_C64(0x9DE74EC027C0C08E), + SPH_C64(0xA181BEDE5FDEDE60), SPH_C64(0x386CE01C701C1CFC), + SPH_C64(0xE72EBBFDD3FDFD46), SPH_C64(0x9A64524D294D4D1F), + SPH_C64(0x39E0E49272929276), SPH_C64(0xEABC8F75C97575FA), + SPH_C64(0x0C1E300618060636), SPH_C64(0x0998248A128A8AAE), + SPH_C64(0x7940F9B2F2B2B24B), SPH_C64(0xD15963E6BFE6E685), + SPH_C64(0x1C36700E380E0E7E), SPH_C64(0x3E63F81F7C1F1FE7), + SPH_C64(0xC4F7376295626255), SPH_C64(0xB5A3EED477D4D43A), + SPH_C64(0x4D3229A89AA8A881), SPH_C64(0x31F4C49662969652), + SPH_C64(0xEF3A9BF9C3F9F962), SPH_C64(0x97F666C533C5C5A3), + SPH_C64(0x4AB1352594252510), SPH_C64(0xB220F259795959AB), + SPH_C64(0x15AE54842A8484D0), SPH_C64(0xE4A7B772D57272C5), + SPH_C64(0x72DDD539E43939EC), SPH_C64(0x98615A4C2D4C4C16), + SPH_C64(0xBC3BCA5E655E5E94), SPH_C64(0xF085E778FD78789F), + SPH_C64(0x70D8DD38E03838E5), SPH_C64(0x0586148C0A8C8C98), + SPH_C64(0xBFB2C6D163D1D117), SPH_C64(0x570B41A5AEA5A5E4), + SPH_C64(0xD94D43E2AFE2E2A1), SPH_C64(0xC2F82F619961614E), + SPH_C64(0x7B45F1B3F6B3B342), SPH_C64(0x42A5152184212134), + SPH_C64(0x25D6949C4A9C9C08), SPH_C64(0x3C66F01E781E1EEE), + SPH_C64(0x8652224311434361), SPH_C64(0x93FC76C73BC7C7B1), + SPH_C64(0xE52BB3FCD7FCFC4F), SPH_C64(0x0814200410040424), + SPH_C64(0xA208B251595151E3), SPH_C64(0x2FC7BC995E999925), + SPH_C64(0xDAC44F6DA96D6D22), SPH_C64(0x1A39680D340D0D65), + SPH_C64(0xE93583FACFFAFA79), SPH_C64(0xA384B6DF5BDFDF69), + SPH_C64(0xFC9BD77EE57E7EA9), SPH_C64(0x48B43D2490242419), + SPH_C64(0x76D7C53BEC3B3BFE), SPH_C64(0x4B3D31AB96ABAB9A), + SPH_C64(0x81D13ECE1FCECEF0), SPH_C64(0x2255881144111199), + SPH_C64(0x03890C8F068F8F83), SPH_C64(0x9C6B4A4E254E4E04), + SPH_C64(0x7351D1B7E6B7B766), SPH_C64(0xCB600BEB8BEBEBE0), + SPH_C64(0x78CCFD3CF03C3CC1), SPH_C64(0x1FBF7C813E8181FD), + SPH_C64(0x35FED4946A949440), SPH_C64(0xF30CEBF7FBF7F71C), + 
SPH_C64(0x6F67A1B9DEB9B918), SPH_C64(0x265F98134C13138B), + SPH_C64(0x589C7D2CB02C2C51), SPH_C64(0xBBB8D6D36BD3D305), + SPH_C64(0xD35C6BE7BBE7E78C), SPH_C64(0xDCCB576EA56E6E39), + SPH_C64(0x95F36EC437C4C4AA), SPH_C64(0x060F18030C03031B), + SPH_C64(0xAC138A56455656DC), SPH_C64(0x88491A440D44445E), + SPH_C64(0xFE9EDF7FE17F7FA0), SPH_C64(0x4F3721A99EA9A988), + SPH_C64(0x54824D2AA82A2A67), SPH_C64(0x6B6DB1BBD6BBBB0A), + SPH_C64(0x9FE246C123C1C187), SPH_C64(0xA602A253515353F1), + SPH_C64(0xA58BAEDC57DCDC72), SPH_C64(0x1627580B2C0B0B53), + SPH_C64(0x27D39C9D4E9D9D01), SPH_C64(0xD8C1476CAD6C6C2B), + SPH_C64(0x62F59531C43131A4), SPH_C64(0xE8B98774CD7474F3), + SPH_C64(0xF109E3F6FFF6F615), SPH_C64(0x8C430A460546464C), + SPH_C64(0x452609AC8AACACA5), SPH_C64(0x0F973C891E8989B5), + SPH_C64(0x2844A014501414B4), SPH_C64(0xDF425BE1A3E1E1BA), + SPH_C64(0x2C4EB016581616A6), SPH_C64(0x74D2CD3AE83A3AF7), + SPH_C64(0xD2D06F69B9696906), SPH_C64(0x122D480924090941), + SPH_C64(0xE0ADA770DD7070D7), SPH_C64(0x7154D9B6E2B6B66F), + SPH_C64(0xBDB7CED067D0D01E), SPH_C64(0xC77E3BED93EDEDD6), + SPH_C64(0x85DB2ECC17CCCCE2), SPH_C64(0x84572A4215424268), + SPH_C64(0x2DC2B4985A98982C), SPH_C64(0x550E49A4AAA4A4ED), + SPH_C64(0x50885D28A0282875), SPH_C64(0xB831DA5C6D5C5C86), + SPH_C64(0xED3F93F8C7F8F86B), SPH_C64(0x11A44486228686C2) +}; + +static const uint64_t plain_T2[256] = { + SPH_C64(0x78C018601818D830), SPH_C64(0xAF05238C23232646), + SPH_C64(0xF97EC63FC6C6B891), SPH_C64(0x6F13E887E8E8FBCD), + SPH_C64(0xA14C87268787CB13), SPH_C64(0x62A9B8DAB8B8116D), + SPH_C64(0x0508010401010902), SPH_C64(0x6E424F214F4F0D9E), + SPH_C64(0xEEAD36D836369B6C), SPH_C64(0x0459A6A2A6A6FF51), + SPH_C64(0xBDDED26FD2D20CB9), SPH_C64(0x06FBF5F3F5F50EF7), + SPH_C64(0x80EF79F9797996F2), SPH_C64(0xCE5F6FA16F6F30DE), + SPH_C64(0xEFFC917E91916D3F), SPH_C64(0x07AA52555252F8A4), + SPH_C64(0xFD27609D606047C0), SPH_C64(0x7689BCCABCBC3565), + SPH_C64(0xCDAC9B569B9B372B), SPH_C64(0x8C048E028E8E8A01), + SPH_C64(0x1571A3B6A3A3D25B), 
SPH_C64(0x3C600C300C0C6C18), + SPH_C64(0x8AFF7BF17B7B84F6), SPH_C64(0xE1B535D43535806A), + SPH_C64(0x69E81D741D1DF53A), SPH_C64(0x4753E0A7E0E0B3DD), + SPH_C64(0xACF6D77BD7D721B3), SPH_C64(0xED5EC22FC2C29C99), + SPH_C64(0x966D2EB82E2E435C), SPH_C64(0x7A624B314B4B2996), + SPH_C64(0x21A3FEDFFEFE5DE1), SPH_C64(0x168257415757D5AE), + SPH_C64(0x41A815541515BD2A), SPH_C64(0xB69F77C17777E8EE), + SPH_C64(0xEBA537DC3737926E), SPH_C64(0x567BE5B3E5E59ED7), + SPH_C64(0xD98C9F469F9F1323), SPH_C64(0x17D3F0E7F0F023FD), + SPH_C64(0x7F6A4A354A4A2094), SPH_C64(0x959EDA4FDADA44A9), + SPH_C64(0x25FA587D5858A2B0), SPH_C64(0xCA06C903C9C9CF8F), + SPH_C64(0x8D5529A429297C52), SPH_C64(0x22500A280A0A5A14), + SPH_C64(0x4FE1B1FEB1B1507F), SPH_C64(0x1A69A0BAA0A0C95D), + SPH_C64(0xDA7F6BB16B6B14D6), SPH_C64(0xAB5C852E8585D917), + SPH_C64(0x7381BDCEBDBD3C67), SPH_C64(0x34D25D695D5D8FBA), + SPH_C64(0x5080104010109020), SPH_C64(0x03F3F4F7F4F407F5), + SPH_C64(0xC016CB0BCBCBDD8B), SPH_C64(0xC6ED3EF83E3ED37C), + SPH_C64(0x1128051405052D0A), SPH_C64(0xE61F6781676778CE), + SPH_C64(0x5373E4B7E4E497D5), SPH_C64(0xBB25279C2727024E), + SPH_C64(0x5832411941417382), SPH_C64(0x9D2C8B168B8BA70B), + SPH_C64(0x0151A7A6A7A7F653), SPH_C64(0x94CF7DE97D7DB2FA), + SPH_C64(0xFBDC956E95954937), SPH_C64(0x9F8ED847D8D856AD), + SPH_C64(0x308BFBCBFBFB70EB), SPH_C64(0x7123EE9FEEEECDC1), + SPH_C64(0x91C77CED7C7CBBF8), SPH_C64(0xE3176685666671CC), + SPH_C64(0x8EA6DD53DDDD7BA7), SPH_C64(0x4BB8175C1717AF2E), + SPH_C64(0x460247014747458E), SPH_C64(0xDC849E429E9E1A21), + SPH_C64(0xC51ECA0FCACAD489), SPH_C64(0x99752DB42D2D585A), + SPH_C64(0x7991BFC6BFBF2E63), SPH_C64(0x1B38071C07073F0E), + SPH_C64(0x2301AD8EADADAC47), SPH_C64(0x2FEA5A755A5AB0B4), + SPH_C64(0xB56C83368383EF1B), SPH_C64(0xFF8533CC3333B666), + SPH_C64(0xF23F639163635CC6), SPH_C64(0x0A10020802021204), + SPH_C64(0x3839AA92AAAA9349), SPH_C64(0xA8AF71D97171DEE2), + SPH_C64(0xCF0EC807C8C8C68D), SPH_C64(0x7DC819641919D132), + SPH_C64(0x7072493949493B92), 
SPH_C64(0x9A86D943D9D95FAF), + SPH_C64(0x1DC3F2EFF2F231F9), SPH_C64(0x484BE3ABE3E3A8DB), + SPH_C64(0x2AE25B715B5BB9B6), SPH_C64(0x9234881A8888BC0D), + SPH_C64(0xC8A49A529A9A3E29), SPH_C64(0xBE2D269826260B4C), + SPH_C64(0xFA8D32C83232BF64), SPH_C64(0x4AE9B0FAB0B0597D), + SPH_C64(0x6A1BE983E9E9F2CF), SPH_C64(0x33780F3C0F0F771E), + SPH_C64(0xA6E6D573D5D533B7), SPH_C64(0xBA74803A8080F41D), + SPH_C64(0x7C99BEC2BEBE2761), SPH_C64(0xDE26CD13CDCDEB87), + SPH_C64(0xE4BD34D034348968), SPH_C64(0x757A483D48483290), + SPH_C64(0x24ABFFDBFFFF54E3), SPH_C64(0x8FF77AF57A7A8DF4), + SPH_C64(0xEAF4907A9090643D), SPH_C64(0x3EC25F615F5F9DBE), + SPH_C64(0xA01D208020203D40), SPH_C64(0xD56768BD68680FD0), + SPH_C64(0x72D01A681A1ACA34), SPH_C64(0x2C19AE82AEAEB741), + SPH_C64(0x5EC9B4EAB4B47D75), SPH_C64(0x199A544D5454CEA8), + SPH_C64(0xE5EC937693937F3B), SPH_C64(0xAA0D228822222F44), + SPH_C64(0xE907648D646463C8), SPH_C64(0x12DBF1E3F1F12AFF), + SPH_C64(0xA2BF73D17373CCE6), SPH_C64(0x5A90124812128224), + SPH_C64(0x5D3A401D40407A80), SPH_C64(0x2840082008084810), + SPH_C64(0xE856C32BC3C3959B), SPH_C64(0x7B33EC97ECECDFC5), + SPH_C64(0x9096DB4BDBDB4DAB), SPH_C64(0x1F61A1BEA1A1C05F), + SPH_C64(0x831C8D0E8D8D9107), SPH_C64(0xC9F53DF43D3DC87A), + SPH_C64(0xF1CC976697975B33), SPH_C64(0x0000000000000000), + SPH_C64(0xD436CF1BCFCFF983), SPH_C64(0x87452BAC2B2B6E56), + SPH_C64(0xB39776C57676E1EC), SPH_C64(0xB06482328282E619), + SPH_C64(0xA9FED67FD6D628B1), SPH_C64(0x77D81B6C1B1BC336), + SPH_C64(0x5BC1B5EEB5B57477), SPH_C64(0x2911AF86AFAFBE43), + SPH_C64(0xDF776AB56A6A1DD4), SPH_C64(0x0DBA505D5050EAA0), + SPH_C64(0x4C1245094545578A), SPH_C64(0x18CBF3EBF3F338FB), + SPH_C64(0xF09D30C03030AD60), SPH_C64(0x742BEF9BEFEFC4C3), + SPH_C64(0xC3E53FFC3F3FDA7E), SPH_C64(0x1C9255495555C7AA), + SPH_C64(0x1079A2B2A2A2DB59), SPH_C64(0x6503EA8FEAEAE9C9), + SPH_C64(0xEC0F658965656ACA), SPH_C64(0x68B9BAD2BABA0369), + SPH_C64(0x93652FBC2F2F4A5E), SPH_C64(0xE74EC027C0C08E9D), + SPH_C64(0x81BEDE5FDEDE60A1), 
SPH_C64(0x6CE01C701C1CFC38), + SPH_C64(0x2EBBFDD3FDFD46E7), SPH_C64(0x64524D294D4D1F9A), + SPH_C64(0xE0E4927292927639), SPH_C64(0xBC8F75C97575FAEA), + SPH_C64(0x1E3006180606360C), SPH_C64(0x98248A128A8AAE09), + SPH_C64(0x40F9B2F2B2B24B79), SPH_C64(0x5963E6BFE6E685D1), + SPH_C64(0x36700E380E0E7E1C), SPH_C64(0x63F81F7C1F1FE73E), + SPH_C64(0xF7376295626255C4), SPH_C64(0xA3EED477D4D43AB5), + SPH_C64(0x3229A89AA8A8814D), SPH_C64(0xF4C4966296965231), + SPH_C64(0x3A9BF9C3F9F962EF), SPH_C64(0xF666C533C5C5A397), + SPH_C64(0xB13525942525104A), SPH_C64(0x20F259795959ABB2), + SPH_C64(0xAE54842A8484D015), SPH_C64(0xA7B772D57272C5E4), + SPH_C64(0xDDD539E43939EC72), SPH_C64(0x615A4C2D4C4C1698), + SPH_C64(0x3BCA5E655E5E94BC), SPH_C64(0x85E778FD78789FF0), + SPH_C64(0xD8DD38E03838E570), SPH_C64(0x86148C0A8C8C9805), + SPH_C64(0xB2C6D163D1D117BF), SPH_C64(0x0B41A5AEA5A5E457), + SPH_C64(0x4D43E2AFE2E2A1D9), SPH_C64(0xF82F619961614EC2), + SPH_C64(0x45F1B3F6B3B3427B), SPH_C64(0xA515218421213442), + SPH_C64(0xD6949C4A9C9C0825), SPH_C64(0x66F01E781E1EEE3C), + SPH_C64(0x5222431143436186), SPH_C64(0xFC76C73BC7C7B193), + SPH_C64(0x2BB3FCD7FCFC4FE5), SPH_C64(0x1420041004042408), + SPH_C64(0x08B251595151E3A2), SPH_C64(0xC7BC995E9999252F), + SPH_C64(0xC44F6DA96D6D22DA), SPH_C64(0x39680D340D0D651A), + SPH_C64(0x3583FACFFAFA79E9), SPH_C64(0x84B6DF5BDFDF69A3), + SPH_C64(0x9BD77EE57E7EA9FC), SPH_C64(0xB43D249024241948), + SPH_C64(0xD7C53BEC3B3BFE76), SPH_C64(0x3D31AB96ABAB9A4B), + SPH_C64(0xD13ECE1FCECEF081), SPH_C64(0x5588114411119922), + SPH_C64(0x890C8F068F8F8303), SPH_C64(0x6B4A4E254E4E049C), + SPH_C64(0x51D1B7E6B7B76673), SPH_C64(0x600BEB8BEBEBE0CB), + SPH_C64(0xCCFD3CF03C3CC178), SPH_C64(0xBF7C813E8181FD1F), + SPH_C64(0xFED4946A94944035), SPH_C64(0x0CEBF7FBF7F71CF3), + SPH_C64(0x67A1B9DEB9B9186F), SPH_C64(0x5F98134C13138B26), + SPH_C64(0x9C7D2CB02C2C5158), SPH_C64(0xB8D6D36BD3D305BB), + SPH_C64(0x5C6BE7BBE7E78CD3), SPH_C64(0xCB576EA56E6E39DC), + SPH_C64(0xF36EC437C4C4AA95), 
SPH_C64(0x0F18030C03031B06), + SPH_C64(0x138A56455656DCAC), SPH_C64(0x491A440D44445E88), + SPH_C64(0x9EDF7FE17F7FA0FE), SPH_C64(0x3721A99EA9A9884F), + SPH_C64(0x824D2AA82A2A6754), SPH_C64(0x6DB1BBD6BBBB0A6B), + SPH_C64(0xE246C123C1C1879F), SPH_C64(0x02A253515353F1A6), + SPH_C64(0x8BAEDC57DCDC72A5), SPH_C64(0x27580B2C0B0B5316), + SPH_C64(0xD39C9D4E9D9D0127), SPH_C64(0xC1476CAD6C6C2BD8), + SPH_C64(0xF59531C43131A462), SPH_C64(0xB98774CD7474F3E8), + SPH_C64(0x09E3F6FFF6F615F1), SPH_C64(0x430A460546464C8C), + SPH_C64(0x2609AC8AACACA545), SPH_C64(0x973C891E8989B50F), + SPH_C64(0x44A014501414B428), SPH_C64(0x425BE1A3E1E1BADF), + SPH_C64(0x4EB016581616A62C), SPH_C64(0xD2CD3AE83A3AF774), + SPH_C64(0xD06F69B9696906D2), SPH_C64(0x2D48092409094112), + SPH_C64(0xADA770DD7070D7E0), SPH_C64(0x54D9B6E2B6B66F71), + SPH_C64(0xB7CED067D0D01EBD), SPH_C64(0x7E3BED93EDEDD6C7), + SPH_C64(0xDB2ECC17CCCCE285), SPH_C64(0x572A421542426884), + SPH_C64(0xC2B4985A98982C2D), SPH_C64(0x0E49A4AAA4A4ED55), + SPH_C64(0x885D28A028287550), SPH_C64(0x31DA5C6D5C5C86B8), + SPH_C64(0x3F93F8C7F8F86BED), SPH_C64(0xA44486228686C211) +}; + +static const uint64_t plain_T3[256] = { + SPH_C64(0xC018601818D83078), SPH_C64(0x05238C23232646AF), + SPH_C64(0x7EC63FC6C6B891F9), SPH_C64(0x13E887E8E8FBCD6F), + SPH_C64(0x4C87268787CB13A1), SPH_C64(0xA9B8DAB8B8116D62), + SPH_C64(0x0801040101090205), SPH_C64(0x424F214F4F0D9E6E), + SPH_C64(0xAD36D836369B6CEE), SPH_C64(0x59A6A2A6A6FF5104), + SPH_C64(0xDED26FD2D20CB9BD), SPH_C64(0xFBF5F3F5F50EF706), + SPH_C64(0xEF79F9797996F280), SPH_C64(0x5F6FA16F6F30DECE), + SPH_C64(0xFC917E91916D3FEF), SPH_C64(0xAA52555252F8A407), + SPH_C64(0x27609D606047C0FD), SPH_C64(0x89BCCABCBC356576), + SPH_C64(0xAC9B569B9B372BCD), SPH_C64(0x048E028E8E8A018C), + SPH_C64(0x71A3B6A3A3D25B15), SPH_C64(0x600C300C0C6C183C), + SPH_C64(0xFF7BF17B7B84F68A), SPH_C64(0xB535D43535806AE1), + SPH_C64(0xE81D741D1DF53A69), SPH_C64(0x53E0A7E0E0B3DD47), + SPH_C64(0xF6D77BD7D721B3AC), SPH_C64(0x5EC22FC2C29C99ED), + 
SPH_C64(0x6D2EB82E2E435C96), SPH_C64(0x624B314B4B29967A), + SPH_C64(0xA3FEDFFEFE5DE121), SPH_C64(0x8257415757D5AE16), + SPH_C64(0xA815541515BD2A41), SPH_C64(0x9F77C17777E8EEB6), + SPH_C64(0xA537DC3737926EEB), SPH_C64(0x7BE5B3E5E59ED756), + SPH_C64(0x8C9F469F9F1323D9), SPH_C64(0xD3F0E7F0F023FD17), + SPH_C64(0x6A4A354A4A20947F), SPH_C64(0x9EDA4FDADA44A995), + SPH_C64(0xFA587D5858A2B025), SPH_C64(0x06C903C9C9CF8FCA), + SPH_C64(0x5529A429297C528D), SPH_C64(0x500A280A0A5A1422), + SPH_C64(0xE1B1FEB1B1507F4F), SPH_C64(0x69A0BAA0A0C95D1A), + SPH_C64(0x7F6BB16B6B14D6DA), SPH_C64(0x5C852E8585D917AB), + SPH_C64(0x81BDCEBDBD3C6773), SPH_C64(0xD25D695D5D8FBA34), + SPH_C64(0x8010401010902050), SPH_C64(0xF3F4F7F4F407F503), + SPH_C64(0x16CB0BCBCBDD8BC0), SPH_C64(0xED3EF83E3ED37CC6), + SPH_C64(0x28051405052D0A11), SPH_C64(0x1F6781676778CEE6), + SPH_C64(0x73E4B7E4E497D553), SPH_C64(0x25279C2727024EBB), + SPH_C64(0x3241194141738258), SPH_C64(0x2C8B168B8BA70B9D), + SPH_C64(0x51A7A6A7A7F65301), SPH_C64(0xCF7DE97D7DB2FA94), + SPH_C64(0xDC956E95954937FB), SPH_C64(0x8ED847D8D856AD9F), + SPH_C64(0x8BFBCBFBFB70EB30), SPH_C64(0x23EE9FEEEECDC171), + SPH_C64(0xC77CED7C7CBBF891), SPH_C64(0x176685666671CCE3), + SPH_C64(0xA6DD53DDDD7BA78E), SPH_C64(0xB8175C1717AF2E4B), + SPH_C64(0x0247014747458E46), SPH_C64(0x849E429E9E1A21DC), + SPH_C64(0x1ECA0FCACAD489C5), SPH_C64(0x752DB42D2D585A99), + SPH_C64(0x91BFC6BFBF2E6379), SPH_C64(0x38071C07073F0E1B), + SPH_C64(0x01AD8EADADAC4723), SPH_C64(0xEA5A755A5AB0B42F), + SPH_C64(0x6C83368383EF1BB5), SPH_C64(0x8533CC3333B666FF), + SPH_C64(0x3F639163635CC6F2), SPH_C64(0x100208020212040A), + SPH_C64(0x39AA92AAAA934938), SPH_C64(0xAF71D97171DEE2A8), + SPH_C64(0x0EC807C8C8C68DCF), SPH_C64(0xC819641919D1327D), + SPH_C64(0x72493949493B9270), SPH_C64(0x86D943D9D95FAF9A), + SPH_C64(0xC3F2EFF2F231F91D), SPH_C64(0x4BE3ABE3E3A8DB48), + SPH_C64(0xE25B715B5BB9B62A), SPH_C64(0x34881A8888BC0D92), + SPH_C64(0xA49A529A9A3E29C8), SPH_C64(0x2D269826260B4CBE), + 
SPH_C64(0x8D32C83232BF64FA), SPH_C64(0xE9B0FAB0B0597D4A), + SPH_C64(0x1BE983E9E9F2CF6A), SPH_C64(0x780F3C0F0F771E33), + SPH_C64(0xE6D573D5D533B7A6), SPH_C64(0x74803A8080F41DBA), + SPH_C64(0x99BEC2BEBE27617C), SPH_C64(0x26CD13CDCDEB87DE), + SPH_C64(0xBD34D034348968E4), SPH_C64(0x7A483D4848329075), + SPH_C64(0xABFFDBFFFF54E324), SPH_C64(0xF77AF57A7A8DF48F), + SPH_C64(0xF4907A9090643DEA), SPH_C64(0xC25F615F5F9DBE3E), + SPH_C64(0x1D208020203D40A0), SPH_C64(0x6768BD68680FD0D5), + SPH_C64(0xD01A681A1ACA3472), SPH_C64(0x19AE82AEAEB7412C), + SPH_C64(0xC9B4EAB4B47D755E), SPH_C64(0x9A544D5454CEA819), + SPH_C64(0xEC937693937F3BE5), SPH_C64(0x0D228822222F44AA), + SPH_C64(0x07648D646463C8E9), SPH_C64(0xDBF1E3F1F12AFF12), + SPH_C64(0xBF73D17373CCE6A2), SPH_C64(0x901248121282245A), + SPH_C64(0x3A401D40407A805D), SPH_C64(0x4008200808481028), + SPH_C64(0x56C32BC3C3959BE8), SPH_C64(0x33EC97ECECDFC57B), + SPH_C64(0x96DB4BDBDB4DAB90), SPH_C64(0x61A1BEA1A1C05F1F), + SPH_C64(0x1C8D0E8D8D910783), SPH_C64(0xF53DF43D3DC87AC9), + SPH_C64(0xCC976697975B33F1), SPH_C64(0x0000000000000000), + SPH_C64(0x36CF1BCFCFF983D4), SPH_C64(0x452BAC2B2B6E5687), + SPH_C64(0x9776C57676E1ECB3), SPH_C64(0x6482328282E619B0), + SPH_C64(0xFED67FD6D628B1A9), SPH_C64(0xD81B6C1B1BC33677), + SPH_C64(0xC1B5EEB5B574775B), SPH_C64(0x11AF86AFAFBE4329), + SPH_C64(0x776AB56A6A1DD4DF), SPH_C64(0xBA505D5050EAA00D), + SPH_C64(0x1245094545578A4C), SPH_C64(0xCBF3EBF3F338FB18), + SPH_C64(0x9D30C03030AD60F0), SPH_C64(0x2BEF9BEFEFC4C374), + SPH_C64(0xE53FFC3F3FDA7EC3), SPH_C64(0x9255495555C7AA1C), + SPH_C64(0x79A2B2A2A2DB5910), SPH_C64(0x03EA8FEAEAE9C965), + SPH_C64(0x0F658965656ACAEC), SPH_C64(0xB9BAD2BABA036968), + SPH_C64(0x652FBC2F2F4A5E93), SPH_C64(0x4EC027C0C08E9DE7), + SPH_C64(0xBEDE5FDEDE60A181), SPH_C64(0xE01C701C1CFC386C), + SPH_C64(0xBBFDD3FDFD46E72E), SPH_C64(0x524D294D4D1F9A64), + SPH_C64(0xE4927292927639E0), SPH_C64(0x8F75C97575FAEABC), + SPH_C64(0x3006180606360C1E), SPH_C64(0x248A128A8AAE0998), + 
SPH_C64(0xF9B2F2B2B24B7940), SPH_C64(0x63E6BFE6E685D159), + SPH_C64(0x700E380E0E7E1C36), SPH_C64(0xF81F7C1F1FE73E63), + SPH_C64(0x376295626255C4F7), SPH_C64(0xEED477D4D43AB5A3), + SPH_C64(0x29A89AA8A8814D32), SPH_C64(0xC4966296965231F4), + SPH_C64(0x9BF9C3F9F962EF3A), SPH_C64(0x66C533C5C5A397F6), + SPH_C64(0x3525942525104AB1), SPH_C64(0xF259795959ABB220), + SPH_C64(0x54842A8484D015AE), SPH_C64(0xB772D57272C5E4A7), + SPH_C64(0xD539E43939EC72DD), SPH_C64(0x5A4C2D4C4C169861), + SPH_C64(0xCA5E655E5E94BC3B), SPH_C64(0xE778FD78789FF085), + SPH_C64(0xDD38E03838E570D8), SPH_C64(0x148C0A8C8C980586), + SPH_C64(0xC6D163D1D117BFB2), SPH_C64(0x41A5AEA5A5E4570B), + SPH_C64(0x43E2AFE2E2A1D94D), SPH_C64(0x2F619961614EC2F8), + SPH_C64(0xF1B3F6B3B3427B45), SPH_C64(0x15218421213442A5), + SPH_C64(0x949C4A9C9C0825D6), SPH_C64(0xF01E781E1EEE3C66), + SPH_C64(0x2243114343618652), SPH_C64(0x76C73BC7C7B193FC), + SPH_C64(0xB3FCD7FCFC4FE52B), SPH_C64(0x2004100404240814), + SPH_C64(0xB251595151E3A208), SPH_C64(0xBC995E9999252FC7), + SPH_C64(0x4F6DA96D6D22DAC4), SPH_C64(0x680D340D0D651A39), + SPH_C64(0x83FACFFAFA79E935), SPH_C64(0xB6DF5BDFDF69A384), + SPH_C64(0xD77EE57E7EA9FC9B), SPH_C64(0x3D249024241948B4), + SPH_C64(0xC53BEC3B3BFE76D7), SPH_C64(0x31AB96ABAB9A4B3D), + SPH_C64(0x3ECE1FCECEF081D1), SPH_C64(0x8811441111992255), + SPH_C64(0x0C8F068F8F830389), SPH_C64(0x4A4E254E4E049C6B), + SPH_C64(0xD1B7E6B7B7667351), SPH_C64(0x0BEB8BEBEBE0CB60), + SPH_C64(0xFD3CF03C3CC178CC), SPH_C64(0x7C813E8181FD1FBF), + SPH_C64(0xD4946A94944035FE), SPH_C64(0xEBF7FBF7F71CF30C), + SPH_C64(0xA1B9DEB9B9186F67), SPH_C64(0x98134C13138B265F), + SPH_C64(0x7D2CB02C2C51589C), SPH_C64(0xD6D36BD3D305BBB8), + SPH_C64(0x6BE7BBE7E78CD35C), SPH_C64(0x576EA56E6E39DCCB), + SPH_C64(0x6EC437C4C4AA95F3), SPH_C64(0x18030C03031B060F), + SPH_C64(0x8A56455656DCAC13), SPH_C64(0x1A440D44445E8849), + SPH_C64(0xDF7FE17F7FA0FE9E), SPH_C64(0x21A99EA9A9884F37), + SPH_C64(0x4D2AA82A2A675482), SPH_C64(0xB1BBD6BBBB0A6B6D), + 
SPH_C64(0x46C123C1C1879FE2), SPH_C64(0xA253515353F1A602), + SPH_C64(0xAEDC57DCDC72A58B), SPH_C64(0x580B2C0B0B531627), + SPH_C64(0x9C9D4E9D9D0127D3), SPH_C64(0x476CAD6C6C2BD8C1), + SPH_C64(0x9531C43131A462F5), SPH_C64(0x8774CD7474F3E8B9), + SPH_C64(0xE3F6FFF6F615F109), SPH_C64(0x0A460546464C8C43), + SPH_C64(0x09AC8AACACA54526), SPH_C64(0x3C891E8989B50F97), + SPH_C64(0xA014501414B42844), SPH_C64(0x5BE1A3E1E1BADF42), + SPH_C64(0xB016581616A62C4E), SPH_C64(0xCD3AE83A3AF774D2), + SPH_C64(0x6F69B9696906D2D0), SPH_C64(0x480924090941122D), + SPH_C64(0xA770DD7070D7E0AD), SPH_C64(0xD9B6E2B6B66F7154), + SPH_C64(0xCED067D0D01EBDB7), SPH_C64(0x3BED93EDEDD6C77E), + SPH_C64(0x2ECC17CCCCE285DB), SPH_C64(0x2A42154242688457), + SPH_C64(0xB4985A98982C2DC2), SPH_C64(0x49A4AAA4A4ED550E), + SPH_C64(0x5D28A02828755088), SPH_C64(0xDA5C6D5C5C86B831), + SPH_C64(0x93F8C7F8F86BED3F), SPH_C64(0x4486228686C211A4) +}; + +static const uint64_t plain_T4[256] = { + SPH_C64(0x18601818D83078C0), SPH_C64(0x238C23232646AF05), + SPH_C64(0xC63FC6C6B891F97E), SPH_C64(0xE887E8E8FBCD6F13), + SPH_C64(0x87268787CB13A14C), SPH_C64(0xB8DAB8B8116D62A9), + SPH_C64(0x0104010109020508), SPH_C64(0x4F214F4F0D9E6E42), + SPH_C64(0x36D836369B6CEEAD), SPH_C64(0xA6A2A6A6FF510459), + SPH_C64(0xD26FD2D20CB9BDDE), SPH_C64(0xF5F3F5F50EF706FB), + SPH_C64(0x79F9797996F280EF), SPH_C64(0x6FA16F6F30DECE5F), + SPH_C64(0x917E91916D3FEFFC), SPH_C64(0x52555252F8A407AA), + SPH_C64(0x609D606047C0FD27), SPH_C64(0xBCCABCBC35657689), + SPH_C64(0x9B569B9B372BCDAC), SPH_C64(0x8E028E8E8A018C04), + SPH_C64(0xA3B6A3A3D25B1571), SPH_C64(0x0C300C0C6C183C60), + SPH_C64(0x7BF17B7B84F68AFF), SPH_C64(0x35D43535806AE1B5), + SPH_C64(0x1D741D1DF53A69E8), SPH_C64(0xE0A7E0E0B3DD4753), + SPH_C64(0xD77BD7D721B3ACF6), SPH_C64(0xC22FC2C29C99ED5E), + SPH_C64(0x2EB82E2E435C966D), SPH_C64(0x4B314B4B29967A62), + SPH_C64(0xFEDFFEFE5DE121A3), SPH_C64(0x57415757D5AE1682), + SPH_C64(0x15541515BD2A41A8), SPH_C64(0x77C17777E8EEB69F), + SPH_C64(0x37DC3737926EEBA5), 
SPH_C64(0xE5B3E5E59ED7567B), + SPH_C64(0x9F469F9F1323D98C), SPH_C64(0xF0E7F0F023FD17D3), + SPH_C64(0x4A354A4A20947F6A), SPH_C64(0xDA4FDADA44A9959E), + SPH_C64(0x587D5858A2B025FA), SPH_C64(0xC903C9C9CF8FCA06), + SPH_C64(0x29A429297C528D55), SPH_C64(0x0A280A0A5A142250), + SPH_C64(0xB1FEB1B1507F4FE1), SPH_C64(0xA0BAA0A0C95D1A69), + SPH_C64(0x6BB16B6B14D6DA7F), SPH_C64(0x852E8585D917AB5C), + SPH_C64(0xBDCEBDBD3C677381), SPH_C64(0x5D695D5D8FBA34D2), + SPH_C64(0x1040101090205080), SPH_C64(0xF4F7F4F407F503F3), + SPH_C64(0xCB0BCBCBDD8BC016), SPH_C64(0x3EF83E3ED37CC6ED), + SPH_C64(0x051405052D0A1128), SPH_C64(0x6781676778CEE61F), + SPH_C64(0xE4B7E4E497D55373), SPH_C64(0x279C2727024EBB25), + SPH_C64(0x4119414173825832), SPH_C64(0x8B168B8BA70B9D2C), + SPH_C64(0xA7A6A7A7F6530151), SPH_C64(0x7DE97D7DB2FA94CF), + SPH_C64(0x956E95954937FBDC), SPH_C64(0xD847D8D856AD9F8E), + SPH_C64(0xFBCBFBFB70EB308B), SPH_C64(0xEE9FEEEECDC17123), + SPH_C64(0x7CED7C7CBBF891C7), SPH_C64(0x6685666671CCE317), + SPH_C64(0xDD53DDDD7BA78EA6), SPH_C64(0x175C1717AF2E4BB8), + SPH_C64(0x47014747458E4602), SPH_C64(0x9E429E9E1A21DC84), + SPH_C64(0xCA0FCACAD489C51E), SPH_C64(0x2DB42D2D585A9975), + SPH_C64(0xBFC6BFBF2E637991), SPH_C64(0x071C07073F0E1B38), + SPH_C64(0xAD8EADADAC472301), SPH_C64(0x5A755A5AB0B42FEA), + SPH_C64(0x83368383EF1BB56C), SPH_C64(0x33CC3333B666FF85), + SPH_C64(0x639163635CC6F23F), SPH_C64(0x0208020212040A10), + SPH_C64(0xAA92AAAA93493839), SPH_C64(0x71D97171DEE2A8AF), + SPH_C64(0xC807C8C8C68DCF0E), SPH_C64(0x19641919D1327DC8), + SPH_C64(0x493949493B927072), SPH_C64(0xD943D9D95FAF9A86), + SPH_C64(0xF2EFF2F231F91DC3), SPH_C64(0xE3ABE3E3A8DB484B), + SPH_C64(0x5B715B5BB9B62AE2), SPH_C64(0x881A8888BC0D9234), + SPH_C64(0x9A529A9A3E29C8A4), SPH_C64(0x269826260B4CBE2D), + SPH_C64(0x32C83232BF64FA8D), SPH_C64(0xB0FAB0B0597D4AE9), + SPH_C64(0xE983E9E9F2CF6A1B), SPH_C64(0x0F3C0F0F771E3378), + SPH_C64(0xD573D5D533B7A6E6), SPH_C64(0x803A8080F41DBA74), + SPH_C64(0xBEC2BEBE27617C99), 
SPH_C64(0xCD13CDCDEB87DE26), + SPH_C64(0x34D034348968E4BD), SPH_C64(0x483D48483290757A), + SPH_C64(0xFFDBFFFF54E324AB), SPH_C64(0x7AF57A7A8DF48FF7), + SPH_C64(0x907A9090643DEAF4), SPH_C64(0x5F615F5F9DBE3EC2), + SPH_C64(0x208020203D40A01D), SPH_C64(0x68BD68680FD0D567), + SPH_C64(0x1A681A1ACA3472D0), SPH_C64(0xAE82AEAEB7412C19), + SPH_C64(0xB4EAB4B47D755EC9), SPH_C64(0x544D5454CEA8199A), + SPH_C64(0x937693937F3BE5EC), SPH_C64(0x228822222F44AA0D), + SPH_C64(0x648D646463C8E907), SPH_C64(0xF1E3F1F12AFF12DB), + SPH_C64(0x73D17373CCE6A2BF), SPH_C64(0x1248121282245A90), + SPH_C64(0x401D40407A805D3A), SPH_C64(0x0820080848102840), + SPH_C64(0xC32BC3C3959BE856), SPH_C64(0xEC97ECECDFC57B33), + SPH_C64(0xDB4BDBDB4DAB9096), SPH_C64(0xA1BEA1A1C05F1F61), + SPH_C64(0x8D0E8D8D9107831C), SPH_C64(0x3DF43D3DC87AC9F5), + SPH_C64(0x976697975B33F1CC), SPH_C64(0x0000000000000000), + SPH_C64(0xCF1BCFCFF983D436), SPH_C64(0x2BAC2B2B6E568745), + SPH_C64(0x76C57676E1ECB397), SPH_C64(0x82328282E619B064), + SPH_C64(0xD67FD6D628B1A9FE), SPH_C64(0x1B6C1B1BC33677D8), + SPH_C64(0xB5EEB5B574775BC1), SPH_C64(0xAF86AFAFBE432911), + SPH_C64(0x6AB56A6A1DD4DF77), SPH_C64(0x505D5050EAA00DBA), + SPH_C64(0x45094545578A4C12), SPH_C64(0xF3EBF3F338FB18CB), + SPH_C64(0x30C03030AD60F09D), SPH_C64(0xEF9BEFEFC4C3742B), + SPH_C64(0x3FFC3F3FDA7EC3E5), SPH_C64(0x55495555C7AA1C92), + SPH_C64(0xA2B2A2A2DB591079), SPH_C64(0xEA8FEAEAE9C96503), + SPH_C64(0x658965656ACAEC0F), SPH_C64(0xBAD2BABA036968B9), + SPH_C64(0x2FBC2F2F4A5E9365), SPH_C64(0xC027C0C08E9DE74E), + SPH_C64(0xDE5FDEDE60A181BE), SPH_C64(0x1C701C1CFC386CE0), + SPH_C64(0xFDD3FDFD46E72EBB), SPH_C64(0x4D294D4D1F9A6452), + SPH_C64(0x927292927639E0E4), SPH_C64(0x75C97575FAEABC8F), + SPH_C64(0x06180606360C1E30), SPH_C64(0x8A128A8AAE099824), + SPH_C64(0xB2F2B2B24B7940F9), SPH_C64(0xE6BFE6E685D15963), + SPH_C64(0x0E380E0E7E1C3670), SPH_C64(0x1F7C1F1FE73E63F8), + SPH_C64(0x6295626255C4F737), SPH_C64(0xD477D4D43AB5A3EE), + SPH_C64(0xA89AA8A8814D3229), 
SPH_C64(0x966296965231F4C4), + SPH_C64(0xF9C3F9F962EF3A9B), SPH_C64(0xC533C5C5A397F666), + SPH_C64(0x25942525104AB135), SPH_C64(0x59795959ABB220F2), + SPH_C64(0x842A8484D015AE54), SPH_C64(0x72D57272C5E4A7B7), + SPH_C64(0x39E43939EC72DDD5), SPH_C64(0x4C2D4C4C1698615A), + SPH_C64(0x5E655E5E94BC3BCA), SPH_C64(0x78FD78789FF085E7), + SPH_C64(0x38E03838E570D8DD), SPH_C64(0x8C0A8C8C98058614), + SPH_C64(0xD163D1D117BFB2C6), SPH_C64(0xA5AEA5A5E4570B41), + SPH_C64(0xE2AFE2E2A1D94D43), SPH_C64(0x619961614EC2F82F), + SPH_C64(0xB3F6B3B3427B45F1), SPH_C64(0x218421213442A515), + SPH_C64(0x9C4A9C9C0825D694), SPH_C64(0x1E781E1EEE3C66F0), + SPH_C64(0x4311434361865222), SPH_C64(0xC73BC7C7B193FC76), + SPH_C64(0xFCD7FCFC4FE52BB3), SPH_C64(0x0410040424081420), + SPH_C64(0x51595151E3A208B2), SPH_C64(0x995E9999252FC7BC), + SPH_C64(0x6DA96D6D22DAC44F), SPH_C64(0x0D340D0D651A3968), + SPH_C64(0xFACFFAFA79E93583), SPH_C64(0xDF5BDFDF69A384B6), + SPH_C64(0x7EE57E7EA9FC9BD7), SPH_C64(0x249024241948B43D), + SPH_C64(0x3BEC3B3BFE76D7C5), SPH_C64(0xAB96ABAB9A4B3D31), + SPH_C64(0xCE1FCECEF081D13E), SPH_C64(0x1144111199225588), + SPH_C64(0x8F068F8F8303890C), SPH_C64(0x4E254E4E049C6B4A), + SPH_C64(0xB7E6B7B7667351D1), SPH_C64(0xEB8BEBEBE0CB600B), + SPH_C64(0x3CF03C3CC178CCFD), SPH_C64(0x813E8181FD1FBF7C), + SPH_C64(0x946A94944035FED4), SPH_C64(0xF7FBF7F71CF30CEB), + SPH_C64(0xB9DEB9B9186F67A1), SPH_C64(0x134C13138B265F98), + SPH_C64(0x2CB02C2C51589C7D), SPH_C64(0xD36BD3D305BBB8D6), + SPH_C64(0xE7BBE7E78CD35C6B), SPH_C64(0x6EA56E6E39DCCB57), + SPH_C64(0xC437C4C4AA95F36E), SPH_C64(0x030C03031B060F18), + SPH_C64(0x56455656DCAC138A), SPH_C64(0x440D44445E88491A), + SPH_C64(0x7FE17F7FA0FE9EDF), SPH_C64(0xA99EA9A9884F3721), + SPH_C64(0x2AA82A2A6754824D), SPH_C64(0xBBD6BBBB0A6B6DB1), + SPH_C64(0xC123C1C1879FE246), SPH_C64(0x53515353F1A602A2), + SPH_C64(0xDC57DCDC72A58BAE), SPH_C64(0x0B2C0B0B53162758), + SPH_C64(0x9D4E9D9D0127D39C), SPH_C64(0x6CAD6C6C2BD8C147), + SPH_C64(0x31C43131A462F595), 
SPH_C64(0x74CD7474F3E8B987), + SPH_C64(0xF6FFF6F615F109E3), SPH_C64(0x460546464C8C430A), + SPH_C64(0xAC8AACACA5452609), SPH_C64(0x891E8989B50F973C), + SPH_C64(0x14501414B42844A0), SPH_C64(0xE1A3E1E1BADF425B), + SPH_C64(0x16581616A62C4EB0), SPH_C64(0x3AE83A3AF774D2CD), + SPH_C64(0x69B9696906D2D06F), SPH_C64(0x0924090941122D48), + SPH_C64(0x70DD7070D7E0ADA7), SPH_C64(0xB6E2B6B66F7154D9), + SPH_C64(0xD067D0D01EBDB7CE), SPH_C64(0xED93EDEDD6C77E3B), + SPH_C64(0xCC17CCCCE285DB2E), SPH_C64(0x421542426884572A), + SPH_C64(0x985A98982C2DC2B4), SPH_C64(0xA4AAA4A4ED550E49), + SPH_C64(0x28A028287550885D), SPH_C64(0x5C6D5C5C86B831DA), + SPH_C64(0xF8C7F8F86BED3F93), SPH_C64(0x86228686C211A444) +}; + +static const uint64_t plain_T5[256] = { + SPH_C64(0x601818D83078C018), SPH_C64(0x8C23232646AF0523), + SPH_C64(0x3FC6C6B891F97EC6), SPH_C64(0x87E8E8FBCD6F13E8), + SPH_C64(0x268787CB13A14C87), SPH_C64(0xDAB8B8116D62A9B8), + SPH_C64(0x0401010902050801), SPH_C64(0x214F4F0D9E6E424F), + SPH_C64(0xD836369B6CEEAD36), SPH_C64(0xA2A6A6FF510459A6), + SPH_C64(0x6FD2D20CB9BDDED2), SPH_C64(0xF3F5F50EF706FBF5), + SPH_C64(0xF9797996F280EF79), SPH_C64(0xA16F6F30DECE5F6F), + SPH_C64(0x7E91916D3FEFFC91), SPH_C64(0x555252F8A407AA52), + SPH_C64(0x9D606047C0FD2760), SPH_C64(0xCABCBC35657689BC), + SPH_C64(0x569B9B372BCDAC9B), SPH_C64(0x028E8E8A018C048E), + SPH_C64(0xB6A3A3D25B1571A3), SPH_C64(0x300C0C6C183C600C), + SPH_C64(0xF17B7B84F68AFF7B), SPH_C64(0xD43535806AE1B535), + SPH_C64(0x741D1DF53A69E81D), SPH_C64(0xA7E0E0B3DD4753E0), + SPH_C64(0x7BD7D721B3ACF6D7), SPH_C64(0x2FC2C29C99ED5EC2), + SPH_C64(0xB82E2E435C966D2E), SPH_C64(0x314B4B29967A624B), + SPH_C64(0xDFFEFE5DE121A3FE), SPH_C64(0x415757D5AE168257), + SPH_C64(0x541515BD2A41A815), SPH_C64(0xC17777E8EEB69F77), + SPH_C64(0xDC3737926EEBA537), SPH_C64(0xB3E5E59ED7567BE5), + SPH_C64(0x469F9F1323D98C9F), SPH_C64(0xE7F0F023FD17D3F0), + SPH_C64(0x354A4A20947F6A4A), SPH_C64(0x4FDADA44A9959EDA), + SPH_C64(0x7D5858A2B025FA58), SPH_C64(0x03C9C9CF8FCA06C9), + 
SPH_C64(0xA429297C528D5529), SPH_C64(0x280A0A5A1422500A), + SPH_C64(0xFEB1B1507F4FE1B1), SPH_C64(0xBAA0A0C95D1A69A0), + SPH_C64(0xB16B6B14D6DA7F6B), SPH_C64(0x2E8585D917AB5C85), + SPH_C64(0xCEBDBD3C677381BD), SPH_C64(0x695D5D8FBA34D25D), + SPH_C64(0x4010109020508010), SPH_C64(0xF7F4F407F503F3F4), + SPH_C64(0x0BCBCBDD8BC016CB), SPH_C64(0xF83E3ED37CC6ED3E), + SPH_C64(0x1405052D0A112805), SPH_C64(0x81676778CEE61F67), + SPH_C64(0xB7E4E497D55373E4), SPH_C64(0x9C2727024EBB2527), + SPH_C64(0x1941417382583241), SPH_C64(0x168B8BA70B9D2C8B), + SPH_C64(0xA6A7A7F6530151A7), SPH_C64(0xE97D7DB2FA94CF7D), + SPH_C64(0x6E95954937FBDC95), SPH_C64(0x47D8D856AD9F8ED8), + SPH_C64(0xCBFBFB70EB308BFB), SPH_C64(0x9FEEEECDC17123EE), + SPH_C64(0xED7C7CBBF891C77C), SPH_C64(0x85666671CCE31766), + SPH_C64(0x53DDDD7BA78EA6DD), SPH_C64(0x5C1717AF2E4BB817), + SPH_C64(0x014747458E460247), SPH_C64(0x429E9E1A21DC849E), + SPH_C64(0x0FCACAD489C51ECA), SPH_C64(0xB42D2D585A99752D), + SPH_C64(0xC6BFBF2E637991BF), SPH_C64(0x1C07073F0E1B3807), + SPH_C64(0x8EADADAC472301AD), SPH_C64(0x755A5AB0B42FEA5A), + SPH_C64(0x368383EF1BB56C83), SPH_C64(0xCC3333B666FF8533), + SPH_C64(0x9163635CC6F23F63), SPH_C64(0x08020212040A1002), + SPH_C64(0x92AAAA93493839AA), SPH_C64(0xD97171DEE2A8AF71), + SPH_C64(0x07C8C8C68DCF0EC8), SPH_C64(0x641919D1327DC819), + SPH_C64(0x3949493B92707249), SPH_C64(0x43D9D95FAF9A86D9), + SPH_C64(0xEFF2F231F91DC3F2), SPH_C64(0xABE3E3A8DB484BE3), + SPH_C64(0x715B5BB9B62AE25B), SPH_C64(0x1A8888BC0D923488), + SPH_C64(0x529A9A3E29C8A49A), SPH_C64(0x9826260B4CBE2D26), + SPH_C64(0xC83232BF64FA8D32), SPH_C64(0xFAB0B0597D4AE9B0), + SPH_C64(0x83E9E9F2CF6A1BE9), SPH_C64(0x3C0F0F771E33780F), + SPH_C64(0x73D5D533B7A6E6D5), SPH_C64(0x3A8080F41DBA7480), + SPH_C64(0xC2BEBE27617C99BE), SPH_C64(0x13CDCDEB87DE26CD), + SPH_C64(0xD034348968E4BD34), SPH_C64(0x3D48483290757A48), + SPH_C64(0xDBFFFF54E324ABFF), SPH_C64(0xF57A7A8DF48FF77A), + SPH_C64(0x7A9090643DEAF490), SPH_C64(0x615F5F9DBE3EC25F), + 
SPH_C64(0x8020203D40A01D20), SPH_C64(0xBD68680FD0D56768), + SPH_C64(0x681A1ACA3472D01A), SPH_C64(0x82AEAEB7412C19AE), + SPH_C64(0xEAB4B47D755EC9B4), SPH_C64(0x4D5454CEA8199A54), + SPH_C64(0x7693937F3BE5EC93), SPH_C64(0x8822222F44AA0D22), + SPH_C64(0x8D646463C8E90764), SPH_C64(0xE3F1F12AFF12DBF1), + SPH_C64(0xD17373CCE6A2BF73), SPH_C64(0x48121282245A9012), + SPH_C64(0x1D40407A805D3A40), SPH_C64(0x2008084810284008), + SPH_C64(0x2BC3C3959BE856C3), SPH_C64(0x97ECECDFC57B33EC), + SPH_C64(0x4BDBDB4DAB9096DB), SPH_C64(0xBEA1A1C05F1F61A1), + SPH_C64(0x0E8D8D9107831C8D), SPH_C64(0xF43D3DC87AC9F53D), + SPH_C64(0x6697975B33F1CC97), SPH_C64(0x0000000000000000), + SPH_C64(0x1BCFCFF983D436CF), SPH_C64(0xAC2B2B6E5687452B), + SPH_C64(0xC57676E1ECB39776), SPH_C64(0x328282E619B06482), + SPH_C64(0x7FD6D628B1A9FED6), SPH_C64(0x6C1B1BC33677D81B), + SPH_C64(0xEEB5B574775BC1B5), SPH_C64(0x86AFAFBE432911AF), + SPH_C64(0xB56A6A1DD4DF776A), SPH_C64(0x5D5050EAA00DBA50), + SPH_C64(0x094545578A4C1245), SPH_C64(0xEBF3F338FB18CBF3), + SPH_C64(0xC03030AD60F09D30), SPH_C64(0x9BEFEFC4C3742BEF), + SPH_C64(0xFC3F3FDA7EC3E53F), SPH_C64(0x495555C7AA1C9255), + SPH_C64(0xB2A2A2DB591079A2), SPH_C64(0x8FEAEAE9C96503EA), + SPH_C64(0x8965656ACAEC0F65), SPH_C64(0xD2BABA036968B9BA), + SPH_C64(0xBC2F2F4A5E93652F), SPH_C64(0x27C0C08E9DE74EC0), + SPH_C64(0x5FDEDE60A181BEDE), SPH_C64(0x701C1CFC386CE01C), + SPH_C64(0xD3FDFD46E72EBBFD), SPH_C64(0x294D4D1F9A64524D), + SPH_C64(0x7292927639E0E492), SPH_C64(0xC97575FAEABC8F75), + SPH_C64(0x180606360C1E3006), SPH_C64(0x128A8AAE0998248A), + SPH_C64(0xF2B2B24B7940F9B2), SPH_C64(0xBFE6E685D15963E6), + SPH_C64(0x380E0E7E1C36700E), SPH_C64(0x7C1F1FE73E63F81F), + SPH_C64(0x95626255C4F73762), SPH_C64(0x77D4D43AB5A3EED4), + SPH_C64(0x9AA8A8814D3229A8), SPH_C64(0x6296965231F4C496), + SPH_C64(0xC3F9F962EF3A9BF9), SPH_C64(0x33C5C5A397F666C5), + SPH_C64(0x942525104AB13525), SPH_C64(0x795959ABB220F259), + SPH_C64(0x2A8484D015AE5484), SPH_C64(0xD57272C5E4A7B772), + 
SPH_C64(0xE43939EC72DDD539), SPH_C64(0x2D4C4C1698615A4C), + SPH_C64(0x655E5E94BC3BCA5E), SPH_C64(0xFD78789FF085E778), + SPH_C64(0xE03838E570D8DD38), SPH_C64(0x0A8C8C980586148C), + SPH_C64(0x63D1D117BFB2C6D1), SPH_C64(0xAEA5A5E4570B41A5), + SPH_C64(0xAFE2E2A1D94D43E2), SPH_C64(0x9961614EC2F82F61), + SPH_C64(0xF6B3B3427B45F1B3), SPH_C64(0x8421213442A51521), + SPH_C64(0x4A9C9C0825D6949C), SPH_C64(0x781E1EEE3C66F01E), + SPH_C64(0x1143436186522243), SPH_C64(0x3BC7C7B193FC76C7), + SPH_C64(0xD7FCFC4FE52BB3FC), SPH_C64(0x1004042408142004), + SPH_C64(0x595151E3A208B251), SPH_C64(0x5E9999252FC7BC99), + SPH_C64(0xA96D6D22DAC44F6D), SPH_C64(0x340D0D651A39680D), + SPH_C64(0xCFFAFA79E93583FA), SPH_C64(0x5BDFDF69A384B6DF), + SPH_C64(0xE57E7EA9FC9BD77E), SPH_C64(0x9024241948B43D24), + SPH_C64(0xEC3B3BFE76D7C53B), SPH_C64(0x96ABAB9A4B3D31AB), + SPH_C64(0x1FCECEF081D13ECE), SPH_C64(0x4411119922558811), + SPH_C64(0x068F8F8303890C8F), SPH_C64(0x254E4E049C6B4A4E), + SPH_C64(0xE6B7B7667351D1B7), SPH_C64(0x8BEBEBE0CB600BEB), + SPH_C64(0xF03C3CC178CCFD3C), SPH_C64(0x3E8181FD1FBF7C81), + SPH_C64(0x6A94944035FED494), SPH_C64(0xFBF7F71CF30CEBF7), + SPH_C64(0xDEB9B9186F67A1B9), SPH_C64(0x4C13138B265F9813), + SPH_C64(0xB02C2C51589C7D2C), SPH_C64(0x6BD3D305BBB8D6D3), + SPH_C64(0xBBE7E78CD35C6BE7), SPH_C64(0xA56E6E39DCCB576E), + SPH_C64(0x37C4C4AA95F36EC4), SPH_C64(0x0C03031B060F1803), + SPH_C64(0x455656DCAC138A56), SPH_C64(0x0D44445E88491A44), + SPH_C64(0xE17F7FA0FE9EDF7F), SPH_C64(0x9EA9A9884F3721A9), + SPH_C64(0xA82A2A6754824D2A), SPH_C64(0xD6BBBB0A6B6DB1BB), + SPH_C64(0x23C1C1879FE246C1), SPH_C64(0x515353F1A602A253), + SPH_C64(0x57DCDC72A58BAEDC), SPH_C64(0x2C0B0B531627580B), + SPH_C64(0x4E9D9D0127D39C9D), SPH_C64(0xAD6C6C2BD8C1476C), + SPH_C64(0xC43131A462F59531), SPH_C64(0xCD7474F3E8B98774), + SPH_C64(0xFFF6F615F109E3F6), SPH_C64(0x0546464C8C430A46), + SPH_C64(0x8AACACA5452609AC), SPH_C64(0x1E8989B50F973C89), + SPH_C64(0x501414B42844A014), SPH_C64(0xA3E1E1BADF425BE1), + 
SPH_C64(0x581616A62C4EB016), SPH_C64(0xE83A3AF774D2CD3A), + SPH_C64(0xB9696906D2D06F69), SPH_C64(0x24090941122D4809), + SPH_C64(0xDD7070D7E0ADA770), SPH_C64(0xE2B6B66F7154D9B6), + SPH_C64(0x67D0D01EBDB7CED0), SPH_C64(0x93EDEDD6C77E3BED), + SPH_C64(0x17CCCCE285DB2ECC), SPH_C64(0x1542426884572A42), + SPH_C64(0x5A98982C2DC2B498), SPH_C64(0xAAA4A4ED550E49A4), + SPH_C64(0xA028287550885D28), SPH_C64(0x6D5C5C86B831DA5C), + SPH_C64(0xC7F8F86BED3F93F8), SPH_C64(0x228686C211A44486) +}; + +static const uint64_t plain_T6[256] = { + SPH_C64(0x1818D83078C01860), SPH_C64(0x23232646AF05238C), + SPH_C64(0xC6C6B891F97EC63F), SPH_C64(0xE8E8FBCD6F13E887), + SPH_C64(0x8787CB13A14C8726), SPH_C64(0xB8B8116D62A9B8DA), + SPH_C64(0x0101090205080104), SPH_C64(0x4F4F0D9E6E424F21), + SPH_C64(0x36369B6CEEAD36D8), SPH_C64(0xA6A6FF510459A6A2), + SPH_C64(0xD2D20CB9BDDED26F), SPH_C64(0xF5F50EF706FBF5F3), + SPH_C64(0x797996F280EF79F9), SPH_C64(0x6F6F30DECE5F6FA1), + SPH_C64(0x91916D3FEFFC917E), SPH_C64(0x5252F8A407AA5255), + SPH_C64(0x606047C0FD27609D), SPH_C64(0xBCBC35657689BCCA), + SPH_C64(0x9B9B372BCDAC9B56), SPH_C64(0x8E8E8A018C048E02), + SPH_C64(0xA3A3D25B1571A3B6), SPH_C64(0x0C0C6C183C600C30), + SPH_C64(0x7B7B84F68AFF7BF1), SPH_C64(0x3535806AE1B535D4), + SPH_C64(0x1D1DF53A69E81D74), SPH_C64(0xE0E0B3DD4753E0A7), + SPH_C64(0xD7D721B3ACF6D77B), SPH_C64(0xC2C29C99ED5EC22F), + SPH_C64(0x2E2E435C966D2EB8), SPH_C64(0x4B4B29967A624B31), + SPH_C64(0xFEFE5DE121A3FEDF), SPH_C64(0x5757D5AE16825741), + SPH_C64(0x1515BD2A41A81554), SPH_C64(0x7777E8EEB69F77C1), + SPH_C64(0x3737926EEBA537DC), SPH_C64(0xE5E59ED7567BE5B3), + SPH_C64(0x9F9F1323D98C9F46), SPH_C64(0xF0F023FD17D3F0E7), + SPH_C64(0x4A4A20947F6A4A35), SPH_C64(0xDADA44A9959EDA4F), + SPH_C64(0x5858A2B025FA587D), SPH_C64(0xC9C9CF8FCA06C903), + SPH_C64(0x29297C528D5529A4), SPH_C64(0x0A0A5A1422500A28), + SPH_C64(0xB1B1507F4FE1B1FE), SPH_C64(0xA0A0C95D1A69A0BA), + SPH_C64(0x6B6B14D6DA7F6BB1), SPH_C64(0x8585D917AB5C852E), + SPH_C64(0xBDBD3C677381BDCE), 
SPH_C64(0x5D5D8FBA34D25D69), + SPH_C64(0x1010902050801040), SPH_C64(0xF4F407F503F3F4F7), + SPH_C64(0xCBCBDD8BC016CB0B), SPH_C64(0x3E3ED37CC6ED3EF8), + SPH_C64(0x05052D0A11280514), SPH_C64(0x676778CEE61F6781), + SPH_C64(0xE4E497D55373E4B7), SPH_C64(0x2727024EBB25279C), + SPH_C64(0x4141738258324119), SPH_C64(0x8B8BA70B9D2C8B16), + SPH_C64(0xA7A7F6530151A7A6), SPH_C64(0x7D7DB2FA94CF7DE9), + SPH_C64(0x95954937FBDC956E), SPH_C64(0xD8D856AD9F8ED847), + SPH_C64(0xFBFB70EB308BFBCB), SPH_C64(0xEEEECDC17123EE9F), + SPH_C64(0x7C7CBBF891C77CED), SPH_C64(0x666671CCE3176685), + SPH_C64(0xDDDD7BA78EA6DD53), SPH_C64(0x1717AF2E4BB8175C), + SPH_C64(0x4747458E46024701), SPH_C64(0x9E9E1A21DC849E42), + SPH_C64(0xCACAD489C51ECA0F), SPH_C64(0x2D2D585A99752DB4), + SPH_C64(0xBFBF2E637991BFC6), SPH_C64(0x07073F0E1B38071C), + SPH_C64(0xADADAC472301AD8E), SPH_C64(0x5A5AB0B42FEA5A75), + SPH_C64(0x8383EF1BB56C8336), SPH_C64(0x3333B666FF8533CC), + SPH_C64(0x63635CC6F23F6391), SPH_C64(0x020212040A100208), + SPH_C64(0xAAAA93493839AA92), SPH_C64(0x7171DEE2A8AF71D9), + SPH_C64(0xC8C8C68DCF0EC807), SPH_C64(0x1919D1327DC81964), + SPH_C64(0x49493B9270724939), SPH_C64(0xD9D95FAF9A86D943), + SPH_C64(0xF2F231F91DC3F2EF), SPH_C64(0xE3E3A8DB484BE3AB), + SPH_C64(0x5B5BB9B62AE25B71), SPH_C64(0x8888BC0D9234881A), + SPH_C64(0x9A9A3E29C8A49A52), SPH_C64(0x26260B4CBE2D2698), + SPH_C64(0x3232BF64FA8D32C8), SPH_C64(0xB0B0597D4AE9B0FA), + SPH_C64(0xE9E9F2CF6A1BE983), SPH_C64(0x0F0F771E33780F3C), + SPH_C64(0xD5D533B7A6E6D573), SPH_C64(0x8080F41DBA74803A), + SPH_C64(0xBEBE27617C99BEC2), SPH_C64(0xCDCDEB87DE26CD13), + SPH_C64(0x34348968E4BD34D0), SPH_C64(0x48483290757A483D), + SPH_C64(0xFFFF54E324ABFFDB), SPH_C64(0x7A7A8DF48FF77AF5), + SPH_C64(0x9090643DEAF4907A), SPH_C64(0x5F5F9DBE3EC25F61), + SPH_C64(0x20203D40A01D2080), SPH_C64(0x68680FD0D56768BD), + SPH_C64(0x1A1ACA3472D01A68), SPH_C64(0xAEAEB7412C19AE82), + SPH_C64(0xB4B47D755EC9B4EA), SPH_C64(0x5454CEA8199A544D), + SPH_C64(0x93937F3BE5EC9376), 
SPH_C64(0x22222F44AA0D2288), + SPH_C64(0x646463C8E907648D), SPH_C64(0xF1F12AFF12DBF1E3), + SPH_C64(0x7373CCE6A2BF73D1), SPH_C64(0x121282245A901248), + SPH_C64(0x40407A805D3A401D), SPH_C64(0x0808481028400820), + SPH_C64(0xC3C3959BE856C32B), SPH_C64(0xECECDFC57B33EC97), + SPH_C64(0xDBDB4DAB9096DB4B), SPH_C64(0xA1A1C05F1F61A1BE), + SPH_C64(0x8D8D9107831C8D0E), SPH_C64(0x3D3DC87AC9F53DF4), + SPH_C64(0x97975B33F1CC9766), SPH_C64(0x0000000000000000), + SPH_C64(0xCFCFF983D436CF1B), SPH_C64(0x2B2B6E5687452BAC), + SPH_C64(0x7676E1ECB39776C5), SPH_C64(0x8282E619B0648232), + SPH_C64(0xD6D628B1A9FED67F), SPH_C64(0x1B1BC33677D81B6C), + SPH_C64(0xB5B574775BC1B5EE), SPH_C64(0xAFAFBE432911AF86), + SPH_C64(0x6A6A1DD4DF776AB5), SPH_C64(0x5050EAA00DBA505D), + SPH_C64(0x4545578A4C124509), SPH_C64(0xF3F338FB18CBF3EB), + SPH_C64(0x3030AD60F09D30C0), SPH_C64(0xEFEFC4C3742BEF9B), + SPH_C64(0x3F3FDA7EC3E53FFC), SPH_C64(0x5555C7AA1C925549), + SPH_C64(0xA2A2DB591079A2B2), SPH_C64(0xEAEAE9C96503EA8F), + SPH_C64(0x65656ACAEC0F6589), SPH_C64(0xBABA036968B9BAD2), + SPH_C64(0x2F2F4A5E93652FBC), SPH_C64(0xC0C08E9DE74EC027), + SPH_C64(0xDEDE60A181BEDE5F), SPH_C64(0x1C1CFC386CE01C70), + SPH_C64(0xFDFD46E72EBBFDD3), SPH_C64(0x4D4D1F9A64524D29), + SPH_C64(0x92927639E0E49272), SPH_C64(0x7575FAEABC8F75C9), + SPH_C64(0x0606360C1E300618), SPH_C64(0x8A8AAE0998248A12), + SPH_C64(0xB2B24B7940F9B2F2), SPH_C64(0xE6E685D15963E6BF), + SPH_C64(0x0E0E7E1C36700E38), SPH_C64(0x1F1FE73E63F81F7C), + SPH_C64(0x626255C4F7376295), SPH_C64(0xD4D43AB5A3EED477), + SPH_C64(0xA8A8814D3229A89A), SPH_C64(0x96965231F4C49662), + SPH_C64(0xF9F962EF3A9BF9C3), SPH_C64(0xC5C5A397F666C533), + SPH_C64(0x2525104AB1352594), SPH_C64(0x5959ABB220F25979), + SPH_C64(0x8484D015AE54842A), SPH_C64(0x7272C5E4A7B772D5), + SPH_C64(0x3939EC72DDD539E4), SPH_C64(0x4C4C1698615A4C2D), + SPH_C64(0x5E5E94BC3BCA5E65), SPH_C64(0x78789FF085E778FD), + SPH_C64(0x3838E570D8DD38E0), SPH_C64(0x8C8C980586148C0A), + SPH_C64(0xD1D117BFB2C6D163), 
SPH_C64(0xA5A5E4570B41A5AE), + SPH_C64(0xE2E2A1D94D43E2AF), SPH_C64(0x61614EC2F82F6199), + SPH_C64(0xB3B3427B45F1B3F6), SPH_C64(0x21213442A5152184), + SPH_C64(0x9C9C0825D6949C4A), SPH_C64(0x1E1EEE3C66F01E78), + SPH_C64(0x4343618652224311), SPH_C64(0xC7C7B193FC76C73B), + SPH_C64(0xFCFC4FE52BB3FCD7), SPH_C64(0x0404240814200410), + SPH_C64(0x5151E3A208B25159), SPH_C64(0x9999252FC7BC995E), + SPH_C64(0x6D6D22DAC44F6DA9), SPH_C64(0x0D0D651A39680D34), + SPH_C64(0xFAFA79E93583FACF), SPH_C64(0xDFDF69A384B6DF5B), + SPH_C64(0x7E7EA9FC9BD77EE5), SPH_C64(0x24241948B43D2490), + SPH_C64(0x3B3BFE76D7C53BEC), SPH_C64(0xABAB9A4B3D31AB96), + SPH_C64(0xCECEF081D13ECE1F), SPH_C64(0x1111992255881144), + SPH_C64(0x8F8F8303890C8F06), SPH_C64(0x4E4E049C6B4A4E25), + SPH_C64(0xB7B7667351D1B7E6), SPH_C64(0xEBEBE0CB600BEB8B), + SPH_C64(0x3C3CC178CCFD3CF0), SPH_C64(0x8181FD1FBF7C813E), + SPH_C64(0x94944035FED4946A), SPH_C64(0xF7F71CF30CEBF7FB), + SPH_C64(0xB9B9186F67A1B9DE), SPH_C64(0x13138B265F98134C), + SPH_C64(0x2C2C51589C7D2CB0), SPH_C64(0xD3D305BBB8D6D36B), + SPH_C64(0xE7E78CD35C6BE7BB), SPH_C64(0x6E6E39DCCB576EA5), + SPH_C64(0xC4C4AA95F36EC437), SPH_C64(0x03031B060F18030C), + SPH_C64(0x5656DCAC138A5645), SPH_C64(0x44445E88491A440D), + SPH_C64(0x7F7FA0FE9EDF7FE1), SPH_C64(0xA9A9884F3721A99E), + SPH_C64(0x2A2A6754824D2AA8), SPH_C64(0xBBBB0A6B6DB1BBD6), + SPH_C64(0xC1C1879FE246C123), SPH_C64(0x5353F1A602A25351), + SPH_C64(0xDCDC72A58BAEDC57), SPH_C64(0x0B0B531627580B2C), + SPH_C64(0x9D9D0127D39C9D4E), SPH_C64(0x6C6C2BD8C1476CAD), + SPH_C64(0x3131A462F59531C4), SPH_C64(0x7474F3E8B98774CD), + SPH_C64(0xF6F615F109E3F6FF), SPH_C64(0x46464C8C430A4605), + SPH_C64(0xACACA5452609AC8A), SPH_C64(0x8989B50F973C891E), + SPH_C64(0x1414B42844A01450), SPH_C64(0xE1E1BADF425BE1A3), + SPH_C64(0x1616A62C4EB01658), SPH_C64(0x3A3AF774D2CD3AE8), + SPH_C64(0x696906D2D06F69B9), SPH_C64(0x090941122D480924), + SPH_C64(0x7070D7E0ADA770DD), SPH_C64(0xB6B66F7154D9B6E2), + SPH_C64(0xD0D01EBDB7CED067), 
SPH_C64(0xEDEDD6C77E3BED93), + SPH_C64(0xCCCCE285DB2ECC17), SPH_C64(0x42426884572A4215), + SPH_C64(0x98982C2DC2B4985A), SPH_C64(0xA4A4ED550E49A4AA), + SPH_C64(0x28287550885D28A0), SPH_C64(0x5C5C86B831DA5C6D), + SPH_C64(0xF8F86BED3F93F8C7), SPH_C64(0x8686C211A4448622) +}; + +static const uint64_t plain_T7[256] = { + SPH_C64(0x18D83078C0186018), SPH_C64(0x232646AF05238C23), + SPH_C64(0xC6B891F97EC63FC6), SPH_C64(0xE8FBCD6F13E887E8), + SPH_C64(0x87CB13A14C872687), SPH_C64(0xB8116D62A9B8DAB8), + SPH_C64(0x0109020508010401), SPH_C64(0x4F0D9E6E424F214F), + SPH_C64(0x369B6CEEAD36D836), SPH_C64(0xA6FF510459A6A2A6), + SPH_C64(0xD20CB9BDDED26FD2), SPH_C64(0xF50EF706FBF5F3F5), + SPH_C64(0x7996F280EF79F979), SPH_C64(0x6F30DECE5F6FA16F), + SPH_C64(0x916D3FEFFC917E91), SPH_C64(0x52F8A407AA525552), + SPH_C64(0x6047C0FD27609D60), SPH_C64(0xBC35657689BCCABC), + SPH_C64(0x9B372BCDAC9B569B), SPH_C64(0x8E8A018C048E028E), + SPH_C64(0xA3D25B1571A3B6A3), SPH_C64(0x0C6C183C600C300C), + SPH_C64(0x7B84F68AFF7BF17B), SPH_C64(0x35806AE1B535D435), + SPH_C64(0x1DF53A69E81D741D), SPH_C64(0xE0B3DD4753E0A7E0), + SPH_C64(0xD721B3ACF6D77BD7), SPH_C64(0xC29C99ED5EC22FC2), + SPH_C64(0x2E435C966D2EB82E), SPH_C64(0x4B29967A624B314B), + SPH_C64(0xFE5DE121A3FEDFFE), SPH_C64(0x57D5AE1682574157), + SPH_C64(0x15BD2A41A8155415), SPH_C64(0x77E8EEB69F77C177), + SPH_C64(0x37926EEBA537DC37), SPH_C64(0xE59ED7567BE5B3E5), + SPH_C64(0x9F1323D98C9F469F), SPH_C64(0xF023FD17D3F0E7F0), + SPH_C64(0x4A20947F6A4A354A), SPH_C64(0xDA44A9959EDA4FDA), + SPH_C64(0x58A2B025FA587D58), SPH_C64(0xC9CF8FCA06C903C9), + SPH_C64(0x297C528D5529A429), SPH_C64(0x0A5A1422500A280A), + SPH_C64(0xB1507F4FE1B1FEB1), SPH_C64(0xA0C95D1A69A0BAA0), + SPH_C64(0x6B14D6DA7F6BB16B), SPH_C64(0x85D917AB5C852E85), + SPH_C64(0xBD3C677381BDCEBD), SPH_C64(0x5D8FBA34D25D695D), + SPH_C64(0x1090205080104010), SPH_C64(0xF407F503F3F4F7F4), + SPH_C64(0xCBDD8BC016CB0BCB), SPH_C64(0x3ED37CC6ED3EF83E), + SPH_C64(0x052D0A1128051405), SPH_C64(0x6778CEE61F678167), + 
SPH_C64(0xE497D55373E4B7E4), SPH_C64(0x27024EBB25279C27), + SPH_C64(0x4173825832411941), SPH_C64(0x8BA70B9D2C8B168B), + SPH_C64(0xA7F6530151A7A6A7), SPH_C64(0x7DB2FA94CF7DE97D), + SPH_C64(0x954937FBDC956E95), SPH_C64(0xD856AD9F8ED847D8), + SPH_C64(0xFB70EB308BFBCBFB), SPH_C64(0xEECDC17123EE9FEE), + SPH_C64(0x7CBBF891C77CED7C), SPH_C64(0x6671CCE317668566), + SPH_C64(0xDD7BA78EA6DD53DD), SPH_C64(0x17AF2E4BB8175C17), + SPH_C64(0x47458E4602470147), SPH_C64(0x9E1A21DC849E429E), + SPH_C64(0xCAD489C51ECA0FCA), SPH_C64(0x2D585A99752DB42D), + SPH_C64(0xBF2E637991BFC6BF), SPH_C64(0x073F0E1B38071C07), + SPH_C64(0xADAC472301AD8EAD), SPH_C64(0x5AB0B42FEA5A755A), + SPH_C64(0x83EF1BB56C833683), SPH_C64(0x33B666FF8533CC33), + SPH_C64(0x635CC6F23F639163), SPH_C64(0x0212040A10020802), + SPH_C64(0xAA93493839AA92AA), SPH_C64(0x71DEE2A8AF71D971), + SPH_C64(0xC8C68DCF0EC807C8), SPH_C64(0x19D1327DC8196419), + SPH_C64(0x493B927072493949), SPH_C64(0xD95FAF9A86D943D9), + SPH_C64(0xF231F91DC3F2EFF2), SPH_C64(0xE3A8DB484BE3ABE3), + SPH_C64(0x5BB9B62AE25B715B), SPH_C64(0x88BC0D9234881A88), + SPH_C64(0x9A3E29C8A49A529A), SPH_C64(0x260B4CBE2D269826), + SPH_C64(0x32BF64FA8D32C832), SPH_C64(0xB0597D4AE9B0FAB0), + SPH_C64(0xE9F2CF6A1BE983E9), SPH_C64(0x0F771E33780F3C0F), + SPH_C64(0xD533B7A6E6D573D5), SPH_C64(0x80F41DBA74803A80), + SPH_C64(0xBE27617C99BEC2BE), SPH_C64(0xCDEB87DE26CD13CD), + SPH_C64(0x348968E4BD34D034), SPH_C64(0x483290757A483D48), + SPH_C64(0xFF54E324ABFFDBFF), SPH_C64(0x7A8DF48FF77AF57A), + SPH_C64(0x90643DEAF4907A90), SPH_C64(0x5F9DBE3EC25F615F), + SPH_C64(0x203D40A01D208020), SPH_C64(0x680FD0D56768BD68), + SPH_C64(0x1ACA3472D01A681A), SPH_C64(0xAEB7412C19AE82AE), + SPH_C64(0xB47D755EC9B4EAB4), SPH_C64(0x54CEA8199A544D54), + SPH_C64(0x937F3BE5EC937693), SPH_C64(0x222F44AA0D228822), + SPH_C64(0x6463C8E907648D64), SPH_C64(0xF12AFF12DBF1E3F1), + SPH_C64(0x73CCE6A2BF73D173), SPH_C64(0x1282245A90124812), + SPH_C64(0x407A805D3A401D40), SPH_C64(0x0848102840082008), + 
SPH_C64(0xC3959BE856C32BC3), SPH_C64(0xECDFC57B33EC97EC), + SPH_C64(0xDB4DAB9096DB4BDB), SPH_C64(0xA1C05F1F61A1BEA1), + SPH_C64(0x8D9107831C8D0E8D), SPH_C64(0x3DC87AC9F53DF43D), + SPH_C64(0x975B33F1CC976697), SPH_C64(0x0000000000000000), + SPH_C64(0xCFF983D436CF1BCF), SPH_C64(0x2B6E5687452BAC2B), + SPH_C64(0x76E1ECB39776C576), SPH_C64(0x82E619B064823282), + SPH_C64(0xD628B1A9FED67FD6), SPH_C64(0x1BC33677D81B6C1B), + SPH_C64(0xB574775BC1B5EEB5), SPH_C64(0xAFBE432911AF86AF), + SPH_C64(0x6A1DD4DF776AB56A), SPH_C64(0x50EAA00DBA505D50), + SPH_C64(0x45578A4C12450945), SPH_C64(0xF338FB18CBF3EBF3), + SPH_C64(0x30AD60F09D30C030), SPH_C64(0xEFC4C3742BEF9BEF), + SPH_C64(0x3FDA7EC3E53FFC3F), SPH_C64(0x55C7AA1C92554955), + SPH_C64(0xA2DB591079A2B2A2), SPH_C64(0xEAE9C96503EA8FEA), + SPH_C64(0x656ACAEC0F658965), SPH_C64(0xBA036968B9BAD2BA), + SPH_C64(0x2F4A5E93652FBC2F), SPH_C64(0xC08E9DE74EC027C0), + SPH_C64(0xDE60A181BEDE5FDE), SPH_C64(0x1CFC386CE01C701C), + SPH_C64(0xFD46E72EBBFDD3FD), SPH_C64(0x4D1F9A64524D294D), + SPH_C64(0x927639E0E4927292), SPH_C64(0x75FAEABC8F75C975), + SPH_C64(0x06360C1E30061806), SPH_C64(0x8AAE0998248A128A), + SPH_C64(0xB24B7940F9B2F2B2), SPH_C64(0xE685D15963E6BFE6), + SPH_C64(0x0E7E1C36700E380E), SPH_C64(0x1FE73E63F81F7C1F), + SPH_C64(0x6255C4F737629562), SPH_C64(0xD43AB5A3EED477D4), + SPH_C64(0xA8814D3229A89AA8), SPH_C64(0x965231F4C4966296), + SPH_C64(0xF962EF3A9BF9C3F9), SPH_C64(0xC5A397F666C533C5), + SPH_C64(0x25104AB135259425), SPH_C64(0x59ABB220F2597959), + SPH_C64(0x84D015AE54842A84), SPH_C64(0x72C5E4A7B772D572), + SPH_C64(0x39EC72DDD539E439), SPH_C64(0x4C1698615A4C2D4C), + SPH_C64(0x5E94BC3BCA5E655E), SPH_C64(0x789FF085E778FD78), + SPH_C64(0x38E570D8DD38E038), SPH_C64(0x8C980586148C0A8C), + SPH_C64(0xD117BFB2C6D163D1), SPH_C64(0xA5E4570B41A5AEA5), + SPH_C64(0xE2A1D94D43E2AFE2), SPH_C64(0x614EC2F82F619961), + SPH_C64(0xB3427B45F1B3F6B3), SPH_C64(0x213442A515218421), + SPH_C64(0x9C0825D6949C4A9C), SPH_C64(0x1EEE3C66F01E781E), + 
SPH_C64(0x4361865222431143), SPH_C64(0xC7B193FC76C73BC7), + SPH_C64(0xFC4FE52BB3FCD7FC), SPH_C64(0x0424081420041004), + SPH_C64(0x51E3A208B2515951), SPH_C64(0x99252FC7BC995E99), + SPH_C64(0x6D22DAC44F6DA96D), SPH_C64(0x0D651A39680D340D), + SPH_C64(0xFA79E93583FACFFA), SPH_C64(0xDF69A384B6DF5BDF), + SPH_C64(0x7EA9FC9BD77EE57E), SPH_C64(0x241948B43D249024), + SPH_C64(0x3BFE76D7C53BEC3B), SPH_C64(0xAB9A4B3D31AB96AB), + SPH_C64(0xCEF081D13ECE1FCE), SPH_C64(0x1199225588114411), + SPH_C64(0x8F8303890C8F068F), SPH_C64(0x4E049C6B4A4E254E), + SPH_C64(0xB7667351D1B7E6B7), SPH_C64(0xEBE0CB600BEB8BEB), + SPH_C64(0x3CC178CCFD3CF03C), SPH_C64(0x81FD1FBF7C813E81), + SPH_C64(0x944035FED4946A94), SPH_C64(0xF71CF30CEBF7FBF7), + SPH_C64(0xB9186F67A1B9DEB9), SPH_C64(0x138B265F98134C13), + SPH_C64(0x2C51589C7D2CB02C), SPH_C64(0xD305BBB8D6D36BD3), + SPH_C64(0xE78CD35C6BE7BBE7), SPH_C64(0x6E39DCCB576EA56E), + SPH_C64(0xC4AA95F36EC437C4), SPH_C64(0x031B060F18030C03), + SPH_C64(0x56DCAC138A564556), SPH_C64(0x445E88491A440D44), + SPH_C64(0x7FA0FE9EDF7FE17F), SPH_C64(0xA9884F3721A99EA9), + SPH_C64(0x2A6754824D2AA82A), SPH_C64(0xBB0A6B6DB1BBD6BB), + SPH_C64(0xC1879FE246C123C1), SPH_C64(0x53F1A602A2535153), + SPH_C64(0xDC72A58BAEDC57DC), SPH_C64(0x0B531627580B2C0B), + SPH_C64(0x9D0127D39C9D4E9D), SPH_C64(0x6C2BD8C1476CAD6C), + SPH_C64(0x31A462F59531C431), SPH_C64(0x74F3E8B98774CD74), + SPH_C64(0xF615F109E3F6FFF6), SPH_C64(0x464C8C430A460546), + SPH_C64(0xACA5452609AC8AAC), SPH_C64(0x89B50F973C891E89), + SPH_C64(0x14B42844A0145014), SPH_C64(0xE1BADF425BE1A3E1), + SPH_C64(0x16A62C4EB0165816), SPH_C64(0x3AF774D2CD3AE83A), + SPH_C64(0x6906D2D06F69B969), SPH_C64(0x0941122D48092409), + SPH_C64(0x70D7E0ADA770DD70), SPH_C64(0xB66F7154D9B6E2B6), + SPH_C64(0xD01EBDB7CED067D0), SPH_C64(0xEDD6C77E3BED93ED), + SPH_C64(0xCCE285DB2ECC17CC), SPH_C64(0x426884572A421542), + SPH_C64(0x982C2DC2B4985A98), SPH_C64(0xA4ED550E49A4AAA4), + SPH_C64(0x287550885D28A028), SPH_C64(0x5C86B831DA5C6D5C), + 
SPH_C64(0xF86BED3F93F8C7F8), SPH_C64(0x86C211A444862286) +}; + +/* + * Round constants: InitVector_RC is the __constant__ array that whirlpool_device_round indexes by round number; plain_RC lists ten values in a static const array declared without a device memory-space qualifier (presumably the host-side source uploaded into InitVector_RC elsewhere; confirm). + */ + + __constant__ uint64_t InitVector_RC[10]; + +static const uint64_t plain_RC[10] = { + SPH_C64(0x4F01B887E8C62318), + SPH_C64(0x52916F79F5D2A636), + SPH_C64(0x357B0CA38E9BBC60), + SPH_C64(0x57FE4B2EC2D7E01D), + SPH_C64(0xDA4AF09FE5377715), + SPH_C64(0x856BA0B10A29C958), + SPH_C64(0x67053ECBF4105DBD), + SPH_C64(0xD8957DA78B4127E4), + SPH_C64(0x9E4717DD667CEEFB), + SPH_C64(0x33835AAD07BF2DCA) +}; + +/* ====================================================================== */ +/* BYTE(x, n): shifts x right by 8*n bits, casts to unsigned and masks with 0xFF, i.e. yields the n-th byte of x counted from the least significant end. */ +#define BYTE(x, n) ((unsigned)((x) >> (8 * (n))) & 0xFF) +//#define asBYTE(x, n) byte(x,n) +static __device__ __forceinline__ uint64_t ROUND_ELT(const uint64_t* __restrict sharedMemory,uint64_t* in,int i0,int i1,int i2,int i3,int i4,int i5,int i6,int i7) +{ +uint32_t idx0, idx1, idx2, idx3, idx4, idx5, idx6, idx7; +/* ROUND_ELT: for k = 0..7, reads byte k (in memory order, through a uint8_t view of in) of the 64-bit word in[i_k] and adds k*256 to form an index into sharedMemory; returns xor8 of the eight uint64_t entries fetched. NOTE(review): assumes sharedMemory holds eight consecutive 256-entry tables (2048 uint64_t), and that byte k in memory equals BYTE(word, k), i.e. a little-endian target; confirm against the kernel that fills the table. xor8 is defined outside this view, presumably the XOR of its eight arguments. */ + + + +idx0 = ((uint8_t*)in)[8*i0]; +idx1 = ((uint8_t*)in)[8*i1+1]+256; +idx2 = ((uint8_t*)in)[8*i2+2]+512; +idx3 = ((uint8_t*)in)[8*i3+3]+768; +idx4 = ((uint8_t*)in)[8*i4+4]+1024; +idx5 = ((uint8_t*)in)[8*i5+5]+1280; +idx6 = ((uint8_t*)in)[8*i6+6]+1536; +idx7 = ((uint8_t*)in)[8*i7+7]+1792; +/* Fetch the eight table entries selected by the indices above. */ +uint64_t S0=sharedMemory[idx0]; +uint64_t S1=sharedMemory[idx1]; +uint64_t S2=sharedMemory[idx2]; +uint64_t S3=sharedMemory[idx3]; +uint64_t S4=sharedMemory[idx4]; +uint64_t S5=sharedMemory[idx5]; +uint64_t S6=sharedMemory[idx6]; +uint64_t S7=sharedMemory[idx7]; + + + + +uint64_t result = xor8(S0, S1, S2, S3, S4, S5, S6, S7); + +return result; + +} + + +/* ROUND_ELTo and ROUND_ELTold: macro forms of an eight-way table lookup combined with the ^ operator; BYTE(in[i_k], k) indexes plain_T0..plain_T7 or old1_T0..old1_T7 directly. The old1_T* tables are not defined in this part of the file and no use of either macro is visible here. */ +#define ROUND_ELTo(in, i0, i1, i2, i3, i4, i5, i6, i7) \ + ( plain_T0[BYTE(in[i0], 0)] \ + ^ plain_T1[BYTE(in[i1], 1)] \ + ^ plain_T2[BYTE(in[i2], 2)] \ + ^ plain_T3[BYTE(in[i3], 3)] \ + ^ plain_T4[BYTE(in[i4], 4)] \ + ^ plain_T5[BYTE(in[i5], 5)] \ + ^ plain_T6[BYTE(in[i6], 6)] \ + ^ plain_T7[BYTE(in[i7], 7)]) + +#define ROUND_ELTold(in, i0, i1, i2, i3, i4, i5, i6, i7) \ + ( old1_T0[BYTE(in[i0], 0)] \ + ^ old1_T1[BYTE(in[i1], 1)] \ + ^ 
old1_T2[BYTE(in[i2], 2)] \ + ^ old1_T3[BYTE(in[i3], 3)] \ + ^ old1_T4[BYTE(in[i4], 4)] \ + ^ old1_T5[BYTE(in[i5], 5)] \ + ^ old1_T6[BYTE(in[i6], 6)] \ + ^ old1_T7[BYTE(in[i7], 7)]) + + +/* whirlpool_device_round: runs 10 iterations that update the eight-word arrays h and n in place; sharedMemory is only passed through to ROUND_ELT as its lookup table. */ +static __device__ __forceinline__ void whirlpool_device_round(const uint64_t* __restrict sharedMemory,uint64_t* n, uint64_t* h) +{ + +uint64_t t0, t1, t2, t3, t4, t5, t6, t7; +uint64_t T0, T1, T2, T3, T4, T5, T6, T7; + +/* r selects the round constant InitVector_RC[r]. */ +#pragma unroll + for (unsigned r = 0; r < 10; r ++) { +/* t0..t7 are computed from h and T0..T7 from n. For output element j, byte position k is taken from word (j - k) mod 8. All sixteen values are computed before h or n is overwritten. */ + t0 = ROUND_ELT(sharedMemory,h, 0, 7, 6, 5, 4, 3, 2, 1); + t1 = ROUND_ELT(sharedMemory,h, 1, 0, 7, 6, 5, 4, 3, 2); + t2 = ROUND_ELT(sharedMemory,h, 2, 1, 0, 7, 6, 5, 4, 3); + t3 = ROUND_ELT(sharedMemory,h, 3, 2, 1, 0, 7, 6, 5, 4); + t4 = ROUND_ELT(sharedMemory,h, 4, 3, 2, 1, 0, 7, 6, 5); + t5 = ROUND_ELT(sharedMemory,h, 5, 4, 3, 2, 1, 0, 7, 6); + t6 = ROUND_ELT(sharedMemory,h, 6, 5, 4, 3, 2, 1, 0, 7); + t7 = ROUND_ELT(sharedMemory,h, 7, 6, 5, 4, 3, 2, 1, 0); + T0 = ROUND_ELT(sharedMemory,n, 0, 7, 6, 5, 4, 3, 2, 1); + T1 = ROUND_ELT(sharedMemory,n, 1, 0, 7, 6, 5, 4, 3, 2); + T2 = ROUND_ELT(sharedMemory,n, 2, 1, 0, 7, 6, 5, 4, 3); + T3 = ROUND_ELT(sharedMemory,n, 3, 2, 1, 0, 7, 6, 5, 4); + T4 = ROUND_ELT(sharedMemory,n, 4, 3, 2, 1, 0, 7, 6, 5); + T5 = ROUND_ELT(sharedMemory,n, 5, 4, 3, 2, 1, 0, 7, 6); + T6 = ROUND_ELT(sharedMemory,n, 6, 5, 4, 3, 2, 1, 0, 7); + T7 = ROUND_ELT(sharedMemory,n, 7, 6, 5, 4, 3, 2, 1, 0); +/* h takes t0..t7, with InitVector_RC[r] combined into element 0 via xor1. */ + h[0] = xor1(t0,InitVector_RC[r]); + h[1] = t1; + h[2] = t2; + h[3] = t3; + h[4] = t4; + h[5] = t5; + h[6] = t6; + h[7] = t7; + + +/* n[k] combines T_k with the new h[k] via xor1; n[0] passes T0, t0 and the round constant to xor3 instead of reading h[0] back. NOTE(review): xor1 and xor3 are defined outside this view, presumably 2-input and 3-input XOR; confirm. */ + n[0] = xor3(T0,t0,InitVector_RC[r]); + n[1] = xor1(T1,h[1]); + n[2] = xor1(T2,h[2]); + n[3] = xor1(T3,h[3]); + n[4] = xor1(T4,h[4]); + n[5] = xor1(T5,h[5]); + n[6] = xor1(T6,h[6]); + n[7] = xor1(T7,h[7]); + + } + + +} + +static __device__ __forceinline__ void whirlpool_device_finalround(const uint64_t* __restrict sharedMemory,uint64_t* n, uint64_t* h) +{ + +uint64_t t0, t1, t2, t3, t4, t5, t6, t7; + + + +#pragma unroll + for (unsigned r = 0; r < 9; r ++) { + + t0 
= ROUND_ELT(sharedMemory,h, 0, 7, 6, 5, 4, 3, 2, 1); + t1 = ROUND_ELT(sharedMemory,h, 1, 0, 7, 6, 5, 4, 3, 2); + t2 = ROUND_ELT(sharedMemory,h, 2, 1, 0, 7, 6, 5, 4, 3); + t3 = ROUND_ELT(sharedMemory,h, 3, 2, 1, 0, 7, 6, 5, 4); + t4 = ROUND_ELT(sharedMemory,h, 4, 3, 2, 1, 0, 7, 6, 5); + t5 = ROUND_ELT(sharedMemory,h, 5, 4, 3, 2, 1, 0, 7, 6); + t6 = ROUND_ELT(sharedMemory,h, 6, 5, 4, 3, 2, 1, 0, 7); + t7 = ROUND_ELT(sharedMemory,h, 7, 6, 5, 4, 3, 2, 1, 0); + + h[0] = xor1(t0,InitVector_RC[r]); + h[1] = t1; + h[2] = t2; + h[3] = t3; + h[4] = t4; + h[5] = t5; + h[6] = t6; + h[7] = t7; + + t0 = ROUND_ELT(sharedMemory,n, 0, 7, 6, 5, 4, 3, 2, 1); + t1 = ROUND_ELT(sharedMemory,n, 1, 0, 7, 6, 5, 4, 3, 2); + t2 = ROUND_ELT(sharedMemory,n, 2, 1, 0, 7, 6, 5, 4, 3); + t3 = ROUND_ELT(sharedMemory,n, 3, 2, 1, 0, 7, 6, 5, 4); + t4 = ROUND_ELT(sharedMemory,n, 4, 3, 2, 1, 0, 7, 6, 5); + t5 = ROUND_ELT(sharedMemory,n, 5, 4, 3, 2, 1, 0, 7, 6); + t6 = ROUND_ELT(sharedMemory,n, 6, 5, 4, 3, 2, 1, 0, 7); + t7 = ROUND_ELT(sharedMemory,n, 7, 6, 5, 4, 3, 2, 1, 0); + + n[0] = xor1(t0,h[0]); + n[1] = xor1(t1,h[1]); + n[2] = xor1(t2,h[2]); + n[3] = xor1(t3,h[3]); + n[4] = xor1(t4,h[4]); + n[5] = xor1(t5,h[5]); + n[6] = xor1(t6,h[6]); + n[7] = xor1(t7,h[7]); + + } + h[3] = ROUND_ELT(sharedMemory,h, 3, 2, 1, 0, 7, 6, 5, 4); + t3 = ROUND_ELT(sharedMemory,n, 3, 2, 1, 0, 7, 6, 5, 4); + n[3] = xor1(t3,h[3]); +} + + +static void whirlpool_round(uint64_t* n, uint64_t* h){ + uint64_t t0, t1, t2, t3, t4, t5, t6, t7; + for (unsigned r = 0; r < 10; r ++) { + t0 = ROUND_ELTo(h, 0, 7, 6, 5, 4, 3, 2, 1) ^ plain_RC[r]; + t1 = ROUND_ELTo(h, 1, 0, 7, 6, 5, 4, 3, 2); + t2 = ROUND_ELTo(h, 2, 1, 0, 7, 6, 5, 4, 3); + t3 = ROUND_ELTo(h, 3, 2, 1, 0, 7, 6, 5, 4); + t4 = ROUND_ELTo(h, 4, 3, 2, 1, 0, 7, 6, 5); + t5 = ROUND_ELTo(h, 5, 4, 3, 2, 1, 0, 7, 6); + t6 = ROUND_ELTo(h, 6, 5, 4, 3, 2, 1, 0, 7); + t7 = ROUND_ELTo(h, 7, 6, 5, 4, 3, 2, 1, 0); + + h[0] = t0; + h[1] = t1; + h[2] = t2; + h[3] = t3; + h[4] = t4; + h[5] = 
t5; + h[6] = t6; + h[7] = t7; + + t0 = ROUND_ELTo(n, 0, 7, 6, 5, 4, 3, 2, 1) ^ h[0]; + t1 = ROUND_ELTo(n, 1, 0, 7, 6, 5, 4, 3, 2) ^ h[1]; + t2 = ROUND_ELTo(n, 2, 1, 0, 7, 6, 5, 4, 3) ^ h[2]; + t3 = ROUND_ELTo(n, 3, 2, 1, 0, 7, 6, 5, 4) ^ h[3]; + t4 = ROUND_ELTo(n, 4, 3, 2, 1, 0, 7, 6, 5) ^ h[4]; + t5 = ROUND_ELTo(n, 5, 4, 3, 2, 1, 0, 7, 6) ^ h[5]; + t6 = ROUND_ELTo(n, 6, 5, 4, 3, 2, 1, 0, 7) ^ h[6]; + t7 = ROUND_ELTo(n, 7, 6, 5, 4, 3, 2, 1, 0) ^ h[7]; + + n[0] = t0; + n[1] = t1; + n[2] = t2; + n[3] = t3; + n[4] = t4; + n[5] = t5; + n[6] = t6; + n[7] = t7; + } +} + +static void whirlpool_round_old(uint64_t* n, uint64_t* h){ + uint64_t t0, t1, t2, t3, t4, t5, t6, t7; + for (unsigned r = 0; r < 10; r ++) { + t0 = ROUND_ELTold(h, 0, 7, 6, 5, 4, 3, 2, 1) ^ old1_RC[r]; + t1 = ROUND_ELTold(h, 1, 0, 7, 6, 5, 4, 3, 2); + t2 = ROUND_ELTold(h, 2, 1, 0, 7, 6, 5, 4, 3); + t3 = ROUND_ELTold(h, 3, 2, 1, 0, 7, 6, 5, 4); + t4 = ROUND_ELTold(h, 4, 3, 2, 1, 0, 7, 6, 5); + t5 = ROUND_ELTold(h, 5, 4, 3, 2, 1, 0, 7, 6); + t6 = ROUND_ELTold(h, 6, 5, 4, 3, 2, 1, 0, 7); + t7 = ROUND_ELTold(h, 7, 6, 5, 4, 3, 2, 1, 0); + + h[0] = t0; + h[1] = t1; + h[2] = t2; + h[3] = t3; + h[4] = t4; + h[5] = t5; + h[6] = t6; + h[7] = t7; + + t0 = ROUND_ELTold(n, 0, 7, 6, 5, 4, 3, 2, 1) ^ h[0]; + t1 = ROUND_ELTold(n, 1, 0, 7, 6, 5, 4, 3, 2) ^ h[1]; + t2 = ROUND_ELTold(n, 2, 1, 0, 7, 6, 5, 4, 3) ^ h[2]; + t3 = ROUND_ELTold(n, 3, 2, 1, 0, 7, 6, 5, 4) ^ h[3]; + t4 = ROUND_ELTold(n, 4, 3, 2, 1, 0, 7, 6, 5) ^ h[4]; + t5 = ROUND_ELTold(n, 5, 4, 3, 2, 1, 0, 7, 6) ^ h[5]; + t6 = ROUND_ELTold(n, 6, 5, 4, 3, 2, 1, 0, 7) ^ h[6]; + t7 = ROUND_ELTold(n, 7, 6, 5, 4, 3, 2, 1, 0) ^ h[7]; + + n[0] = t0; + n[1] = t1; + n[2] = t2; + n[3] = t3; + n[4] = t4; + n[5] = t5; + n[6] = t6; + n[7] = t7; + } +} + +__global__ void whirlpool512_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash) +{ + __shared__ uint64_t sharedMemory[2048]; + if(threadIdx.x < 256) + { + sharedMemory[threadIdx.x] = T0[threadIdx.x]; + 
sharedMemory[threadIdx.x+256] = T1[threadIdx.x]; + sharedMemory[threadIdx.x+512] = T2[threadIdx.x]; + sharedMemory[threadIdx.x+768] = T3[threadIdx.x]; + sharedMemory[threadIdx.x+1024] = T4[threadIdx.x]; + sharedMemory[threadIdx.x+1280] = T5[threadIdx.x]; + sharedMemory[threadIdx.x+1536] = T6[threadIdx.x]; + sharedMemory[threadIdx.x+1792] = T7[threadIdx.x]; + } + +// __syncthreads(); + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + + uint32_t nounce = startNounce + thread; + + uint64_t state[8]; + uint64_t n[8]; + uint64_t h[8]; + + + /// round 2 /////// + ////////////////////////////////// + n[0] = c_PaddedMessage80[8]; //read data + n[1] = c_PaddedMessage80[9]; + ((uint32_t*)n)[3]=cuda_swab32(nounce); + uint64_t tempnonce =n[1]; + n[2] = c_PaddedMessage80[10]; //whirlpool + n[3] = 0; + n[4] = 0; + n[5] = 0; + n[6] = 0; + n[7] = 0x8002000000000000; + + +#pragma unroll 8 + for (int i=0;i<8;i++) { + h[i] = stateo[i]; } //read state +#pragma unroll 8 + for (int i=0;i<8;i++) { + n[i] = xor1(n[i],h[i]);} + + whirlpool_device_round(sharedMemory,n,h); + + state[0] = xor3(stateo[0],n[0],c_PaddedMessage80[8]); + state[1] = xor3(stateo[1],n[1],tempnonce); + state[2] = xor3(stateo[2],n[2],c_PaddedMessage80[10]);// whirlpool + state[3] = xor1(stateo[3],n[3]); + state[4] = xor1(stateo[4],n[4]); + state[5] = xor1(stateo[5],n[5]); + state[6] = xor1(stateo[6],n[6]); + state[7] = xor3(stateo[7],n[7],0x8002000000000000); + + + uint64_t *outHash = (uint64_t *)outputHash + 8 * thread; + + for(int i=0;i<8;i++) + outHash[i] = state[i]; + + + } // thread < threads + +} + +__global__ void __launch_bounds__(512,2) m7_whirlpool512_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash) +{ + + __shared__ uint64_t sharedMemory[2048]; + if(threadIdx.x < 256) + { + sharedMemory[threadIdx.x] = T0[threadIdx.x]; + sharedMemory[threadIdx.x+256] = T1[threadIdx.x]; + sharedMemory[threadIdx.x+512] = T2[threadIdx.x]; + sharedMemory[threadIdx.x+768] = 
T3[threadIdx.x]; + sharedMemory[threadIdx.x+1024] = T4[threadIdx.x]; + sharedMemory[threadIdx.x+1280] = T5[threadIdx.x]; + sharedMemory[threadIdx.x+1536] = T6[threadIdx.x]; + sharedMemory[threadIdx.x+1792] = T7[threadIdx.x]; + } + + __syncthreads(); + + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + + uint32_t nounce = startNounce + thread; + + uint64_t state[8]; + uint64_t n[8]; + uint64_t h[8]; + + + n[6]=c_PaddedMessage80[14]; + ((uint32_t*)n)[13]=nounce; + uint64_t tempnonce = n[6]; + n[6] = xor1(tempnonce,stateo[6]); + +#pragma unroll 8 + for (int i=0;i<8;i++) { + h[i] = stateo[i]; } //read state +#pragma unroll 6 + for (int i=0;i<6;i++){ + n[i] = xor1(c_PaddedMessage80[8+i],h[i]);} + n[7] = xor1(c_PaddedMessage80[15],h[7]); + + whirlpool_device_round(sharedMemory,n,h); + state[0] = xor3(stateo[0],n[0],c_PaddedMessage80[8+0]); + state[1] = xor3(stateo[1],n[1],c_PaddedMessage80[8+1]); + state[2] = xor3(stateo[2],n[2],c_PaddedMessage80[8+2]); + state[3] = xor3(stateo[3],n[3],c_PaddedMessage80[8+3]); + state[4] = xor3(stateo[4],n[4],c_PaddedMessage80[8+4]); + state[5] = xor3(stateo[5],n[5],c_PaddedMessage80[8+5]); + state[6] = xor3(stateo[6],n[6],tempnonce); + state[7] = xor3(stateo[7],n[7],c_PaddedMessage80[8+7]); + +//// round 3 +#pragma unroll 7 + for (int i=0;i<7;i++) {n[i]=state[i];} + n[7] = xor1(state[7],0xd003000000000000); + +#pragma unroll 8 + for (int i=0;i<8;i++) {h[i] = state[i];} + + + whirlpool_device_round(sharedMemory,n,h); + state[0] = xor1(state[0],n[0]); + state[1] = xor1(state[1],n[1]); + state[2] = xor1(state[2],n[2]); + state[3] = xor1(state[3],n[3]); + state[4] = xor1(state[4],n[4]); + state[5] = xor1(state[5],n[5]); + state[6] = xor1(state[6],n[6]); + state[7] = xor3(state[7],n[7],0xd003000000000000); + + + +#pragma unroll 8 +for (int i=0;i<8;i++) {outputHash[i*threads+thread]=state[i];} + + } // thread < threads + +} + + +__global__ void whirlpool512_gpu_hash_64(int threads, uint32_t startNounce, 
uint64_t *g_hash, uint32_t *g_nonceVector) +{ + + + __shared__ uint64_t sharedMemory[2048]; + if(threadIdx.x < 256) + { + sharedMemory[threadIdx.x] = T0[threadIdx.x]; + sharedMemory[threadIdx.x+256] = T1[threadIdx.x]; + sharedMemory[threadIdx.x+512] = T2[threadIdx.x]; + sharedMemory[threadIdx.x+768] = T3[threadIdx.x]; + sharedMemory[threadIdx.x+1024] = T4[threadIdx.x]; + sharedMemory[threadIdx.x+1280] = T5[threadIdx.x]; + sharedMemory[threadIdx.x+1536] = T6[threadIdx.x]; + sharedMemory[threadIdx.x+1792] = T7[threadIdx.x]; + } + +// __syncthreads(); + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread); + + int hashPosition = nounce - startNounce; + uint64_t *inpHash = (uint64_t*)g_hash + 8*hashPosition; + + uint64_t state[8]; + uint64_t n[8]; + uint64_t h[8]; + + + #pragma unroll 8 + for (int i=0;i<8;i++){ + n[i] = inpHash[i];} + #pragma unroll 8 + for (int i=0;i<8;i++){ + h[i] = 0;} + + whirlpool_device_round(sharedMemory,n,h); +#pragma unroll 8 + for (int i=0;i<8;i++) { + state[i] = xor1(n[i],inpHash[i]);} +#pragma unroll 6 + for (int i=1;i<7;i++) { + n[i]=0;} + + n[0] = 0x80; + n[7] = 0x2000000000000; + +#pragma unroll 8 + for (int i=0;i<8;i++) { + h[i] = state[i];} + +#pragma unroll 6 + for (int i=1;i<7;i++) { + n[i] = h[i];} + n[0] = xor1(n[0],h[0]); + n[7] = xor1(n[7],h[7]); + + whirlpool_device_round(sharedMemory,n,h); + state[0] = xor3(state[0],n[0],0x80); + state[1] = xor1(state[1],n[1]); + state[2] = xor1(state[2],n[2]); + state[3] = xor1(state[3],n[3]); + state[4] = xor1(state[4],n[4]); + state[5] = xor1(state[5],n[5]); + state[6] = xor1(state[6],n[6]); + state[7] = xor3(state[7],n[7],0x2000000000000); + + #pragma unroll 8 + for (unsigned i = 0; i < 8; i ++) + inpHash[i] = state[i]; + + } + } + + __global__ void whirlpool512_gpu_finalhash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector, uint32_t 
*resNounce) +{ + + + __shared__ uint64_t sharedMemory[2048]; + if(threadIdx.x < 256) + { + sharedMemory[threadIdx.x] = T0[threadIdx.x]; + sharedMemory[threadIdx.x+256] = T1[threadIdx.x]; + sharedMemory[threadIdx.x+512] = T2[threadIdx.x]; + sharedMemory[threadIdx.x+768] = T3[threadIdx.x]; + sharedMemory[threadIdx.x+1024] = T4[threadIdx.x]; + sharedMemory[threadIdx.x+1280] = T5[threadIdx.x]; + sharedMemory[threadIdx.x+1536] = T6[threadIdx.x]; + sharedMemory[threadIdx.x+1792] = T7[threadIdx.x]; + } + +// __syncthreads(); + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread); + + int hashPosition = nounce - startNounce; + uint64_t *inpHash = (uint64_t*)g_hash + 8 * hashPosition; + + + uint64_t state[8]; + uint64_t n[8]; + uint64_t h[8]; + + + #pragma unroll 8 + for (int i=0;i<8;i++){ + n[i] = inpHash[i];} + #pragma unroll 8 + for (int i=0;i<8;i++){ + h[i] = 0;} + +whirlpool_device_round(sharedMemory,n,h); +#pragma unroll 8 + for (int i=0;i<8;i++) { + state[i] = xor1(n[i],inpHash[i]);} +#pragma unroll 6 + for (int i=1;i<7;i++) { + n[i]=0;} + + n[0] = 0x80; + n[7] = 0x2000000000000; + +#pragma unroll 8 + for (int i=0;i<8;i++) { + h[i] = state[i];} +#pragma unroll 6 + for (int i=1;i<7;i++) { + n[i] = h[i];} + n[0] = xor1(n[0],h[0]); + n[7] = xor1(n[7],h[7]); + + whirlpool_device_finalround(sharedMemory,n,h); + state[3] = xor1(state[3],n[3]); + + + bool rc = false; + if (state[3]<=((uint64_t*)pTarget)[3]) {rc=true;} + + if(rc == true) + { + if(resNounce[0] > nounce) + resNounce[0] = nounce; + } + + } + } + +void whirlpool512_cpu_init(int thr_id, int threads, int flag) +{ + + if (flag==1){ + cudaMemcpyToSymbol(T0,old1_T0,sizeof(old1_T0),0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(T1,old1_T1,sizeof(old1_T1),0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(T2,old1_T2,sizeof(old1_T2),0, cudaMemcpyHostToDevice); + 
cudaMemcpyToSymbol(T3,old1_T3,sizeof(old1_T3),0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(T4,old1_T4,sizeof(old1_T4),0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(T5,old1_T5,sizeof(old1_T5),0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(T6,old1_T6,sizeof(old1_T6),0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(T7,old1_T7,sizeof(old1_T7),0, cudaMemcpyHostToDevice); + + cudaMemcpyToSymbol(InitVector_RC,old1_RC,sizeof(plain_RC),0, cudaMemcpyHostToDevice); + } else { + cudaMemcpyToSymbol(T0,plain_T0,sizeof(plain_T0),0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(T1,plain_T1,sizeof(plain_T1),0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(T2,plain_T2,sizeof(plain_T2),0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(T3,plain_T3,sizeof(plain_T3),0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(T4,plain_T4,sizeof(plain_T4),0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(T5,plain_T5,sizeof(plain_T5),0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(T6,plain_T6,sizeof(plain_T6),0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(T7,plain_T7,sizeof(plain_T7),0, cudaMemcpyHostToDevice); + + cudaMemcpyToSymbol(InitVector_RC,plain_RC,sizeof(plain_RC),0, cudaMemcpyHostToDevice); + } + cudaMalloc(&d_WNonce[thr_id], sizeof(uint32_t)); + cudaMallocHost(&d_wnounce[thr_id], 1*sizeof(uint32_t)); + +} + + +__host__ void whirlpool512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +{ + + const int threadsperblock = 512; // Alignment mit mixtob Grösse. 
NICHT ÄNDERN + + // berechne wie viele Thread Blocks wir brauchen + dim3 grid((threads + threadsperblock-1)/threadsperblock); + dim3 block(threadsperblock); + + size_t shared_size = 0; + + + whirlpool512_gpu_hash_64<<>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector); + + MyStreamSynchronize(NULL, order, thr_id); +} + +__host__ uint32_t whirlpool512_cpu_finalhash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +{ + uint32_t result = 0xffffffff; + cudaMemset(d_WNonce[thr_id], 0xff, sizeof(uint32_t)); + const int threadsperblock = 512; // maximize occupancy + + dim3 grid(threads/threadsperblock); + dim3 block(threadsperblock); + + size_t shared_size = 0; + + + whirlpool512_gpu_finalhash_64<<>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector,d_WNonce[thr_id]); + + MyStreamSynchronize(NULL, order, thr_id); + cudaMemcpy(d_wnounce[thr_id], d_WNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost); + //cudaThreadSynchronize(); + result = *d_wnounce[thr_id]; + + return result; + + +} + +__host__ void whirlpool512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order) +{ + + const int threadsperblock = 512; + dim3 grid((threads + threadsperblock-1)/threadsperblock); + dim3 block(threadsperblock); + + size_t shared_size = 0; + + whirlpool512_gpu_hash_80<<>>(threads, startNounce, d_outputHash); + + MyStreamSynchronize(NULL, order, thr_id); +} + +__host__ void m7_whirlpool512_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order) +{ + + const int threadsperblock = 512; + dim3 block(threadsperblock); + dim3 grid(threads/threadsperblock); + + size_t shared_size =0; + m7_whirlpool512_gpu_hash_120<<>>(threads, startNounce, d_outputHash); + + MyStreamSynchronize(NULL, order, thr_id); +} + + +__host__ void whirlpool512_setBlock_80(void *pdata, const void *ptarget) +{ + unsigned char PaddedMessage[128]; + uint8_t ending =0x80; + 
memcpy(PaddedMessage, pdata, 80); + memset(PaddedMessage+80, ending, 1); + memset(PaddedMessage+81, 0, 47); + cudaMemcpyToSymbol( pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); + + uint64_t* alt_data = (uint64_t*) pdata; + uint64_t state[8]; + uint64_t n[8]; + uint64_t h[8]; + h[0] = h[1] = h[2] = h[3] = h[4] = h[5] = h[6] = h[7] = 0; + for (int i=0;i<8;i++) {n[i]=alt_data[i];} + whirlpool_round_old(n, h); + for (int i=0;i<8;i++) {state[i]=n[i]^alt_data[i];} + cudaMemcpyToSymbol( stateo, state, 8*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); + +} + +__host__ void whirlpool512_setBlock_120(void *pdata) +{ + unsigned char PaddedMessage[128]; + uint8_t ending =0x80; + memcpy(PaddedMessage, pdata, 122); + memset(PaddedMessage+122,ending,1); + memset(PaddedMessage+123, 0, 5); //useless + cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); + + uint64_t* alt_data = (uint64_t*) pdata; + uint64_t state[8]; + uint64_t n[8]; + uint64_t h[8]; + h[0] = h[1] = h[2] = h[3] = h[4] = h[5] = h[6] = h[7] = 0; + for (int i=0;i<8;i++) {n[i]=alt_data[i];} + whirlpool_round(n, h); + for (int i=0;i<8;i++) {state[i]=n[i]^alt_data[i];} + cudaMemcpyToSymbol( stateo, state, 8*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); + + +} \ No newline at end of file diff --git a/x13/cuda_x13_hamsi512.cu b/x13/cuda_x13_hamsi512.cu index a9039a9d74..e67e6afe63 100644 --- a/x13/cuda_x13_hamsi512.cu +++ b/x13/cuda_x13_hamsi512.cu @@ -37,32 +37,36 @@ * @author phm */ +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + + +#include +#include +#include // aus heavy.cu extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); -typedef unsigned char uint8_t; -typedef unsigned short uint16_t; -typedef unsigned int uint32_t; -typedef unsigned long long uint64_t; +typedef unsigned char 
BitSequence; + + +#include "cuda_helper.h" #define SPH_C64(x) ((uint64_t)(x ## ULL)) #define SPH_C32(x) ((uint32_t)(x ## U)) #define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) -#define SWAB32(x) ( __byte_perm(x, x, 0x0123) ) +#define SWAB32(x) cuda_swab32(x) +#define ROTL32(x,n) SPH_ROTL32(x,n) -#if __CUDA_ARCH__ < 350 - // Kepler (Compute 3.0) - #define ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) -#else - // Kepler (Compute 3.5) - #define ROTL32(x, n) __funnelshift_l( (x), (x), (n) ) -#endif static __constant__ uint32_t d_alpha_n[32]; static __constant__ uint32_t d_alpha_f[32]; static __constant__ uint32_t d_T512[64][16]; + + static const uint32_t alpha_n[] = { SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), diff --git a/x13/fresh.cu b/x13/fresh.cu new file mode 100644 index 0000000000..8016f37ab1 --- /dev/null +++ b/x13/fresh.cu @@ -0,0 +1,166 @@ +/* + * fresh algorithm built on cbuchner1's original X11 + * + */ + +extern "C" +{ +#include "sph/sph_shavite.h" +#include "sph/sph_simd.h" +#include "sph/sph_echo.h" + +#include "miner.h" +} + +// aus cpu-miner.c +extern int device_map[8]; + +// Speicher für Input/Output der verketteten Hashfunktionen +static uint32_t *d_hash[8]; + +extern void x11_shavite512_cpu_init(int thr_id, int threads); +extern void x11_shavite512_setBlock_80(void *pdata); +extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_simd512_cpu_init(int thr_id, int threads); +extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_echo512_cpu_init(int thr_id, int threads); +extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t 
startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_check_cpu_init(int thr_id, int threads); +extern void quark_check_cpu_setTarget(const void *ptarget); + +extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order); + +extern void quark_compactTest_cpu_init(int thr_id, int threads); +extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, + uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, + int order); + +// fresh Hashfunktion +inline void fresh_hash(void *state, const void *input) +{ + // shavite-simd-shavite-simd-echo + + + sph_shavite512_context ctx_shavite; + sph_simd512_context ctx_simd; + sph_echo512_context ctx_echo; + + uint32_t hash[16]; + + // shavite 1 + sph_shavite512_init(&ctx_shavite); + sph_shavite512 (&ctx_shavite, input, 80); + sph_shavite512_close(&ctx_shavite, (void*) hash); + + // simd 1 + sph_simd512_init(&ctx_simd); + sph_simd512 (&ctx_simd, (const void*) hash, 64); + sph_simd512_close(&ctx_simd, (void*) hash); + + // shavite 2 + sph_shavite512_init(&ctx_shavite); + sph_shavite512 (&ctx_shavite, (const void*) hash, 64); + sph_shavite512_close(&ctx_shavite, (void*) hash); + + // simd 2 + sph_simd512_init(&ctx_simd); + sph_simd512 (&ctx_simd, (const void*) hash, 64); + sph_simd512_close(&ctx_simd, (void*) hash); + + // echo + sph_echo512_init(&ctx_echo); + sph_echo512 (&ctx_echo, (const void*) hash, 64); + sph_echo512_close(&ctx_echo, (void*) hash); + + memcpy(state, hash, 32); +} + + +extern bool opt_benchmark; + +extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done) +{ + const uint32_t first_nonce = pdata[19]; + + if (opt_benchmark) + ((uint32_t*)ptarget)[7] = 0x0000ff; + + const uint32_t Htarg = ptarget[7]; + + const int throughput = 256*256*8; + + 
static bool init[8] = {0,0,0,0,0,0,0,0}; + if (!init[thr_id]) + { + cudaSetDevice(device_map[thr_id]); + + // Konstanten kopieren, Speicher belegen + cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); + x11_shavite512_cpu_init(thr_id, throughput); + x11_simd512_cpu_init(thr_id, throughput); + x11_echo512_cpu_init(thr_id, throughput); + + + + quark_check_cpu_init(thr_id, throughput); + init[thr_id] = true; + } + + //unsigned char echobefore[64], echoafter[64]; + + uint32_t endiandata[20]; + for (int k=0; k < 20; k++) + be32enc(&endiandata[k], ((uint32_t*)pdata)[k]); + + x11_shavite512_setBlock_80((void*)endiandata); + quark_check_cpu_setTarget(ptarget); + do { + int order = 0; + + // Shavite512 + x11_shavite512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + + // SIMD512 + x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + + // Shavite 512 + x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + + // SIMD512 + x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + + // echo + x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + + // Scan nach Gewinner Hashes auf der GPU + + uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + if (foundNonce != 0xffffffff) + { + uint32_t vhash64[8]; + be32enc(&endiandata[19], foundNonce); + fresh_hash(vhash64, endiandata); + + if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) { + + pdata[19] = foundNonce; + *hashes_done = foundNonce - first_nonce + 1; + return 1; + } else { + applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU! 
vhash64 %08x and htarg %08x", thr_id, foundNonce,vhash64[7],Htarg); + } + } + + pdata[19] += throughput; + + } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = pdata[19] - first_nonce + 1; + return 0; +} diff --git a/x13/goalcoin.cu b/x13/goalcoin.cu new file mode 100644 index 0000000000..c89df37678 --- /dev/null +++ b/x13/goalcoin.cu @@ -0,0 +1,222 @@ +/* + * Goalcoin + * + */ + +extern "C" +{ +#include "sph/sph_blake.h" +#include "sph/sph_bmw.h" +#include "sph/sph_groestl.h" +#include "sph/sph_skein.h" +#include "sph/sph_jh.h" +#include "sph/sph_keccak.h" + +#include "sph/sph_luffa.h" +#include "sph/sph_cubehash.h" +#include "sph/sph_shavite.h" +#include "sph/sph_simd.h" +#include "sph/sph_echo.h" + +#include "sph/sph_hamsi.h" +#include "sph/sph_fugue.h" + +#include "sph/sph_shabal.h" +#include "sph/sph_whirlpool.h" + +#include "miner.h" +} + +// aus cpu-miner.c +extern int device_map[8]; + +// Speicher für Input/Output der verketteten Hashfunktionen +static uint32_t *d_hash[8]; + +extern void quark_blake512_cpu_init(int thr_id, int threads); +extern void quark_blake512_cpu_setBlock_80(void *pdata); +extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_bmw512_cpu_init(int thr_id, int threads); +extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_groestl512_cpu_init(int thr_id, int threads); +extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +//extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void 
quark_skein512_cpu_init(int thr_id, int threads); +extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_keccak512_cpu_init(int thr_id, int threads); +extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_jh512_cpu_init(int thr_id, int threads); +extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_luffa512_cpu_init(int thr_id, int threads); +extern void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_cubehash512_cpu_init(int thr_id, int threads); +extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_shavite512_cpu_init(int thr_id, int threads); +extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_simd512_cpu_init(int thr_id, int threads); +extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_echo512_cpu_init(int thr_id, int threads); +extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x13_hamsi512_cpu_init(int thr_id, int threads); +extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x13_fugue512_cpu_init(int thr_id, int threads); +extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, 
uint32_t *d_hash, int order); + +extern void x13_shabal512_cpu_init(int thr_id, int threads); +extern void x13_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void whirlpool512_cpu_init(int thr_id, int threads,int flag); +extern void whirlpool512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern uint32_t whirlpool512_cpu_finalhash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void whirlpool512_setBlock_80(void *pdata, const void *ptarget); + + + +// goalcoin hash function +inline void goalhash(void *state, const void *input) +{ + // blake-groestl-jh-keccak-skein-whirlpool + + sph_blake512_context ctx_blake; + + sph_groestl512_context ctx_groestl; + sph_jh512_context ctx_jh; + sph_keccak512_context ctx_keccak; + sph_skein512_context ctx_skein; + sph_whirlpool_context ctx_whirlpool; + + uint32_t hash[16]; + + sph_blake512_init(&ctx_blake); + // ZBLAKE; + sph_blake512 (&ctx_blake, input, 80); + sph_blake512_close(&ctx_blake, (void*) hash); + + + + sph_groestl512_init(&ctx_groestl); + // ZGROESTL; + sph_groestl512 (&ctx_groestl, (const void*) hash, 64); + sph_groestl512_close(&ctx_groestl, (void*) hash); + + sph_jh512_init(&ctx_jh); + // ZJH; + sph_jh512 (&ctx_jh, (const void*) hash, 64); + sph_jh512_close(&ctx_jh, (void*) hash); + + sph_keccak512_init(&ctx_keccak); + // ZKECCAK; + sph_keccak512 (&ctx_keccak, (const void*) hash, 64); + sph_keccak512_close(&ctx_keccak, (void*) hash); + + sph_skein512_init(&ctx_skein); + // ZSKEIN; + sph_skein512 (&ctx_skein, (const void*) hash, 64); + sph_skein512_close(&ctx_skein, (void*) hash); + + sph_whirlpool_init(&ctx_whirlpool); + sph_whirlpool (&ctx_whirlpool, (const void*) hash, 64); + sph_whirlpool_close(&ctx_whirlpool, (void*) hash); + + + memcpy(state, hash, 32); +} + + +extern bool opt_benchmark; + 
+extern "C" int scanhash_goal(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done) +{ + const uint32_t first_nonce = pdata[19]; + + if (opt_benchmark) + ((uint32_t*)ptarget)[7] = 0x0000ff; + + const uint32_t Htarg = ptarget[7]; + + const int throughput = 256*256*8; + + static bool init[8] = {0,0,0,0,0,0,0,0}; + if (!init[thr_id]) + { + cudaSetDevice(device_map[thr_id]); + + // Konstanten kopieren, Speicher belegen + cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); + quark_blake512_cpu_init(thr_id, throughput); + quark_groestl512_cpu_init(thr_id, throughput); + quark_skein512_cpu_init(thr_id, throughput); + quark_keccak512_cpu_init(thr_id, throughput); + quark_jh512_cpu_init(thr_id, throughput); + whirlpool512_cpu_init(thr_id, throughput,0); + + + init[thr_id] = true; + } + + //unsigned char echobefore[64], echoafter[64]; + + uint32_t endiandata[20]; + for (int k=0; k < 20; k++) + be32enc(&endiandata[k], ((uint32_t*)pdata)[k]); + + quark_blake512_cpu_setBlock_80((void*)endiandata); + whirlpool512_setBlock_80((void*)endiandata, ptarget); + + do { + int order = 0; + + // erstes Blake512 Hash mit CUDA + quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + + // das ist der unbedingte Branch für Groestl512 + quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + + // das ist der unbedingte Branch für JH512 + quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + + // das ist der unbedingte Branch für Keccak512 + quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + // das ist der unbedingte Branch für Skein512 + quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + + // Scan nach Gewinner Hashes auf der GPU + uint32_t foundNonce = whirlpool512_cpu_finalhash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + 
if (foundNonce != 0xffffffff) + { + uint32_t vhash64[8]; + be32enc(&endiandata[19], foundNonce); + goalhash(vhash64, endiandata); + + if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) { + + pdata[19] = foundNonce; + *hashes_done = foundNonce - first_nonce + 1; + return 1; + } else { + applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce); + } + } + + pdata[19] += throughput; + + } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = pdata[19] - first_nonce + 1; + return 0; +} diff --git a/x13/m7.cu b/x13/m7.cu new file mode 100644 index 0000000000..c116011394 --- /dev/null +++ b/x13/m7.cu @@ -0,0 +1,341 @@ +/* + * m7 algorithm + * + */ + +extern "C" +{ +#include "sph/sph_sha2.h" +#include "sph/sph_keccak.h" +#include "sph/sph_ripemd.h" +#include "sph/sph_haval.h" +#include "sph/sph_tiger.h" +#include "sph/sph_whirlpool.h" +#include "sph/sph_blake.h" +#include "miner.h" +} +//#include "mpir.h" + +extern int device_map[8]; + + +static uint64_t *d_hash[8]; +static uint64_t *KeccakH[8]; +static uint64_t *Sha512H[8]; +static uint64_t *d_prod0[8]; +static uint64_t *d_prod1[8]; + +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); +/* +static void mpz_set_uint256(mpz_t r, uint8_t *u) +{ + mpz_import(r, 32 / sizeof(unsigned long), -1, sizeof(unsigned long), -1, 0, u); +} + +static void mpz_get_uint256(mpz_t r, uint8_t *u) +{ + u=0; + mpz_export(u, 0, -1, sizeof(unsigned long), -1, 0, r); +} + +static void mpz_set_uint512(mpz_t r, uint8_t *u) +{ + mpz_import(r, 64 / sizeof(unsigned long), -1, sizeof(unsigned long), -1, 0, u); +} + +static void set_one_if_zero(uint8_t *hash512) { + for (int i = 0; i < 32; i++) { + if (hash512[i] != 0) { + return; + } + } + hash512[0] = 1; +} +*/ +//extern uint32_t m7_sha256_cpu_hash_300(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern uint32_t 
m7_sha256_cpu_hash_300(int thr_id, int threads, uint32_t startNounce, uint64_t *d_nonceVector, uint64_t *d_hash, int order); + +extern void m7_sha256_setBlock_120(void *data,const void *ptarget); +extern void m7_sha256_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order); +extern void m7_sha256_cpu_init(int thr_id, int threads); + + +extern void sha512_cpu_init(int thr_id, int threads); +extern void sha512_setBlock_120(void *pdata); +extern void m7_sha512_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order); + +extern void ripemd160_cpu_init(int thr_id, int threads); +extern void ripemd160_setBlock_120(void *pdata); +extern void m7_ripemd160_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order); + +extern void tiger192_cpu_init(int thr_id, int threads); +extern void tiger192_setBlock_120(void *pdata); +extern void m7_tiger192_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order); + + +extern void m7_bigmul_init(int thr_id, int threads); +extern void m7_bigmul_unroll1_cpu(int thr_id, int threads,uint64_t* Hash1, uint64_t* Hash2,uint64_t *finalHash,int order); +extern void m7_bigmul_unroll2_cpu(int thr_id, int threads,uint64_t* Hash1, uint64_t* Hash2,uint64_t *finalHash,int order); + +extern void cpu_mul(int thr_id, int threads, uint32_t alegs, uint32_t blegs, uint64_t *g_a, uint64_t *g_b, uint64_t *g_p, int order); +extern void cpu_mulT4(int thr_id, int threads, uint32_t alegs, uint32_t blegs, uint64_t *g_a, uint64_t *g_b, uint64_t *g_p, int order); +extern void mul_init(); + + +extern void m7_keccak512_setBlock_120(void *pdata); +extern void m7_keccak512_cpu_hash(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order); +extern void m7_keccak512_cpu_init(int thr_id, int threads); + +extern void whirlpool512_cpu_init(int thr_id, int threads, int flag); +extern void whirlpool512_setBlock_120(void 
*pdata); +extern void m7_whirlpool512_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order); + +extern void haval256_cpu_init(int thr_id, int threads); +extern void haval256_setBlock_120(void *data); +extern void m7_haval256_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order); + + +extern void quark_check_cpu_init(int thr_id, int threads); +extern void quark_check_cpu_setTarget(const void *ptarget); +extern uint32_t quark_check_cpu_hash_120(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint64_t *d_inputHash, int order); + + + +// m7 Hashfunktion +/* +inline void m7_hash(void *state, const void *input,uint32_t TheNonce, int debug) +{ + // sha256(sha256*sha512*keccak512*ripemd160*haval*tiger1*whirlpool) good luck with that... + + char data_str[245], hash_str[65], target_str[65]; + uint8_t *bdata = 0; + mpz_t bns[7]; + mpz_t product; + int rc = 0; + + for(int i=0; i < 7; i++){ + mpz_init(bns[i]); + } + mpz_init(product); + + + uint32_t data[32] ; + uint32_t *data_p64 = data + (116 / sizeof(data[0])); + uint8_t bhash[7][64]; + uint32_t hash[8]; + memcpy(data,input,122); + + + int M7_MIDSTATE_LEN = 116; + for(int i=0; i < 7; i++){ + mpz_init(bns[i]); + } + + sph_sha256_context ctx_final_sha256; + + sph_sha256_context ctx_sha256; + sph_sha512_context ctx_sha512; + sph_keccak512_context ctx_keccak; + sph_whirlpool_context ctx_whirlpool; + sph_haval256_5_context ctx_haval; + sph_tiger_context ctx_tiger; + sph_ripemd160_context ctx_ripemd; + + sph_sha256_init(&ctx_sha256); + sph_sha256 (&ctx_sha256, data, M7_MIDSTATE_LEN); + + sph_sha512_init(&ctx_sha512); + sph_sha512 (&ctx_sha512, data, M7_MIDSTATE_LEN); + + sph_keccak512_init(&ctx_keccak); + sph_keccak512 (&ctx_keccak, data, M7_MIDSTATE_LEN); + + sph_whirlpool_init(&ctx_whirlpool); + sph_whirlpool (&ctx_whirlpool, data, M7_MIDSTATE_LEN); + + sph_haval256_5_init(&ctx_haval); + sph_haval256_5 (&ctx_haval, data, 
M7_MIDSTATE_LEN); + + sph_tiger_init(&ctx_tiger); + sph_tiger (&ctx_tiger, data, M7_MIDSTATE_LEN); + + sph_ripemd160_init(&ctx_ripemd); + sph_ripemd160 (&ctx_ripemd, data, M7_MIDSTATE_LEN); + + sph_sha256_context ctx2_sha256; + sph_sha512_context ctx2_sha512; + sph_keccak512_context ctx2_keccak; + sph_whirlpool_context ctx2_whirlpool; + sph_haval256_5_context ctx2_haval; + sph_tiger_context ctx2_tiger; + sph_ripemd160_context ctx2_ripemd; + + data[29] = TheNonce; + + memset(bhash, 0, 7 * 64); + + ctx2_sha256 = ctx_sha256; + sph_sha256 (&ctx2_sha256, data_p64, 122 - M7_MIDSTATE_LEN); + sph_sha256_close(&ctx2_sha256, (void*)(bhash[0])); + + ctx2_sha512 = ctx_sha512; + sph_sha512 (&ctx2_sha512, data_p64, 122 - M7_MIDSTATE_LEN); + sph_sha512_close(&ctx2_sha512, (void*)(bhash[1])); + + ctx2_keccak = ctx_keccak; + sph_keccak512 (&ctx2_keccak, data_p64, 122 - M7_MIDSTATE_LEN); + sph_keccak512_close(&ctx2_keccak, (void*)(bhash[2])); + + ctx2_whirlpool = ctx_whirlpool; + sph_whirlpool (&ctx2_whirlpool, data_p64, 122 - M7_MIDSTATE_LEN); + sph_whirlpool_close(&ctx2_whirlpool, (void*)(bhash[3])); + + ctx2_haval = ctx_haval; + sph_haval256_5 (&ctx2_haval, data_p64, 122 - M7_MIDSTATE_LEN); + sph_haval256_5_close(&ctx2_haval, (void*)(bhash[4])); + + ctx2_tiger = ctx_tiger; + sph_tiger (&ctx2_tiger, data_p64, 122 - M7_MIDSTATE_LEN); + sph_tiger_close(&ctx2_tiger, (void*)(bhash[5])); + + ctx2_ripemd = ctx_ripemd; + sph_ripemd160 (&ctx2_ripemd, data_p64, 122 - M7_MIDSTATE_LEN); + sph_ripemd160_close(&ctx2_ripemd, (void*)(bhash[6])); +if (debug == 1) { + for (int i=0;i<16;i++) {applog(LOG_INFO,"sha256[%d]=%02x %02x %02x %02x sha512[%d]=%02x %02x %02x %02x keccak[%d]=%02x %02x %02x %02x whirlpool[2][%d]=%02x %02x %02x %02x haval[%d]=%02x %02x %02x %02x tiger[%d]=%02x %02x %02x %02x ripemd[%d]=%02x %02x %02x %02x\n", + i,bhash[0][4*i+3],bhash[0][4*i+2],bhash[0][4*i+1],bhash[0][4*i+0], + i,bhash[1][4*i+3],bhash[1][4*i+2],bhash[1][4*i+1],bhash[1][4*i+0], + 
i,bhash[2][4*i+3],bhash[2][4*i+2],bhash[2][4*i+1],bhash[2][4*i+0], + i,bhash[3][4*i+3],bhash[3][4*i+2],bhash[3][4*i+1],bhash[3][4*i+0], + i,bhash[4][4*i+3],bhash[4][4*i+2],bhash[4][4*i+1],bhash[4][4*i+0], + i,bhash[5][4*i+3],bhash[5][4*i+2],bhash[5][4*i+1],bhash[5][4*i+0], + i,bhash[6][4*i+3],bhash[6][4*i+2],bhash[6][4*i+1],bhash[6][4*i+0] + );} +} + for(int i=0; i < 7; i++){ + set_one_if_zero(bhash[i]); + mpz_set_uint512(bns[i],bhash[i]); + } + + for(int i=6; i > 0; i--){ + mpz_mul(bns[i-1], bns[i-1], bns[i]); + } + + int bytes = mpz_sizeinbase(bns[0], 256); + bdata = (uint8_t *)realloc(bdata, bytes); + mpz_export((void *)bdata, NULL, -1, 1, 0, 0, bns[0]); + sph_sha256_init(&ctx_final_sha256); + sph_sha256 (&ctx_final_sha256, bdata, bytes); + sph_sha256_close(&ctx_final_sha256, (void*)(hash)); + + memcpy(state, hash, 32); +} +*/ +extern float tp_coef[8]; +extern bool opt_benchmark; + + +extern "C" int scanhash_m7(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done) +{ + + if (opt_benchmark) + ((uint32_t*)ptarget)[7] = 0x0000ff; + + +// const int throughput = 256*256*16; + const int throughput = 2560*512*1; + + const uint32_t FirstNonce = pdata[29]; + + static bool init[8] = {0,0,0,0,0,0,0,0}; + if (!init[thr_id]) + { + + cudaSetDevice(device_map[thr_id]); + cudaMalloc(&d_prod0[thr_id], 35 *sizeof(uint64_t) * throughput*tp_coef[thr_id]); + cudaMalloc(&d_prod1[thr_id], 38 *sizeof(uint64_t) * throughput*tp_coef[thr_id]); + cudaMalloc(&KeccakH[thr_id], 8 *sizeof(uint64_t) * throughput*tp_coef[thr_id]); + cudaMalloc(&Sha512H[thr_id], 8 *sizeof(uint64_t) * throughput*tp_coef[thr_id]); + + m7_sha256_cpu_init(thr_id, throughput*tp_coef[thr_id]); + sha512_cpu_init(thr_id, throughput*tp_coef[thr_id]); + m7_keccak512_cpu_init(thr_id, throughput*tp_coef[thr_id]); + haval256_cpu_init(thr_id, throughput*tp_coef[thr_id]); + tiger192_cpu_init(thr_id, throughput*tp_coef[thr_id]); + whirlpool512_cpu_init(thr_id, 
throughput*tp_coef[thr_id],0); + ripemd160_cpu_init(thr_id, throughput*tp_coef[thr_id]); + quark_check_cpu_init(thr_id, throughput*tp_coef[thr_id]); + m7_bigmul_init(thr_id, throughput*tp_coef[thr_id]); + mul_init(); + init[thr_id] = true; + } + + const uint32_t Htarg = ptarget[7]; + + whirlpool512_setBlock_120((void*)pdata); + m7_sha256_setBlock_120((void*)pdata,ptarget); + sha512_setBlock_120((void*)pdata); + haval256_setBlock_120((void*)pdata); + m7_keccak512_setBlock_120((void*)pdata); + ripemd160_setBlock_120((void*)pdata); + tiger192_setBlock_120((void*)pdata); + quark_check_cpu_setTarget(ptarget); + + do { + + int order = 0; + + + m7_keccak512_cpu_hash(thr_id, throughput*tp_coef[thr_id], pdata[29], KeccakH[thr_id], order++); + + m7_sha512_cpu_hash_120(thr_id, throughput*tp_coef[thr_id], pdata[29], Sha512H[thr_id], order++); + + cpu_mulT4(0, throughput*tp_coef[thr_id], 8, 8, Sha512H[thr_id], KeccakH[thr_id], d_prod0[thr_id],order); //64 + MyStreamSynchronize(0,order++,thr_id); + + m7_whirlpool512_cpu_hash_120(thr_id, throughput*tp_coef[thr_id], pdata[29], KeccakH[thr_id], order++); + + cpu_mulT4(0, throughput*tp_coef[thr_id],8, 16, KeccakH[thr_id], d_prod0[thr_id], d_prod1[thr_id],order); //128 + MyStreamSynchronize(0,order++,thr_id); + +m7_sha256_cpu_hash_120(thr_id, throughput*tp_coef[thr_id], pdata[29], KeccakH[thr_id], order++); +cpu_mulT4(0, throughput*tp_coef[thr_id], 4, 24, KeccakH[thr_id], d_prod1[thr_id], d_prod0[thr_id],order); //96 + MyStreamSynchronize(0,order++,thr_id); + + m7_haval256_cpu_hash_120(thr_id, throughput*tp_coef[thr_id], pdata[29], KeccakH[thr_id], order++); +cpu_mulT4(0, throughput*tp_coef[thr_id], 4, 28, KeccakH[thr_id], d_prod0[thr_id], d_prod1[thr_id],order); //112 + MyStreamSynchronize(0,order++,thr_id); + + m7_tiger192_cpu_hash_120(thr_id, throughput*tp_coef[thr_id], pdata[29], KeccakH[thr_id], order++); + m7_bigmul_unroll1_cpu(thr_id, throughput*tp_coef[thr_id], KeccakH[thr_id], d_prod1[thr_id], d_prod0[thr_id],order); + 
MyStreamSynchronize(0,order++,thr_id); + + m7_ripemd160_cpu_hash_120(thr_id, throughput*tp_coef[thr_id], pdata[29], KeccakH[thr_id], order++); + + m7_bigmul_unroll2_cpu(thr_id, throughput*tp_coef[thr_id], KeccakH[thr_id], d_prod0[thr_id], d_prod1[thr_id],order); + MyStreamSynchronize(0,order++,thr_id); + + +uint32_t foundNonce = m7_sha256_cpu_hash_300(thr_id, throughput*tp_coef[thr_id], pdata[29], NULL, d_prod1[thr_id], order); +if (foundNonce != 0xffffffff) { + uint32_t vhash64[8]; +// m7_hash(vhash64, pdata,foundNonce,0); + +// if( (vhash64[7]<=Htarg ) ) { + pdata[29] = foundNonce; + *hashes_done = foundNonce - FirstNonce + 1; + return 1; +// } else { +// applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU! vhash64 %08x and htarg %08x", thr_id, foundNonce,vhash64[7],Htarg); +// m7_hash(vhash64, pdata,foundNonce,1); +// } + } // foundNonce + pdata[29] += throughput*tp_coef[thr_id]; +*hashes_done +=throughput*tp_coef[thr_id]; + } while (pdata[29] < max_nonce && !work_restart[thr_id].restart); + +//*hashes_done = pdata[29] - FirstNonce + 1; + return 0; +} diff --git a/x13/m7_keccak512.cu b/x13/m7_keccak512.cu new file mode 100644 index 0000000000..8a295d4fdc --- /dev/null +++ b/x13/m7_keccak512.cu @@ -0,0 +1,387 @@ + +#include +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + + +#include +#include +#include + + +extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); +extern int compute_version[8]; + +#include "cuda_helper.h" +static __constant__ uint64_t stateo[25]; +static __constant__ uint64_t RC[24]; +static const uint64_t cpu_RC[24] = { + 0x0000000000000001ull, 0x0000000000008082ull, + 0x800000000000808aull, 0x8000000080008000ull, + 0x000000000000808bull, 0x0000000080000001ull, + 0x8000000080008081ull, 0x8000000000008009ull, + 0x000000000000008aull, 0x0000000000000088ull, + 0x0000000080008009ull, 0x000000008000000aull, + 0x000000008000808bull, 0x800000000000008bull, + 
0x8000000000008089ull, 0x8000000000008003ull, + 0x8000000000008002ull, 0x8000000000000080ull, + 0x000000000000800aull, 0x800000008000000aull, + 0x8000000080008081ull, 0x8000000000008080ull, + 0x0000000080000001ull, 0x8000000080008008ull +}; + +static __device__ __forceinline__ void keccak_block(uint64_t *s, const uint64_t *keccak_round_constants) { + size_t i; + uint64_t t[5], u[5], v, w; + + /* absorb input */ + +//#pragma unroll 24 + for (i = 0; i < 24; i++) { + /* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */ + + t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20]; + t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21]; + t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22]; + t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23]; + t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24]; + + /* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */ + + uint64_t temp0,temp1,temp2,temp3,temp4; + temp0 = ROTL64(t[0], 1); + temp1 = ROTL64(t[1], 1); + temp2 = ROTL64(t[2], 1); + temp3 = ROTL64(t[3], 1); + temp4 = ROTL64(t[4], 1); + u[0] = xor1(t[4],temp1); + u[1] = xor1(t[0],temp2); + u[2] = xor1(t[1],temp3); + u[3] = xor1(t[2],temp4); + u[4] = xor1(t[3],temp0); + + /* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */ + s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0]; + s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1]; + s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2]; + s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3]; + s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4]; + + /* rho pi: b[..] = rotl(a[..], ..) 
*/ + v = s[ 1]; + s[ 1] = ROTL64(s[ 6], 44); + s[ 6] = ROTL64(s[ 9], 20); + s[ 9] = ROTL64(s[22], 61); + s[22] = ROTL64(s[14], 39); + s[14] = ROTL64(s[20], 18); + s[20] = ROTL64(s[ 2], 62); + s[ 2] = ROTL64(s[12], 43); + s[12] = ROTL64(s[13], 25); + s[13] = ROTL64(s[19], 8); + s[19] = ROTL64(s[23], 56); + s[23] = ROTL64(s[15], 41); + s[15] = ROTL64(s[ 4], 27); + s[ 4] = ROTL64(s[24], 14); + s[24] = ROTL64(s[21], 2); + s[21] = ROTL64(s[ 8], 55); + s[ 8] = ROTL64(s[16], 45); + s[16] = ROTL64(s[ 5], 36); + s[ 5] = ROTL64(s[ 3], 28); + s[ 3] = ROTL64(s[18], 21); + s[18] = ROTL64(s[17], 15); + s[17] = ROTL64(s[11], 10); + s[11] = ROTL64(s[ 7], 6); + s[ 7] = ROTL64(s[10], 3); + s[10] = ROTL64( v, 1); + + /* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */ + + v = s[ 0]; w = s[ 1]; + s[ 0] ^= (~w) & s[ 2]; + s[ 1] ^= (~s[ 2]) & s[ 3]; + s[ 2] ^= (~s[ 3]) & s[ 4]; + s[ 3] ^= (~s[ 4]) & v; + s[ 4] ^= (~v) & w; + v = s[ 5]; w = s[ 6]; + s[ 5] ^= (~w) & s[ 7]; + s[ 6] ^= (~s[ 7]) & s[ 8]; + s[ 7] ^= (~s[ 8]) & s[ 9]; + s[ 8] ^= (~s[ 9]) & v; + s[ 9] ^= (~v) & w; + v = s[10]; w = s[11]; + s[10] ^= (~w) & s[12]; + s[11] ^= (~s[12]) & s[13]; + s[12] ^= (~s[13]) & s[14]; + s[13] ^= (~s[14]) & v; + s[14] ^= (~v) & w; + v = s[15]; w = s[16]; + s[15] ^= (~w) & s[17]; + s[16] ^= (~s[17]) & s[18]; + s[17] ^= (~s[18]) & s[19]; + s[18] ^= (~s[19]) & v; + s[19] ^= (~v) & w; + v = s[20]; w = s[21]; + s[20] ^= (~w) & s[22]; + s[21] ^= (~s[22]) & s[23]; + s[22] ^= (~s[23]) & s[24]; + s[23] ^= (~s[24]) & v; + s[24] ^= (~v) & w; + + /* iota: a[0,0] ^= round constant */ + s[0] ^= keccak_round_constants[i]; + } +} + +static __device__ __forceinline__ void keccak_blockv35(uint2 *s, const uint64_t *keccak_round_constants) { + size_t i; + uint2 t[5], u[5], v, w; + + + for (i = 0; i < 24; i++) { + /* theta: c = a[0,i] ^ a[1,i] ^ .. 
a[4,i] */ + t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20]; + t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21]; + t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22]; + t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23]; + t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24]; + + /* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */ + u[0] = t[4] ^ ROL2(t[1], 1); + u[1] = t[0] ^ ROL2(t[2], 1); + u[2] = t[1] ^ ROL2(t[3], 1); + u[3] = t[2] ^ ROL2(t[4], 1); + u[4] = t[3] ^ ROL2(t[0], 1); + + /* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */ + s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0]; + s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1]; + s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2]; + s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3]; + s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4]; + + /* rho pi: b[..] = rotl(a[..], ..) */ + v = s[1]; + s[1] = ROL2(s[6], 44); + s[6] = ROL2(s[9], 20); + s[9] = ROL2(s[22], 61); + s[22] = ROL2(s[14], 39); + s[14] = ROL2(s[20], 18); + s[20] = ROL2(s[2], 62); + s[2] = ROL2(s[12], 43); + s[12] = ROL2(s[13], 25); + s[13] = ROL2(s[19], 8); + s[19] = ROL2(s[23], 56); + s[23] = ROL2(s[15], 41); + s[15] = ROL2(s[4], 27); + s[4] = ROL2(s[24], 14); + s[24] = ROL2(s[21], 2); + s[21] = ROL2(s[8], 55); + s[8] = ROL2(s[16], 45); + s[16] = ROL2(s[5], 36); + s[5] = ROL2(s[3], 28); + s[3] = ROL2(s[18], 21); + s[18] = ROL2(s[17], 15); + s[17] = ROL2(s[11], 10); + s[11] = ROL2(s[7], 6); + s[7] = ROL2(s[10], 3); + s[10] = ROL2(v, 1); + + /* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */ + v = s[0]; w = s[1]; s[0] ^= (~w) & s[2]; s[1] ^= (~s[2]) & s[3]; s[2] ^= (~s[3]) & s[4]; s[3] ^= (~s[4]) & v; s[4] ^= (~v) & w; + v = s[5]; w = s[6]; s[5] ^= (~w) & s[7]; s[6] ^= (~s[7]) & s[8]; s[7] ^= (~s[8]) & s[9]; s[8] ^= (~s[9]) & v; s[9] ^= (~v) & w; + v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w; + v = 
s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w; + v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w; + + /* iota: a[0,0] ^= round constant */ + s[0] ^= vectorize(keccak_round_constants[i]); + } +} + + +static __forceinline__ void keccak_block_host(uint64_t *s, const uint64_t *keccak_round_constants) { + size_t i; + uint64_t t[5], u[5], v, w; + + /* absorb input */ + + for (i = 0; i < 24; i++) { + /* theta: c = a[0,i] ^ a[1,i] ^ .. a[4,i] */ + t[0] = s[0] ^ s[5] ^ s[10] ^ s[15] ^ s[20]; + t[1] = s[1] ^ s[6] ^ s[11] ^ s[16] ^ s[21]; + t[2] = s[2] ^ s[7] ^ s[12] ^ s[17] ^ s[22]; + t[3] = s[3] ^ s[8] ^ s[13] ^ s[18] ^ s[23]; + t[4] = s[4] ^ s[9] ^ s[14] ^ s[19] ^ s[24]; + + /* theta: d[i] = c[i+4] ^ rotl(c[i+1],1) */ + u[0] = t[4] ^ ROTL64(t[1], 1); + u[1] = t[0] ^ ROTL64(t[2], 1); + u[2] = t[1] ^ ROTL64(t[3], 1); + u[3] = t[2] ^ ROTL64(t[4], 1); + u[4] = t[3] ^ ROTL64(t[0], 1); + + /* theta: a[0,i], a[1,i], .. a[4,i] ^= d[i] */ + s[0] ^= u[0]; s[5] ^= u[0]; s[10] ^= u[0]; s[15] ^= u[0]; s[20] ^= u[0]; + s[1] ^= u[1]; s[6] ^= u[1]; s[11] ^= u[1]; s[16] ^= u[1]; s[21] ^= u[1]; + s[2] ^= u[2]; s[7] ^= u[2]; s[12] ^= u[2]; s[17] ^= u[2]; s[22] ^= u[2]; + s[3] ^= u[3]; s[8] ^= u[3]; s[13] ^= u[3]; s[18] ^= u[3]; s[23] ^= u[3]; + s[4] ^= u[4]; s[9] ^= u[4]; s[14] ^= u[4]; s[19] ^= u[4]; s[24] ^= u[4]; + + /* rho pi: b[..] = rotl(a[..], ..) 
*/ + v = s[ 1]; + s[ 1] = ROTL64(s[ 6], 44); + s[ 6] = ROTL64(s[ 9], 20); + s[ 9] = ROTL64(s[22], 61); + s[22] = ROTL64(s[14], 39); + s[14] = ROTL64(s[20], 18); + s[20] = ROTL64(s[ 2], 62); + s[ 2] = ROTL64(s[12], 43); + s[12] = ROTL64(s[13], 25); + s[13] = ROTL64(s[19], 8); + s[19] = ROTL64(s[23], 56); + s[23] = ROTL64(s[15], 41); + s[15] = ROTL64(s[ 4], 27); + s[ 4] = ROTL64(s[24], 14); + s[24] = ROTL64(s[21], 2); + s[21] = ROTL64(s[ 8], 55); + s[ 8] = ROTL64(s[16], 45); + s[16] = ROTL64(s[ 5], 36); + s[ 5] = ROTL64(s[ 3], 28); + s[ 3] = ROTL64(s[18], 21); + s[18] = ROTL64(s[17], 15); + s[17] = ROTL64(s[11], 10); + s[11] = ROTL64(s[ 7], 6); + s[ 7] = ROTL64(s[10], 3); + s[10] = ROTL64( v, 1); + + /* chi: a[i,j] ^= ~b[i,j+1] & b[i,j+2] */ + v = s[ 0]; w = s[ 1]; s[ 0] ^= (~w) & s[ 2]; s[ 1] ^= (~s[ 2]) & s[ 3]; s[ 2] ^= (~s[ 3]) & s[ 4]; s[ 3] ^= (~s[ 4]) & v; s[ 4] ^= (~v) & w; + v = s[ 5]; w = s[ 6]; s[ 5] ^= (~w) & s[ 7]; s[ 6] ^= (~s[ 7]) & s[ 8]; s[ 7] ^= (~s[ 8]) & s[ 9]; s[ 8] ^= (~s[ 9]) & v; s[ 9] ^= (~v) & w; + v = s[10]; w = s[11]; s[10] ^= (~w) & s[12]; s[11] ^= (~s[12]) & s[13]; s[12] ^= (~s[13]) & s[14]; s[13] ^= (~s[14]) & v; s[14] ^= (~v) & w; + v = s[15]; w = s[16]; s[15] ^= (~w) & s[17]; s[16] ^= (~s[17]) & s[18]; s[17] ^= (~s[18]) & s[19]; s[18] ^= (~s[19]) & v; s[19] ^= (~v) & w; + v = s[20]; w = s[21]; s[20] ^= (~w) & s[22]; s[21] ^= (~s[22]) & s[23]; s[22] ^= (~s[23]) & s[24]; s[23] ^= (~s[24]) & v; s[24] ^= (~v) & w; + + /* iota: a[0,0] ^= round constant */ + s[0] ^= keccak_round_constants[i]; + } +} + + + + __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding) + + + +__global__ void m7_keccak512_gpu_hash_120(int threads, uint32_t startNounce, uint64_t *outputHash) +{ + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + + uint32_t nounce = startNounce + thread; + + uint64_t state[25]; + + #pragma unroll 16 + for (int i=9;i<25;i++) {state[i]=stateo[i];} + + state[0] = 
xor1(stateo[0],c_PaddedMessage80[9]); + state[1] = xor1(stateo[1],c_PaddedMessage80[10]); + state[2] = xor1(stateo[2],c_PaddedMessage80[11]); + state[3] = xor1(stateo[3],c_PaddedMessage80[12]); + state[4] = xor1(stateo[4],c_PaddedMessage80[13]); + state[5] = xor1(stateo[5],REPLACE_HIWORD(c_PaddedMessage80[14],nounce)); + state[6] = xor1(stateo[6],c_PaddedMessage80[15]); + state[7] = stateo[7]; + state[8] = xor1(stateo[8],0x8000000000000000); + + keccak_block(state,RC); + +#pragma unroll 8 +for (int i=0;i<8;i++) {outputHash[i*threads+thread]=state[i];} + + + } //thread +} + +__global__ void __launch_bounds__(256, 3) m7_keccak512_gpu_hash_120_v35(int threads, uint32_t startNounce, uint64_t *outputHash) +{ + + int thread = (blockDim.x * blockIdx.x + threadIdx.x); + if (thread < threads) + { + + uint32_t nounce = startNounce + thread; + + uint2 state[25]; + +#pragma unroll 25 + for (int i = 0; i<25; i++) { state[i] = vectorize(stateo[i]); } + + state[0] ^= vectorize(c_PaddedMessage80[9]); + state[1] ^= vectorize(c_PaddedMessage80[10]); + state[2] ^= vectorize(c_PaddedMessage80[11]); + state[3] ^= vectorize(c_PaddedMessage80[12]); + state[4] ^= vectorize(c_PaddedMessage80[13]); + state[5] ^= make_uint2(((uint32_t*)c_PaddedMessage80)[28],nounce); + state[6] ^= vectorize(c_PaddedMessage80[15]); + + state[8] ^= make_uint2(0,0x80000000); + + keccak_blockv35(state, RC); + +#pragma unroll 8 + for (int i = 0; i<8; i++) { outputHash[i*threads + thread] = devectorize(state[i]); } + + + } //thread +} + + +void m7_keccak512_cpu_init(int thr_id, int threads) +{ + + cudaMemcpyToSymbol( RC,cpu_RC,sizeof(cpu_RC),0,cudaMemcpyHostToDevice); +} + +__host__ void m7_keccak512_setBlock_120(void *pdata) +{ + + unsigned char PaddedMessage[128]; + uint8_t ending =0x01; + memcpy(PaddedMessage, pdata, 122); + memset(PaddedMessage+122,ending,1); + memset(PaddedMessage+123, 0, 5); + cudaMemcpyToSymbol( c_PaddedMessage80, PaddedMessage, 16*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); + uint64_t* 
alt_data = (uint64_t*) pdata; + uint64_t state[25]; + for(int i=0;i<25;i++) {state[i]=0;} + + + for (int i=0;i<9;i++) {state[i] ^= alt_data[i];} + keccak_block_host(state,cpu_RC); + + cudaMemcpyToSymbol(stateo, state, 25*sizeof(uint64_t), 0, cudaMemcpyHostToDevice); + +} + + +__host__ void m7_keccak512_cpu_hash(int thr_id, int threads, uint32_t startNounce, uint64_t *d_hash, int order) +{ + const int threadsperblock = 256; + + dim3 grid(threads/threadsperblock); + dim3 block(threadsperblock); + + size_t shared_size = 0; + if (compute_version[thr_id]<35) { + m7_keccak512_gpu_hash_120<<>>(threads, startNounce, d_hash); + } + else { + m7_keccak512_gpu_hash_120_v35 << > >(threads, startNounce, d_hash); + } + + MyStreamSynchronize(NULL, order, thr_id); +} + diff --git a/x13/whirlpool.cu b/x13/whirlpool.cu new file mode 100644 index 0000000000..4a9a6521ff --- /dev/null +++ b/x13/whirlpool.cu @@ -0,0 +1,128 @@ +/* + * whirlpool routine for new algorithm + * + */ + +extern "C" +{ +#include "sph/sph_whirlpool.h" + +#include "miner.h" +} + +// aus cpu-miner.c +extern int device_map[8]; + +// Speicher für Input/Output der verketteten Hashfunktionen +static uint32_t *d_hash[8]; + +extern void whirlpool512_cpu_init(int thr_id, int threads, int flag); +extern void whirlpool512_setBlock_80(void *pdata, const void *ptarget); +extern void whirlpool512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void whirlpool512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern uint32_t whirlpool512_cpu_finalhash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_check_cpu_init(int thr_id, int threads); +extern void quark_check_cpu_setTarget(const void *ptarget); + +extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int 
order); + +// fresh Hashfunktion +inline void wh_hash(void *state, const void *input) +{ + // shavite-simd-shavite-simd-echo + + + sph_whirlpool_context ctx_whirlpool; + + + uint32_t hash[16]; + + // shavite 1 + sph_whirlpool1_init(&ctx_whirlpool); + sph_whirlpool1 (&ctx_whirlpool, input, 80); + sph_whirlpool1_close(&ctx_whirlpool, (void*) hash); + + + sph_whirlpool1_init(&ctx_whirlpool); + sph_whirlpool1 (&ctx_whirlpool, (const void*) hash, 64); + sph_whirlpool1_close(&ctx_whirlpool, (void*) hash); + + sph_whirlpool1_init(&ctx_whirlpool); + sph_whirlpool1 (&ctx_whirlpool, (const void*) hash, 64); + sph_whirlpool1_close(&ctx_whirlpool, (void*) hash); + + sph_whirlpool1_init(&ctx_whirlpool); + sph_whirlpool1 (&ctx_whirlpool, (const void*) hash, 64); + sph_whirlpool1_close(&ctx_whirlpool, (void*) hash); + + memcpy(state, hash, 32); +} + + +extern bool opt_benchmark; + +extern "C" int scanhash_wh(int thr_id, uint32_t *pdata, + const uint32_t *ptarget, uint32_t max_nonce, + unsigned long *hashes_done) +{ + const uint32_t first_nonce = pdata[19]; + + if (opt_benchmark) + ((uint32_t*)ptarget)[7] = 0x0000ff; + + const uint32_t Htarg = ptarget[7]; + + const int throughput = 256*256*8*4; + + static bool init[8] = {0,0,0,0,0,0,0,0}; + if (!init[thr_id]) + { + cudaSetDevice(device_map[thr_id]); + + // Konstanten kopieren, Speicher belegen + cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); + whirlpool512_cpu_init(thr_id, throughput,1); + +// quark_check_cpu_init(thr_id, throughput); + init[thr_id] = true; + } + + //unsigned char echobefore[64], echoafter[64]; + + uint32_t endiandata[20]; + for (int k=0; k < 20; k++) { + be32enc(&endiandata[k], ((uint32_t*)pdata)[k]); } + whirlpool512_setBlock_80((void*)endiandata, ptarget); +// quark_check_cpu_setTarget(ptarget); + do { + int order = 0; + + whirlpool512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); + whirlpool512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + 
whirlpool512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + uint32_t foundNonce = whirlpool512_cpu_finalhash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++); + if (foundNonce != 0xffffffff) + { + + uint32_t vhash64[8]; + be32enc(&endiandata[19], foundNonce); + + wh_hash(vhash64, endiandata); + + if( (vhash64[7]<=Htarg) && fulltest(vhash64, ptarget) ) { + + pdata[19] = foundNonce; + *hashes_done = foundNonce - first_nonce + 1; + return 1; + } else { + applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU! vhash64 %08x and htarg %08x", thr_id, foundNonce,vhash64[7],Htarg); + } + } + pdata[19] += throughput; + + } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); + + *hashes_done = pdata[19] - first_nonce + 1; + return 0; +} diff --git a/x13/x14.cu b/x13/x14.cu new file mode 100644 index 0000000000..4dd7cdba5c --- /dev/null +++ b/x13/x14.cu @@ -0,0 +1,311 @@ +/* + * X14 algorithm built on cbuchner1's original X11 + * + */ + +extern "C" +{ +#include "sph/sph_blake.h" +#include "sph/sph_bmw.h" +#include "sph/sph_groestl.h" +#include "sph/sph_skein.h" +#include "sph/sph_jh.h" +#include "sph/sph_keccak.h" + +#include "sph/sph_luffa.h" +#include "sph/sph_cubehash.h" +#include "sph/sph_shavite.h" +#include "sph/sph_simd.h" +#include "sph/sph_echo.h" + +#include "sph/sph_hamsi.h" +#include "sph/sph_fugue.h" + +#include "sph/sph_shabal.h" + + +#include "miner.h" +} + +// aus cpu-miner.c +extern int device_map[8]; + +// Speicher für Input/Output der verketteten Hashfunktionen +static uint32_t *d_hash[8]; + +extern void quark_blake512_cpu_init(int thr_id, int threads); +extern void quark_blake512_cpu_setBlock_80(void *pdata); +extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void 
quark_bmw512_cpu_init(int thr_id, int threads); +extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_groestl512_cpu_init(int thr_id, int threads); +extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +//extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_skein512_cpu_init(int thr_id, int threads); +extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_keccak512_cpu_init(int thr_id, int threads); +extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_jh512_cpu_init(int thr_id, int threads); +extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_luffa512_cpu_init(int thr_id, int threads); +extern void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_cubehash512_cpu_init(int thr_id, int threads); +extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_shavite512_cpu_init(int thr_id, int threads); +extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_simd512_cpu_init(int thr_id, int threads); +extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern 
void x11_echo512_cpu_init(int thr_id, int threads);
extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void x13_hamsi512_cpu_init(int thr_id, int threads);
extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void x13_fugue512_cpu_init(int thr_id, int threads);
extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void x13_shabal512_cpu_init(int thr_id, int threads);
extern void x13_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
/*
extern void x13_whirlpool512_cpu_init(int thr_id, int threads);
extern void x13_whirlpool512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
*/
extern void quark_check_cpu_init(int thr_id, int threads);
extern void quark_check_cpu_setTarget(const void *ptarget);
extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);

extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
    uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
    int order);

/*
 * X14 hash function (CPU side, used to double-check GPU results).
 *
 * Chain: blake1-bmw2-grs3-skein4-jh5-keccak6-luffa7-cubehash8-shavite9-
 *        simd10-echo11-hamsi12-fugue13-shabal14
 *
 * input : 80 bytes are read by the first (blake512) stage.
 * state : receives the first 32 bytes of the final digest.
 *
 * Every stage after the first re-hashes the 64-byte digest of the previous
 * stage in place.
 */
inline void x14hash(void *state, const void *input)
{
    uint32_t hash[16]; // 64-byte working digest shared by all stages

    sph_blake512_context ctx_blake;
    sph_blake512_init(&ctx_blake);
    sph_blake512(&ctx_blake, input, 80);
    sph_blake512_close(&ctx_blake, (void*) hash);

    sph_bmw512_context ctx_bmw;
    sph_bmw512_init(&ctx_bmw);
    sph_bmw512(&ctx_bmw, (const void*) hash, 64);
    sph_bmw512_close(&ctx_bmw, (void*) hash);

    sph_groestl512_context ctx_groestl;
    sph_groestl512_init(&ctx_groestl);
    sph_groestl512(&ctx_groestl, (const void*) hash, 64);
    sph_groestl512_close(&ctx_groestl, (void*) hash);

    sph_skein512_context ctx_skein;
    sph_skein512_init(&ctx_skein);
    sph_skein512(&ctx_skein, (const void*) hash, 64);
    sph_skein512_close(&ctx_skein, (void*) hash);

    sph_jh512_context ctx_jh;
    sph_jh512_init(&ctx_jh);
    sph_jh512(&ctx_jh, (const void*) hash, 64);
    sph_jh512_close(&ctx_jh, (void*) hash);

    sph_keccak512_context ctx_keccak;
    sph_keccak512_init(&ctx_keccak);
    sph_keccak512(&ctx_keccak, (const void*) hash, 64);
    sph_keccak512_close(&ctx_keccak, (void*) hash);

    sph_luffa512_context ctx_luffa;
    sph_luffa512_init(&ctx_luffa);
    sph_luffa512(&ctx_luffa, (const void*) hash, 64);
    sph_luffa512_close(&ctx_luffa, (void*) hash);

    sph_cubehash512_context ctx_cubehash;
    sph_cubehash512_init(&ctx_cubehash);
    sph_cubehash512(&ctx_cubehash, (const void*) hash, 64);
    sph_cubehash512_close(&ctx_cubehash, (void*) hash);

    sph_shavite512_context ctx_shavite;
    sph_shavite512_init(&ctx_shavite);
    sph_shavite512(&ctx_shavite, (const void*) hash, 64);
    sph_shavite512_close(&ctx_shavite, (void*) hash);

    sph_simd512_context ctx_simd;
    sph_simd512_init(&ctx_simd);
    sph_simd512(&ctx_simd, (const void*) hash, 64);
    sph_simd512_close(&ctx_simd, (void*) hash);

    sph_echo512_context ctx_echo;
    sph_echo512_init(&ctx_echo);
    sph_echo512(&ctx_echo, (const void*) hash, 64);
    sph_echo512_close(&ctx_echo, (void*) hash);

    sph_hamsi512_context ctx_hamsi;
    sph_hamsi512_init(&ctx_hamsi);
    sph_hamsi512(&ctx_hamsi, (const void*) hash, 64);
    sph_hamsi512_close(&ctx_hamsi, (void*) hash);

    sph_fugue512_context ctx_fugue;
    sph_fugue512_init(&ctx_fugue);
    sph_fugue512(&ctx_fugue, (const void*) hash, 64);
    sph_fugue512_close(&ctx_fugue, (void*) hash);

    sph_shabal512_context ctx_shabal;
    sph_shabal512_init(&ctx_shabal);
    sph_shabal512(&ctx_shabal, (const void*) hash, 64);
    sph_shabal512_close(&ctx_shabal, (void*) hash);

    // Only the first 256 bits of the final digest are handed back.
    memcpy(state, hash, 32);
}


extern bool opt_benchmark;

/*
 * Scan a nonce range for the X14 algorithm on the GPU.
 *
 * thr_id      : indexes init[], d_hash[], device_map[] and work_restart[].
 * pdata       : 20 words of block header; pdata[19] is the start nonce on
 *               entry and is advanced (or set to the winning nonce) on exit.
 * ptarget     : target words; word 7 is used as a quick pre-check before
 *               fulltest(). In benchmark mode word 7 is overwritten with
 *               0x0000ff (the const is cast away, as in the original).
 * max_nonce   : scanning stops once pdata[19] reaches this value.
 * hashes_done : receives the number of nonces covered by this call.
 *
 * Returns 1 when a nonce reported by the GPU also validates on the CPU,
 * 0 otherwise (range exhausted, work restart requested, or the device hash
 * buffer could not be allocated).
 */
extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
    const uint32_t *ptarget, uint32_t max_nonce,
    unsigned long *hashes_done)
{
    const uint32_t first_nonce = pdata[19];

    if (opt_benchmark)
        ((uint32_t*)ptarget)[7] = 0x0000ff;

    const uint32_t Htarg = ptarget[7];

    // Number of nonces processed per pass of the loop below.
    const int throughput = 256*256*8;

    static bool init[8] = {0,0,0,0,0,0,0,0};
    if (!init[thr_id])
    {
        cudaSetDevice(device_map[thr_id]);

        // Allocate the device buffer: 16 words (64 bytes) per nonce.
        cudaError_t err = cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
        if (err != cudaSuccess)
        {
            // Do not mark the thread as initialised; report and bail out
            // instead of running every stage on an invalid buffer.
            applog(LOG_INFO, "GPU #%d: cudaMalloc of the hash buffer failed: %s", thr_id, cudaGetErrorString(err));
            *hashes_done = 0;
            return 0;
        }

        // Per-stage initialisation (copies constants, per the original note).
        quark_blake512_cpu_init(thr_id, throughput);
        quark_groestl512_cpu_init(thr_id, throughput);
        quark_skein512_cpu_init(thr_id, throughput);
        quark_bmw512_cpu_init(thr_id, throughput);
        quark_keccak512_cpu_init(thr_id, throughput);
        quark_jh512_cpu_init(thr_id, throughput);
        x11_luffa512_cpu_init(thr_id, throughput);
        x11_cubehash512_cpu_init(thr_id, throughput);
        x11_shavite512_cpu_init(thr_id, throughput);
        x11_simd512_cpu_init(thr_id, throughput);
        x11_echo512_cpu_init(thr_id, throughput);
        x13_hamsi512_cpu_init(thr_id, throughput);
        x13_fugue512_cpu_init(thr_id, throughput);
        x13_shabal512_cpu_init(thr_id, throughput);

        quark_check_cpu_init(thr_id, throughput);

        init[thr_id] = true;
    }

    // Big-endian copy of the 20 header words; word 19 is patched with the
    // candidate nonce before the CPU re-check.
    uint32_t endiandata[20];
    for (int k = 0; k < 20; k++)
        be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);

    quark_blake512_cpu_setBlock_80((void*)endiandata);
    quark_check_cpu_setTarget(ptarget);

    do {
        // Running call index handed to every stage.
        int order = 0;

        // First stage: Blake512 over the 80-byte header.
        quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);

        // All remaining stages are unconditional and work on d_hash in place.
        quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x13_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

        // Scan the GPU results for winning hashes.
        uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        if (foundNonce != 0xffffffff)
        {
            uint32_t vhash64[8];
            be32enc(&endiandata[19], foundNonce);
            x14hash(vhash64, endiandata);

            if ((vhash64[7] <= Htarg) && fulltest(vhash64, ptarget)) {
                pdata[19] = foundNonce;
                *hashes_done = foundNonce - first_nonce + 1;
                return 1;
            } else {
                applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
            }
        }

        // The nonce is 32 bits wide: if the next step would wrap past
        // 0xFFFFFFFF the range is exhausted. Without this guard the wrapped
        // value compares below max_nonce and the loop starts over.
        if ((uint64_t)pdata[19] + (uint64_t)throughput > 0xffffffffULL) {
            pdata[19] = max_nonce;
            break;
        }
        pdata[19] += throughput;

    } while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

    *hashes_done = pdata[19] - first_nonce + 1;
    return 0;
}
diff --git a/x13/x15.cu b/x13/x15.cu
new file mode 100644
index 0000000000..9e51ebcc83
--- /dev/null
+++ b/x13/x15.cu
@@ -0,0 +1,313 @@
/*
 * X15 algorithm built on cbuchner1's original X11
 *
 */

extern "C"
{
#include "sph/sph_blake.h"
#include "sph/sph_bmw.h"
#include "sph/sph_groestl.h"
#include "sph/sph_skein.h"
#include "sph/sph_jh.h"
#include "sph/sph_keccak.h"

#include "sph/sph_luffa.h"
#include "sph/sph_cubehash.h"
#include "sph/sph_shavite.h"
#include "sph/sph_simd.h"
#include "sph/sph_echo.h"

#include "sph/sph_hamsi.h"
#include "sph/sph_fugue.h"

#include "sph/sph_shabal.h"
#include "sph/sph_whirlpool.h"

#include "miner.h"
}

// from cpu-miner.c
extern int device_map[8];

// Memory for the input/output of the chained hash functions
static uint32_t *d_hash[8];

extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata);
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void quark_bmw512_cpu_init(int thr_id, int threads);
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void quark_groestl512_cpu_init(int thr_id, int threads);
extern void
quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +//extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_skein512_cpu_init(int thr_id, int threads); +extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_keccak512_cpu_init(int thr_id, int threads); +extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_jh512_cpu_init(int thr_id, int threads); +extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_luffa512_cpu_init(int thr_id, int threads); +extern void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_cubehash512_cpu_init(int thr_id, int threads); +extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_shavite512_cpu_init(int thr_id, int threads); +extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_simd512_cpu_init(int thr_id, int threads); +extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_echo512_cpu_init(int thr_id, int threads); +extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x13_hamsi512_cpu_init(int thr_id, int threads); +extern void 
x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void x13_fugue512_cpu_init(int thr_id, int threads);
extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void x13_shabal512_cpu_init(int thr_id, int threads);
extern void x13_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void whirlpool512_cpu_init(int thr_id, int threads,int flag);
extern void whirlpool512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void quark_check_cpu_init(int thr_id, int threads);
extern void quark_check_cpu_setTarget(const void *ptarget);
extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);

extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
    uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
    int order);

/*
 * X15 hash function (CPU side, used to double-check GPU results).
 *
 * Chain: blake1-bmw2-grs3-skein4-jh5-keccak6-luffa7-cubehash8-shavite9-
 *        simd10-echo11-hamsi12-fugue13-shabal14-whirlpool15
 *
 * input : 80 bytes are read by the first (blake512) stage.
 * state : receives the first 32 bytes of the final digest.
 *
 * Every stage after the first re-hashes the 64-byte digest of the previous
 * stage in place.
 */
inline void x15hash(void *state, const void *input)
{
    uint32_t hash[16]; // 64-byte working digest shared by all stages

    sph_blake512_context ctx_blake;
    sph_blake512_init(&ctx_blake);
    sph_blake512(&ctx_blake, input, 80);
    sph_blake512_close(&ctx_blake, (void*) hash);

    sph_bmw512_context ctx_bmw;
    sph_bmw512_init(&ctx_bmw);
    sph_bmw512(&ctx_bmw, (const void*) hash, 64);
    sph_bmw512_close(&ctx_bmw, (void*) hash);

    sph_groestl512_context ctx_groestl;
    sph_groestl512_init(&ctx_groestl);
    sph_groestl512(&ctx_groestl, (const void*) hash, 64);
    sph_groestl512_close(&ctx_groestl, (void*) hash);

    sph_skein512_context ctx_skein;
    sph_skein512_init(&ctx_skein);
    sph_skein512(&ctx_skein, (const void*) hash, 64);
    sph_skein512_close(&ctx_skein, (void*) hash);

    sph_jh512_context ctx_jh;
    sph_jh512_init(&ctx_jh);
    sph_jh512(&ctx_jh, (const void*) hash, 64);
    sph_jh512_close(&ctx_jh, (void*) hash);

    sph_keccak512_context ctx_keccak;
    sph_keccak512_init(&ctx_keccak);
    sph_keccak512(&ctx_keccak, (const void*) hash, 64);
    sph_keccak512_close(&ctx_keccak, (void*) hash);

    sph_luffa512_context ctx_luffa;
    sph_luffa512_init(&ctx_luffa);
    sph_luffa512(&ctx_luffa, (const void*) hash, 64);
    sph_luffa512_close(&ctx_luffa, (void*) hash);

    sph_cubehash512_context ctx_cubehash;
    sph_cubehash512_init(&ctx_cubehash);
    sph_cubehash512(&ctx_cubehash, (const void*) hash, 64);
    sph_cubehash512_close(&ctx_cubehash, (void*) hash);

    sph_shavite512_context ctx_shavite;
    sph_shavite512_init(&ctx_shavite);
    sph_shavite512(&ctx_shavite, (const void*) hash, 64);
    sph_shavite512_close(&ctx_shavite, (void*) hash);

    sph_simd512_context ctx_simd;
    sph_simd512_init(&ctx_simd);
    sph_simd512(&ctx_simd, (const void*) hash, 64);
    sph_simd512_close(&ctx_simd, (void*) hash);

    sph_echo512_context ctx_echo;
    sph_echo512_init(&ctx_echo);
    sph_echo512(&ctx_echo, (const void*) hash, 64);
    sph_echo512_close(&ctx_echo, (void*) hash);

    sph_hamsi512_context ctx_hamsi;
    sph_hamsi512_init(&ctx_hamsi);
    sph_hamsi512(&ctx_hamsi, (const void*) hash, 64);
    sph_hamsi512_close(&ctx_hamsi, (void*) hash);

    sph_fugue512_context ctx_fugue;
    sph_fugue512_init(&ctx_fugue);
    sph_fugue512(&ctx_fugue, (const void*) hash, 64);
    sph_fugue512_close(&ctx_fugue, (void*) hash);

    sph_shabal512_context ctx_shabal;
    sph_shabal512_init(&ctx_shabal);
    sph_shabal512(&ctx_shabal, (const void*) hash, 64);
    sph_shabal512_close(&ctx_shabal, (void*) hash);

    sph_whirlpool_context ctx_whirlpool;
    sph_whirlpool_init(&ctx_whirlpool);
    sph_whirlpool(&ctx_whirlpool, (const void*) hash, 64);
    sph_whirlpool_close(&ctx_whirlpool, (void*) hash);

    // Only the first 256 bits of the final digest are handed back.
    memcpy(state, hash, 32);
}


extern bool opt_benchmark;

/*
 * Scan a nonce range for the X15 algorithm on the GPU.
 *
 * thr_id      : indexes init[], d_hash[], device_map[] and work_restart[].
 * pdata       : 20 words of block header; pdata[19] is the start nonce on
 *               entry and is advanced (or set to the winning nonce) on exit.
 * ptarget     : target words; word 7 is used as a quick pre-check before
 *               fulltest(). In benchmark mode word 7 is overwritten with
 *               0x0000ff (the const is cast away, as in the original).
 * max_nonce   : scanning stops once pdata[19] reaches this value.
 * hashes_done : receives the number of nonces covered by this call.
 *
 * Returns 1 when a nonce reported by the GPU also validates on the CPU,
 * 0 otherwise (range exhausted, work restart requested, or the device hash
 * buffer could not be allocated).
 */
extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
    const uint32_t *ptarget, uint32_t max_nonce,
    unsigned long *hashes_done)
{
    const uint32_t first_nonce = pdata[19];

    if (opt_benchmark)
        ((uint32_t*)ptarget)[7] = 0x0000ff;

    const uint32_t Htarg = ptarget[7];

    // Number of nonces processed per pass of the loop below.
    const int throughput = 256*256*8;

    static bool init[8] = {0,0,0,0,0,0,0,0};
    if (!init[thr_id])
    {
        cudaSetDevice(device_map[thr_id]);

        // Allocate the device buffer: 16 words (64 bytes) per nonce.
        cudaError_t err = cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
        if (err != cudaSuccess)
        {
            // Do not mark the thread as initialised; report and bail out
            // instead of running every stage on an invalid buffer.
            applog(LOG_INFO, "GPU #%d: cudaMalloc of the hash buffer failed: %s", thr_id, cudaGetErrorString(err));
            *hashes_done = 0;
            return 0;
        }

        // Per-stage initialisation (copies constants, per the original note).
        quark_blake512_cpu_init(thr_id, throughput);
        quark_groestl512_cpu_init(thr_id, throughput);
        quark_skein512_cpu_init(thr_id, throughput);
        quark_bmw512_cpu_init(thr_id, throughput);
        quark_keccak512_cpu_init(thr_id, throughput);
        quark_jh512_cpu_init(thr_id, throughput);
        x11_luffa512_cpu_init(thr_id, throughput);
        x11_cubehash512_cpu_init(thr_id, throughput);
        x11_shavite512_cpu_init(thr_id, throughput);
        x11_simd512_cpu_init(thr_id, throughput);
        x11_echo512_cpu_init(thr_id, throughput);
        x13_hamsi512_cpu_init(thr_id, throughput);
        x13_fugue512_cpu_init(thr_id, throughput);
        x13_shabal512_cpu_init(thr_id, throughput);
        whirlpool512_cpu_init(thr_id, throughput,0);

        quark_check_cpu_init(thr_id, throughput);

        init[thr_id] = true;
    }

    // Big-endian copy of the 20 header words; word 19 is patched with the
    // candidate nonce before the CPU re-check.
    uint32_t endiandata[20];
    for (int k = 0; k < 20; k++)
        be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);

    quark_blake512_cpu_setBlock_80((void*)endiandata);
    quark_check_cpu_setTarget(ptarget);

    do {
        // Running call index handed to every stage.
        int order = 0;

        // First stage: Blake512 over the 80-byte header.
        quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);

        // All remaining stages are unconditional and work on d_hash in place.
        quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x13_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        whirlpool512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

        // Scan the GPU results for winning hashes.
        uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        if (foundNonce != 0xffffffff)
        {
            uint32_t vhash64[8];
            be32enc(&endiandata[19], foundNonce);
            x15hash(vhash64, endiandata);

            if ((vhash64[7] <= Htarg) && fulltest(vhash64, ptarget)) {
                pdata[19] = foundNonce;
                *hashes_done = foundNonce - first_nonce + 1;
                return 1;
            } else {
                applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
            }
        }

        // The nonce is 32 bits wide: if the next step would wrap past
        // 0xFFFFFFFF the range is exhausted. Without this guard the wrapped
        // value compares below max_nonce and the loop starts over.
        if ((uint64_t)pdata[19] + (uint64_t)throughput > 0xffffffffULL) {
            pdata[19] = max_nonce;
            break;
        }
        pdata[19] += throughput;

    } while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

    *hashes_done = pdata[19] - first_nonce + 1;
    return 0;
}
diff --git a/x13/x17.cu b/x13/x17.cu
new file mode 100644
index 0000000000..aeb2a41bfb
--- /dev/null
+++ b/x13/x17.cu
@@ -0,0 +1,343 @@
/*
 * X17 algorithm built on cbuchner1's original X11
 *
 */

extern "C"
{
#include "sph/sph_blake.h"
#include "sph/sph_bmw.h"
#include "sph/sph_groestl.h"
#include "sph/sph_skein.h"
#include "sph/sph_jh.h"
#include "sph/sph_keccak.h"

#include "sph/sph_luffa.h"
#include "sph/sph_cubehash.h"
#include "sph/sph_shavite.h"
#include "sph/sph_simd.h"
#include "sph/sph_echo.h"

#include "sph/sph_hamsi.h"
#include "sph/sph_fugue.h"

#include "sph/sph_shabal.h"
#include "sph/sph_whirlpool.h"
#include "sph/sph_sha2.h"
#include "sph/sph_haval.h"


#include "miner.h"
}

// from cpu-miner.c
extern int device_map[8];

// Memory for the input/output of the chained hash functions
static uint32_t *d_hash[8];

extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata);
extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void quark_bmw512_cpu_init(int thr_id, int threads);
extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void
quark_groestl512_cpu_init(int thr_id, int threads); +extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +//extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_skein512_cpu_init(int thr_id, int threads); +extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_keccak512_cpu_init(int thr_id, int threads); +extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void quark_jh512_cpu_init(int thr_id, int threads); +extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_luffa512_cpu_init(int thr_id, int threads); +extern void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_cubehash512_cpu_init(int thr_id, int threads); +extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_shavite512_cpu_init(int thr_id, int threads); +extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_simd512_cpu_init(int thr_id, int threads); +extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern void x11_echo512_cpu_init(int thr_id, int threads); +extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); + +extern 
void x13_hamsi512_cpu_init(int thr_id, int threads);
extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void x13_fugue512_cpu_init(int thr_id, int threads);
extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void x13_shabal512_cpu_init(int thr_id, int threads);
extern void x13_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void whirlpool512_cpu_init(int thr_id, int threads, int flag);
extern void whirlpool512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void sha512_cpu_init(int thr_id, int threads);
extern void sha512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);

extern void haval256_cpu_init(int thr_id, int threads);
extern void haval256_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);



extern void quark_check_cpu_init(int thr_id, int threads);
extern void quark_check_cpu_setTarget(const void *ptarget);
extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);

extern void quark_compactTest_cpu_init(int thr_id, int threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
    uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
    int order);

/*
 * X17 hash function (CPU side, used to double-check GPU results).
 *
 * Chain: blake1-bmw2-grs3-skein4-jh5-keccak6-luffa7-cubehash8-shavite9-
 *        simd10-echo11-hamsi12-fugue13-shabal14-whirlpool15-sha512_16-
 *        haval256_17
 *
 * input : 80 bytes are read by the first (blake512) stage.
 * state : receives the first 32 bytes of the working buffer after the
 *         final (haval256_5) stage.
 *
 * Every stage after the first consumes 64 bytes of the shared working
 * buffer and writes its digest back into that buffer.
 */
inline void x17hash(void *state, const void *input)
{
    uint32_t hash[16]; // 64-byte working buffer shared by all stages

    sph_blake512_context ctx_blake;
    sph_blake512_init(&ctx_blake);
    sph_blake512(&ctx_blake, input, 80);
    sph_blake512_close(&ctx_blake, (void*) hash);

    sph_bmw512_context ctx_bmw;
    sph_bmw512_init(&ctx_bmw);
    sph_bmw512(&ctx_bmw, (const void*) hash, 64);
    sph_bmw512_close(&ctx_bmw, (void*) hash);

    sph_groestl512_context ctx_groestl;
    sph_groestl512_init(&ctx_groestl);
    sph_groestl512(&ctx_groestl, (const void*) hash, 64);
    sph_groestl512_close(&ctx_groestl, (void*) hash);

    sph_skein512_context ctx_skein;
    sph_skein512_init(&ctx_skein);
    sph_skein512(&ctx_skein, (const void*) hash, 64);
    sph_skein512_close(&ctx_skein, (void*) hash);

    sph_jh512_context ctx_jh;
    sph_jh512_init(&ctx_jh);
    sph_jh512(&ctx_jh, (const void*) hash, 64);
    sph_jh512_close(&ctx_jh, (void*) hash);

    sph_keccak512_context ctx_keccak;
    sph_keccak512_init(&ctx_keccak);
    sph_keccak512(&ctx_keccak, (const void*) hash, 64);
    sph_keccak512_close(&ctx_keccak, (void*) hash);

    sph_luffa512_context ctx_luffa;
    sph_luffa512_init(&ctx_luffa);
    sph_luffa512(&ctx_luffa, (const void*) hash, 64);
    sph_luffa512_close(&ctx_luffa, (void*) hash);

    sph_cubehash512_context ctx_cubehash;
    sph_cubehash512_init(&ctx_cubehash);
    sph_cubehash512(&ctx_cubehash, (const void*) hash, 64);
    sph_cubehash512_close(&ctx_cubehash, (void*) hash);

    sph_shavite512_context ctx_shavite;
    sph_shavite512_init(&ctx_shavite);
    sph_shavite512(&ctx_shavite, (const void*) hash, 64);
    sph_shavite512_close(&ctx_shavite, (void*) hash);

    sph_simd512_context ctx_simd;
    sph_simd512_init(&ctx_simd);
    sph_simd512(&ctx_simd, (const void*) hash, 64);
    sph_simd512_close(&ctx_simd, (void*) hash);

    sph_echo512_context ctx_echo;
    sph_echo512_init(&ctx_echo);
    sph_echo512(&ctx_echo, (const void*) hash, 64);
    sph_echo512_close(&ctx_echo, (void*) hash);

    sph_hamsi512_context ctx_hamsi;
    sph_hamsi512_init(&ctx_hamsi);
    sph_hamsi512(&ctx_hamsi, (const void*) hash, 64);
    sph_hamsi512_close(&ctx_hamsi, (void*) hash);

    sph_fugue512_context ctx_fugue;
    sph_fugue512_init(&ctx_fugue);
    sph_fugue512(&ctx_fugue, (const void*) hash, 64);
    sph_fugue512_close(&ctx_fugue, (void*) hash);

    sph_shabal512_context ctx_shabal;
    sph_shabal512_init(&ctx_shabal);
    sph_shabal512(&ctx_shabal, (const void*) hash, 64);
    sph_shabal512_close(&ctx_shabal, (void*) hash);

    sph_whirlpool_context ctx_whirlpool;
    sph_whirlpool_init(&ctx_whirlpool);
    sph_whirlpool(&ctx_whirlpool, (const void*) hash, 64);
    sph_whirlpool_close(&ctx_whirlpool, (void*) hash);

    sph_sha512_context ctx_sha512;
    sph_sha512_init(&ctx_sha512);
    sph_sha512(&ctx_sha512, (const void*) hash, 64);
    sph_sha512_close(&ctx_sha512, (void*) hash);

    sph_haval256_5_context ctx_haval;
    sph_haval256_5_init(&ctx_haval);
    sph_haval256_5(&ctx_haval, (const void*) hash, 64);
    sph_haval256_5_close(&ctx_haval, (void*) hash);

    // Only the first 256 bits of the working buffer are handed back.
    memcpy(state, hash, 32);
}


extern bool opt_benchmark;

/*
 * Scan a nonce range for the X17 algorithm on the GPU.
 *
 * thr_id      : indexes init[], d_hash[], device_map[] and work_restart[].
 * pdata       : 20 words of block header; pdata[19] is the start nonce on
 *               entry and is advanced (or set to the winning nonce) on exit.
 * ptarget     : target words; word 7 is used as a quick pre-check before
 *               fulltest(). In benchmark mode word 7 is overwritten with
 *               0x0000ff (the const is cast away, as in the original).
 * max_nonce   : scanning stops once pdata[19] reaches this value.
 * hashes_done : receives the number of nonces covered by this call.
 *
 * Returns 1 when a nonce reported by the GPU also validates on the CPU,
 * 0 otherwise (range exhausted, work restart requested, or the device hash
 * buffer could not be allocated).
 */
extern "C" int scanhash_x17(int thr_id, uint32_t *pdata,
    const uint32_t *ptarget, uint32_t max_nonce,
    unsigned long *hashes_done)
{
    const uint32_t first_nonce = pdata[19];

    if (opt_benchmark)
        ((uint32_t*)ptarget)[7] = 0x0000ff;

    const uint32_t Htarg = ptarget[7];

    // Number of nonces processed per pass of the loop below.
    const int throughput = 256*256*8;

    static bool init[8] = {0,0,0,0,0,0,0,0};
    if (!init[thr_id])
    {
        cudaSetDevice(device_map[thr_id]);

        // Allocate the device buffer: 16 words (64 bytes) per nonce.
        cudaError_t err = cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
        if (err != cudaSuccess)
        {
            // Do not mark the thread as initialised; report and bail out
            // instead of running every stage on an invalid buffer.
            applog(LOG_INFO, "GPU #%d: cudaMalloc of the hash buffer failed: %s", thr_id, cudaGetErrorString(err));
            *hashes_done = 0;
            return 0;
        }

        // Per-stage initialisation (copies constants, per the original note).
        quark_blake512_cpu_init(thr_id, throughput);
        quark_groestl512_cpu_init(thr_id, throughput);
        quark_skein512_cpu_init(thr_id, throughput);
        quark_bmw512_cpu_init(thr_id, throughput);
        quark_keccak512_cpu_init(thr_id, throughput);
        quark_jh512_cpu_init(thr_id, throughput);
        x11_luffa512_cpu_init(thr_id, throughput);
        x11_cubehash512_cpu_init(thr_id, throughput);
        x11_shavite512_cpu_init(thr_id, throughput);
        x11_simd512_cpu_init(thr_id, throughput);
        x11_echo512_cpu_init(thr_id, throughput);
        x13_hamsi512_cpu_init(thr_id, throughput);
        x13_fugue512_cpu_init(thr_id, throughput);
        x13_shabal512_cpu_init(thr_id, throughput);
        whirlpool512_cpu_init(thr_id, throughput,0);
        sha512_cpu_init(thr_id, throughput);
        haval256_cpu_init(thr_id, throughput);

        quark_check_cpu_init(thr_id, throughput);

        init[thr_id] = true;
    }

    // Big-endian copy of the 20 header words; word 19 is patched with the
    // candidate nonce before the CPU re-check.
    uint32_t endiandata[20];
    for (int k = 0; k < 20; k++)
        be32enc(&endiandata[k], ((uint32_t*)pdata)[k]);

    quark_blake512_cpu_setBlock_80((void*)endiandata);
    quark_check_cpu_setTarget(ptarget);

    do {
        // Running call index handed to every stage.
        int order = 0;

        // First stage: Blake512 over the 80-byte header.
        quark_blake512_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);

        // All remaining stages are unconditional and work on d_hash in place.
        quark_bmw512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        quark_groestl512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        quark_skein512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        quark_jh512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        quark_keccak512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x11_luffa512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x11_cubehash512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x11_shavite512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x11_simd512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x11_echo512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x13_hamsi512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        x13_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        whirlpool512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        sha512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        haval256_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);

        // Scan the GPU results for winning hashes.
        uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
        if (foundNonce != 0xffffffff)
        {
            uint32_t vhash64[8];
            be32enc(&endiandata[19], foundNonce);
            x17hash(vhash64, endiandata);

            if ((vhash64[7] <= Htarg) && fulltest(vhash64, ptarget)) {
                pdata[19] = foundNonce;
                *hashes_done = foundNonce - first_nonce + 1;
                return 1;
            } else {
                applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNonce);
            }
        }

        // The nonce is 32 bits wide: if the next step would wrap past
        // 0xFFFFFFFF the range is exhausted. Without this guard the wrapped
        // value compares below max_nonce and the loop starts over.
        if ((uint64_t)pdata[19] + (uint64_t)throughput > 0xffffffffULL) {
            pdata[19] = max_nonce;
            break;
        }
        pdata[19] += throughput;

    } while (pdata[19] < max_nonce && !work_restart[thr_id].restart);

    *hashes_done = pdata[19] - first_nonce + 1;
    return 0;
}