Skip to content

Commit

Permalink
upgrade cryptolight algo for AEON support
Browse files Browse the repository at this point in the history
they also forked to variant 1 system...
  • Loading branch information
tpruvot committed Jun 23, 2018
1 parent 71fc784 commit 1f8c05e
Show file tree
Hide file tree
Showing 8 changed files with 129 additions and 22 deletions.
2 changes: 1 addition & 1 deletion ccminer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2380,7 +2380,7 @@ static void *miner_thread(void *userdata)
rc = scanhash_c11(thr_id, &work, max_nonce, &hashes_done);
break;
case ALGO_CRYPTOLIGHT:
rc = scanhash_cryptolight(thr_id, &work, max_nonce, &hashes_done);
rc = scanhash_cryptolight(thr_id, &work, max_nonce, &hashes_done, 1);
break;
case ALGO_CRYPTONIGHT:
{
Expand Down
76 changes: 73 additions & 3 deletions crypto/cryptolight-core.cu
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,10 @@ void cryptolight_core_gpu_phase1(int threads, uint32_t * long_state, uint32_t *
}
}

// --------------------------------------------------------------------------------------------------------------

__global__
void cryptolight_core_gpu_phase2(const int threads, const int bfactor, const int partidx, uint32_t * d_long_state, uint32_t * d_ctx_a, uint32_t * d_ctx_b)
void cryptolight_old_gpu_phase2(const int threads, const int bfactor, const int partidx, uint32_t * d_long_state, uint32_t * d_ctx_a, uint32_t * d_ctx_b)
{
__shared__ uint32_t __align__(16) sharedMemory[1024];

Expand Down Expand Up @@ -209,6 +211,70 @@ void cryptolight_core_gpu_phase2(const int threads, const int bfactor, const int
#endif // __CUDA_ARCH__ >= 300
}

__device__ __forceinline__ void store_variant1(uint32_t* long_state)
{
uint4* Z = (uint4*) long_state;
const uint32_t tmp = (Z->z >> 24); // __byte_perm(src, 0, 0x7773);
const uint32_t index = (((tmp >> 3) & 6u) | (tmp & 1u)) << 1;
Z->z = (Z->z & 0x00ffffffu) | ((tmp ^ ((0x75310u >> index) & 0x30u)) << 24);
}

#define MUL_SUM_XOR_DST_1(a,c,dst,tweak) { \
uint64_t hi, lo = cuda_mul128(((uint64_t *)a)[0], ((uint64_t *)dst)[0], &hi) + ((uint64_t *)c)[1]; \
hi += ((uint64_t *)c)[0]; \
((uint64_t *)c)[0] = ((uint64_t *)dst)[0] ^ hi; \
((uint64_t *)c)[1] = ((uint64_t *)dst)[1] ^ lo; \
((uint64_t *)dst)[0] = hi; \
((uint64_t *)dst)[1] = lo ^ tweak; }

__global__
void cryptolight_gpu_phase2(const uint32_t threads, const uint16_t bfactor, const uint32_t partidx,
uint32_t * __restrict__ d_long_state, uint32_t * __restrict__ d_ctx_a, uint32_t * __restrict__ d_ctx_b,
uint64_t * __restrict__ d_tweak)
{
__shared__ __align__(16) uint32_t sharedMemory[1024];
cn_aes_gpu_init(sharedMemory);
__syncthreads();

const uint32_t thread = blockDim.x * blockIdx.x + threadIdx.x;
if (thread < threads)
{
const uint32_t batchsize = ITER >> (2 + bfactor);
const uint32_t start = partidx * batchsize;
const uint32_t end = start + batchsize;
const uint32_t longptr = thread << LONG_SHL_IDX;
uint32_t * long_state = &d_long_state[longptr];
uint64_t tweak = d_tweak[thread];

void * ctx_a = (void*)(&d_ctx_a[thread << 2]);
void * ctx_b = (void*)(&d_ctx_b[thread << 2]);
uint4 A = AS_UINT4(ctx_a); // ld.global.u32.v4
uint4 B = AS_UINT4(ctx_b);
uint32_t* a = (uint32_t*)&A;
uint32_t* b = (uint32_t*)&B;

for (int i = start; i < end; i++)
{
uint32_t c[4];
uint32_t j = (A.x >> 2) & E2I_MASK2;
cn_aes_single_round(sharedMemory, &long_state[j], c, a);
XOR_BLOCKS_DST(c, b, &long_state[j]);
store_variant1(&long_state[j]);
MUL_SUM_XOR_DST_1(c, a, &long_state[(c[0] >> 2) & E2I_MASK2], tweak);

j = (A.x >> 2) & E2I_MASK2;
cn_aes_single_round(sharedMemory, &long_state[j], b, a);
XOR_BLOCKS_DST(b, c, &long_state[j]);
store_variant1(&long_state[j]);
MUL_SUM_XOR_DST_1(b, a, &long_state[(b[0] >> 2) & E2I_MASK2], tweak);
}
if (bfactor) {
AS_UINT4(ctx_a) = A;
AS_UINT4(ctx_b) = B;
}
}
}

__global__
void cryptolight_core_gpu_phase3(int threads, const uint32_t * long_state, uint32_t * ctx_state, uint32_t * ctx_key2)
{
Expand Down Expand Up @@ -252,7 +318,7 @@ extern int device_bfactor[MAX_GPUS];

__host__
void cryptolight_core_hash(int thr_id, int blocks, int threads, uint32_t *d_long_state, uint32_t *d_ctx_state,
uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2)
uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2, int variant, uint64_t *d_ctx_tweak)
{
dim3 grid(blocks);
dim3 block(threads);
Expand All @@ -271,7 +337,11 @@ void cryptolight_core_hash(int thr_id, int blocks, int threads, uint32_t *d_long

for(i = 0; i < partcount; i++)
{
cryptolight_core_gpu_phase2 <<<grid, (device_sm[dev_id] >= 300 ? block4 : block)>>>(blocks*threads, bfactor, i, d_long_state, d_ctx_a, d_ctx_b);
dim3 b = device_sm[dev_id] >= 300 ? block4 : block;
if (variant == 0)
cryptolight_old_gpu_phase2 <<<grid, b>>> (blocks*threads, bfactor, i, d_long_state, d_ctx_a, d_ctx_b);
else
cryptolight_gpu_phase2 <<<grid, b>>> (blocks*threads, bfactor, i, d_long_state, d_ctx_a, d_ctx_b, d_ctx_tweak);
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
if(partcount > 1) usleep(bsleep);
}
Expand Down
39 changes: 32 additions & 7 deletions crypto/cryptolight-cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,16 @@ struct cryptonight_ctx {
oaes_ctx* aes_ctx;
};


static void cryptolight_store_variant(void* state, int variant) {
if (variant == 1) {
// use variant 1 like monero since june 2018
const uint8_t tmp = ((const uint8_t*)(state))[11];
const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1;
((uint8_t*)(state))[11] = tmp ^ ((0x75310 >> index) & 0x30);
}
}

static void do_blake_hash(const void* input, int len, void* output)
{
uchar hash[32];
Expand Down Expand Up @@ -132,14 +142,14 @@ static void mul_sum_dst(const uint8_t* a, const uint8_t* b, const uint8_t* c, ui
((uint64_t*) dst)[0] += ((uint64_t*) c)[0];
}

static void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst) {
static void mul_sum_xor_dst(const uint8_t* a, uint8_t* c, uint8_t* dst, const int variant, const uint64_t tweak) {
uint64_t hi, lo = mul128(((uint64_t*) a)[0], ((uint64_t*) dst)[0], &hi) + ((uint64_t*) c)[1];
hi += ((uint64_t*) c)[0];

((uint64_t*) c)[0] = ((uint64_t*) dst)[0] ^ hi;
((uint64_t*) c)[1] = ((uint64_t*) dst)[1] ^ lo;
((uint64_t*) dst)[0] = hi;
((uint64_t*) dst)[1] = lo;
((uint64_t*) dst)[1] = variant ? lo ^ tweak : lo;
}

static void copy_block(uint8_t* dst, const uint8_t* src) {
Expand All @@ -157,13 +167,18 @@ static void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) {
((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1];
}

static void cryptolight_hash_ctx(void* output, const void* input, const int len, struct cryptonight_ctx* ctx)
static int cryptolight_hash_ctx(void* output, const void* input, const int len, struct cryptonight_ctx* ctx, const int variant)
{
size_t i, j;
if (variant && len < 43)
return 0;

keccak_hash_process(&ctx->state.hs, (const uint8_t*) input, len);
ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);

const uint64_t tweak = variant ? *((uint64_t*) (((uint8_t*)input) + 35)) ^ ctx->state.hs.w[24] : 0;

oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
#undef RND
Expand All @@ -186,14 +201,16 @@ static void cryptolight_hash_ctx(void* output, const void* input, const int len,
j = e2i(ctx->a);
aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
cryptolight_store_variant(&ctx->long_state[j], variant);

mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)]);
mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c)], variant, tweak);

j = e2i(ctx->a);
aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
cryptolight_store_variant(&ctx->long_state[j], variant);

mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)]);
mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b)], variant, tweak);
}

memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
Expand All @@ -219,11 +236,19 @@ static void cryptolight_hash_ctx(void* output, const void* input, const int len,
if (opt_debug) applog(LOG_DEBUG, "extra algo=%d", extra_algo);

oaes_free((OAES_CTX **) &ctx->aes_ctx);
return 1;
}

void cryptolight_hash(void* output, const void* input, int len)
int cryptolight_hash_variant(void* output, const void* input, int len, int variant)
{
struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx));
cryptolight_hash_ctx(output, input, len, ctx);
int rc = cryptolight_hash_ctx(output, input, len, ctx, variant);
free(ctx);
return rc;
}

void cryptolight_hash(void* output, const void* input)
{
cryptolight_hash_variant(output, input, 76, 1);
}

18 changes: 13 additions & 5 deletions crypto/cryptolight.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@ static uint32_t *d_ctx_state[MAX_GPUS];
static uint32_t *d_ctx_key1[MAX_GPUS];
static uint32_t *d_ctx_key2[MAX_GPUS];
static uint32_t *d_ctx_text[MAX_GPUS];
static uint64_t *d_ctx_tweak[MAX_GPUS];
static uint32_t *d_ctx_a[MAX_GPUS];
static uint32_t *d_ctx_b[MAX_GPUS];

static bool init[MAX_GPUS] = { 0 };

extern "C" int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
extern "C" int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int variant)
{
int res = 0;
uint32_t throughput = 0;
Expand All @@ -26,13 +27,18 @@ extern "C" int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_
uint32_t *nonceptr = (uint32_t*) (&pdata[39]);
const uint32_t first_nonce = *nonceptr;
uint32_t nonce = first_nonce;
int dev_id = device_map[thr_id];

if(opt_benchmark) {
ptarget[7] = 0x00ff;
}

if(!init[thr_id])
{
if (!device_config[thr_id] && strcmp(device_name[dev_id], "TITAN V") == 0) {
device_config[thr_id] = strdup("80x32");
}

if (device_config[thr_id]) {
sscanf(device_config[thr_id], "%ux%u", &cn_blocks, &cn_threads);
throughput = cuda_default_throughput(thr_id, cn_blocks*cn_threads);
Expand Down Expand Up @@ -79,6 +85,7 @@ extern "C" int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
cudaMalloc(&d_ctx_b[thr_id], 4 * sizeof(uint32_t) * throughput);
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
cudaMalloc(&d_ctx_tweak[thr_id], sizeof(uint64_t) * throughput);

init[thr_id] = true;
}
Expand All @@ -91,8 +98,8 @@ extern "C" int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_
uint32_t resNonces[2] = { UINT32_MAX, UINT32_MAX };

cryptonight_extra_setData(thr_id, pdata, ptarget);
cryptonight_extra_prepare(thr_id, throughput, nonce, d_ctx_state[thr_id], d_ctx_a[thr_id], d_ctx_b[thr_id], d_ctx_key1[thr_id], d_ctx_key2[thr_id], 0, NULL);
cryptolight_core_hash(thr_id, cn_blocks, cn_threads, d_long_state[thr_id], d_ctx_state[thr_id], d_ctx_a[thr_id], d_ctx_b[thr_id], d_ctx_key1[thr_id], d_ctx_key2[thr_id]);
cryptonight_extra_prepare(thr_id, throughput, nonce, d_ctx_state[thr_id], d_ctx_a[thr_id], d_ctx_b[thr_id], d_ctx_key1[thr_id], d_ctx_key2[thr_id], variant, d_ctx_tweak[thr_id]);
cryptolight_core_hash(thr_id, cn_blocks, cn_threads, d_long_state[thr_id], d_ctx_state[thr_id], d_ctx_a[thr_id], d_ctx_b[thr_id], d_ctx_key1[thr_id], d_ctx_key2[thr_id], variant, d_ctx_tweak[thr_id]);
cryptonight_extra_final(thr_id, throughput, nonce, resNonces, d_ctx_state[thr_id]);

*hashes_done = nonce - first_nonce + throughput;
Expand All @@ -104,7 +111,7 @@ extern "C" int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_
uint32_t *tempnonceptr = (uint32_t*)(((char*)tempdata) + 39);
memcpy(tempdata, pdata, 76);
*tempnonceptr = resNonces[0];
cryptolight_hash(vhash, tempdata, 76);
cryptolight_hash_variant(vhash, tempdata, 76, variant);
if(vhash[7] <= Htarg && fulltest(vhash, ptarget))
{
res = 1;
Expand All @@ -114,7 +121,7 @@ extern "C" int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_
if(resNonces[1] != UINT32_MAX)
{
*tempnonceptr = resNonces[1];
cryptolight_hash(vhash, tempdata, 76);
cryptolight_hash_variant(vhash, tempdata, 76, variant);
if(vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
res++;
work->nonces[1] = resNonces[1];
Expand Down Expand Up @@ -157,6 +164,7 @@ void free_cryptolight(int thr_id)
cudaFree(d_ctx_key1[thr_id]);
cudaFree(d_ctx_key2[thr_id]);
cudaFree(d_ctx_text[thr_id]);
cudaFree(d_ctx_tweak[thr_id]);
cudaFree(d_ctx_a[thr_id]);
cudaFree(d_ctx_b[thr_id]);

Expand Down
2 changes: 1 addition & 1 deletion crypto/cryptolight.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ static inline void exit_if_cudaerror(int thr_id, const char *src, int line)
}
}

void cryptolight_core_hash(int thr_id, int blocks, int threads, uint32_t *d_long_state, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2);
void cryptolight_core_hash(int thr_id, int blocks, int threads, uint32_t *d_long_state, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2, int variant, uint64_t *d_ctx_tweak);

void cryptonight_extra_setData(int thr_id, const void *data, const void *ptarget);
void cryptonight_extra_init(int thr_id/*, uint32_t threads*/);
Expand Down
7 changes: 5 additions & 2 deletions crypto/xmr-rpc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -550,16 +550,19 @@ bool rpc2_stratum_submit(struct pool_infos *pool, struct work *work)
}

else if (opt_algo == ALGO_CRYPTOLIGHT) {
int variant = 1;
uint32_t nonce = work->nonces[idnonce];
noncestr = bin2hex((unsigned char*) &nonce, 4);
last_found_nonce = nonce;
cryptolight_hash(hash, data, 76);
//if (cryptonight_fork > 1 && ((unsigned char*)work->data)[0] >= cryptonight_fork)
// variant = ((unsigned char*)work->data)[0] - cryptonight_fork + 1;
cryptolight_hash_variant(hash, data, 76, variant);
work_set_target_ratio(work, (uint32_t*) hash);
}

else if (opt_algo == ALGO_CRYPTONIGHT) {
uint32_t nonce = work->nonces[idnonce];
int variant = 0;
uint32_t nonce = work->nonces[idnonce];
noncestr = bin2hex((unsigned char*) &nonce, 4);
last_found_nonce = nonce;
if (cryptonight_fork > 1 && ((unsigned char*)work->data)[0] >= cryptonight_fork)
Expand Down
5 changes: 3 additions & 2 deletions miner.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ extern int scanhash_blake256(int thr_id, struct work* work, uint32_t max_nonce,
extern int scanhash_blake2s(int thr_id, struct work *work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_bmw(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_cryptolight(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int variant);
extern int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done, int variant);
extern int scanhash_decred(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
extern int scanhash_deep(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done);
Expand Down Expand Up @@ -900,7 +900,8 @@ void blake2b_hash(void *output, const void *input);
void blake2s_hash(void *output, const void *input);
void bmw_hash(void *state, const void *input);
void c11hash(void *output, const void *input);
void cryptolight_hash(void* output, const void* input, int len);
int cryptolight_hash_variant(void* output, const void* input, int len, int variant);
void cryptolight_hash(void* output, const void* input);
int cryptonight_hash_variant(void* output, const void* input, size_t len, int variant);
void cryptonight_hash(void* output, const void* input);
void monero_hash(void* output, const void* input);
Expand Down
2 changes: 1 addition & 1 deletion util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2193,7 +2193,7 @@ void print_hash_tests(void)
c11hash(&hash[0], &buf[0]);
printpfx("c11", hash);

cryptolight_hash(&hash[0], &buf[0], 76);
cryptolight_hash(&hash[0], &buf[0]);
printpfx("cryptolight", hash);

cryptonight_hash(&hash[0], &buf[0]);
Expand Down

0 comments on commit 1f8c05e

Please sign in to comment.