R"===( __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __kernel void cn1_cryptonight_r_N(__global uint4 *Scratchpad, __global ulong *states, uint variant, __global ulong *input, uint Threads) { ulong a[2], b[4]; __local uint AES0[256], AES1[256], AES2[256], AES3[256]; const ulong gIdx = get_global_id(0) - get_global_offset(0); for(int i = get_local_id(0); i < 256; i += WORKSIZE) { const uint tmp = AES0_C[i]; AES0[i] = tmp; AES1[i] = rotate(tmp, 8U); AES2[i] = rotate(tmp, 16U); AES3[i] = rotate(tmp, 24U); } barrier(CLK_LOCAL_MEM_FENCE); # if (COMP_MODE == 1) // do not use early return here if (gIdx < Threads) # endif { states += 25 * gIdx; # if defined(__NV_CL_C_VERSION) Scratchpad += gIdx * (ITERATIONS >> 2); # else # if (STRIDED_INDEX == 0) Scratchpad += gIdx * (MEMORY >> 4); # elif (STRIDED_INDEX == 1) Scratchpad += gIdx; # elif (STRIDED_INDEX == 2) Scratchpad += get_group_id(0) * (MEMORY >> 4) * WORKSIZE + MEM_CHUNK * get_local_id(0); # endif # endif a[0] = states[0] ^ states[4]; a[1] = states[1] ^ states[5]; b[0] = states[2] ^ states[6]; b[1] = states[3] ^ states[7]; b[2] = states[8] ^ states[10]; b[3] = states[9] ^ states[11]; } ulong2 bx0 = ((ulong2 *)b)[0]; ulong2 bx1 = ((ulong2 *)b)[1]; mem_fence(CLK_LOCAL_MEM_FENCE); # ifdef __NV_CL_C_VERSION __local uint16 scratchpad_line_buf[WORKSIZE]; __local uint16* scratchpad_line = scratchpad_line_buf + get_local_id(0); # endif # if (COMP_MODE == 1) // do not use early return here if (gIdx < Threads) # endif { uint r0 = as_uint2(states[12]).s0; uint r1 = as_uint2(states[12]).s1; uint r2 = as_uint2(states[13]).s0; uint r3 = as_uint2(states[13]).s1; #pragma unroll CN_UNROLL for(int i = 0; i < ITERATIONS; ++i) { # ifdef __NV_CL_C_VERSION uint idx = a[0] & 0x1FFFC0; uint idx1 = a[0] & 0x30; *scratchpad_line = *(__global uint16*)((__global uchar*)(Scratchpad) + idx); # else uint idx = a[0] & MASK; # endif uint4 c = SCRATCHPAD_CHUNK(0); c = AES_Round(AES0, AES1, AES2, AES3, c, ((uint4 *)a)[0]); { const ulong2 chunk1 = as_ulong2(SCRATCHPAD_CHUNK(1)); const ulong2 chunk2 = as_ulong2(SCRATCHPAD_CHUNK(2)); const ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(3)); #if (VARIANT == VARIANT_4) c ^= as_uint4(chunk1) ^ as_uint4(chunk2) ^ as_uint4(chunk3); #endif SCRATCHPAD_CHUNK(1) = as_uint4(chunk3 + bx1); SCRATCHPAD_CHUNK(2) = as_uint4(chunk1 + bx0); SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]); } SCRATCHPAD_CHUNK(0) = as_uint4(bx0) ^ c; # ifdef __NV_CL_C_VERSION *(__global uint16*)((__global uchar*)(Scratchpad) + idx) = *scratchpad_line; idx = as_ulong2(c).s0 & 0x1FFFC0; idx1 = as_ulong2(c).s0 & 0x30; *scratchpad_line = *(__global uint16*)((__global uchar*)(Scratchpad) + idx); # else idx = as_ulong2(c).s0 & MASK; # endif uint4 tmp = SCRATCHPAD_CHUNK(0); tmp.s0 ^= r0 + r1; tmp.s1 ^= r2 + r3; const uint r4 = as_uint2(a[0]).s0; const uint r5 = as_uint2(a[1]).s0; const uint r6 = as_uint4(bx0).s0; const uint r7 = as_uint4(bx1).s0; #if (VARIANT == VARIANT_4) const uint r8 = as_uint4(bx1).s2; #endif XMRIG_INCLUDE_RANDOM_MATH #if (VARIANT == VARIANT_4) const uint2 al = (uint2)(as_uint2(a[0]).s0 ^ r2, as_uint2(a[0]).s1 ^ r3); const uint2 ah = (uint2)(as_uint2(a[1]).s0 ^ r0, as_uint2(a[1]).s1 ^ r1); #endif ulong2 t; t.s0 = mul_hi(as_ulong2(c).s0, as_ulong2(tmp).s0); t.s1 = as_ulong2(c).s0 * as_ulong2(tmp).s0; { const ulong2 chunk1 = as_ulong2(SCRATCHPAD_CHUNK(1)) #if (VARIANT == VARIANT_WOW) ^ t #endif ; const ulong2 chunk2 = as_ulong2(SCRATCHPAD_CHUNK(2)); #if (VARIANT == VARIANT_WOW) t ^= chunk2; #endif const ulong2 chunk3 = as_ulong2(SCRATCHPAD_CHUNK(3)); #if (VARIANT == VARIANT_4) c ^= as_uint4(chunk1) ^ as_uint4(chunk2) ^ as_uint4(chunk3); #endif SCRATCHPAD_CHUNK(1) = as_uint4(chunk3 + bx1); SCRATCHPAD_CHUNK(2) = as_uint4(chunk1 + bx0); SCRATCHPAD_CHUNK(3) = as_uint4(chunk2 + ((ulong2 *)a)[0]); } #if (VARIANT == VARIANT_4) a[1] = as_ulong(ah) + t.s1; a[0] = as_ulong(al) + t.s0; #else a[1] += t.s1; a[0] += t.s0; #endif SCRATCHPAD_CHUNK(0) = ((uint4 *)a)[0]; # ifdef __NV_CL_C_VERSION *(__global uint16*)((__global uchar*)(Scratchpad) + idx) = *scratchpad_line; # endif ((uint4 *)a)[0] ^= tmp; bx1 = bx0; bx0 = as_ulong2(c); } } mem_fence(CLK_GLOBAL_MEM_FENCE); } )==="