add algo files
@@ -0,0 +1,9 @@
# Mining pool algorithm files
```
Compile commands (gcc compiler, Ubuntu system)
blake2b:
sha3x:
blake3:
heavyHash:
randomx:
```
237 blake2b/blake2b.c Normal file
@@ -0,0 +1,237 @@
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include "blake2b.h"
|
||||
|
||||
#ifndef ROTR64
|
||||
#define ROTR64(x, y) (((x) >> (y)) ^ ((x) << (64 - (y))))
|
||||
#endif
|
||||
// Little-endian byte access.
|
||||
#define B2B_GET64(p) \
|
||||
(((uint64_t) ((uint8_t *) (p))[0]) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[1]) << 8) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[2]) << 16) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[3]) << 24) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[4]) << 32) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[5]) << 40) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[6]) << 48) ^ \
|
||||
(((uint64_t) ((uint8_t *) (p))[7]) << 56))
|
||||
// G Mixing function.
|
||||
#define B2B_G(a, b, c, d, x, y) { \
|
||||
v[a] = v[a] + v[b] + x; \
|
||||
v[d] = ROTR64(v[d] ^ v[a], 32); \
|
||||
v[c] = v[c] + v[d]; \
|
||||
v[b] = ROTR64(v[b] ^ v[c], 24); \
|
||||
v[a] = v[a] + v[b] + y; \
|
||||
v[d] = ROTR64(v[d] ^ v[a], 16); \
|
||||
v[c] = v[c] + v[d]; \
|
||||
v[b] = ROTR64(v[b] ^ v[c], 63); \
|
||||
}
|
||||
// Initialization Vector.
|
||||
static const uint64_t blake2b_iv[8] = {
|
||||
0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
|
||||
0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
|
||||
0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
|
||||
0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
|
||||
};
|
||||
|
||||
unsigned int be32toh(unsigned int x)
|
||||
{
|
||||
return (((x & 0xff000000U) >> 24) | ((x & 0x00ff0000U) >> 8) |
|
||||
((x & 0x0000ff00U) << 8) | ((x & 0x000000ffU) << 24));
|
||||
}
|
||||
|
||||
static void blake2b_compress(blake2b_ctx *ctx, int last)
|
||||
{
|
||||
const uint8_t sigma[12][16] = {
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
|
||||
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
|
||||
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
|
||||
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
|
||||
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
|
||||
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
|
||||
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
|
||||
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
|
||||
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
|
||||
};
|
||||
int i;
|
||||
uint64_t v[16], m[16];
|
||||
//long int v15;
|
||||
for (i = 0; i < 8; i++) { // init work variables
|
||||
v[i] = ctx->h[i];
|
||||
v[i + 8] = blake2b_iv[i];
|
||||
}
|
||||
//v15= v[15];
|
||||
//printf("the v15=%02lx\n" ,v15);
|
||||
|
||||
v[12] ^= ctx->t[0]; // low 64 bits of offset
|
||||
v[13] ^= ctx->t[1]; // high 64 bits
|
||||
|
||||
//printf("ctx->t[0]= %016llx\n",ctx->t[0]);
|
||||
//printf("ctx->t[1]= %016llx\n",ctx->t[1]);
|
||||
|
||||
if (last) // last block flag set ?
|
||||
v[14] = ~v[14];
|
||||
|
||||
for (i = 0; i < 16; i++) // get little-endian words
|
||||
m[i] = B2B_GET64(&ctx->b[8 * i]);
|
||||
|
||||
//for (int i = 0; i < 16; ++i) printf("v[%0d]=%016llx\n", i,v[i]);
|
||||
//for (int i = 0; i < 16; ++i) printf("m[%0d]=%016llx\n", i,m[i]);
|
||||
|
||||
for (i = 0; i < 12; i++) { // twelve rounds
|
||||
//printf("i=%0d\n",i);
|
||||
//for (int i = 0; i < 16; ++i) printf("v[%0d]=%016llx\n", i,v[i]);
|
||||
//for (int i = 0; i < 16; ++i) printf("m[%0d]=%016llx\n", i,m[i]);
|
||||
|
||||
B2B_G( 0, 4, 8, 12, m[sigma[i][ 0]], m[sigma[i][ 1]]);
|
||||
B2B_G( 1, 5, 9, 13, m[sigma[i][ 2]], m[sigma[i][ 3]]);
|
||||
B2B_G( 2, 6, 10, 14, m[sigma[i][ 4]], m[sigma[i][ 5]]);
|
||||
B2B_G( 3, 7, 11, 15, m[sigma[i][ 6]], m[sigma[i][ 7]]);
|
||||
B2B_G( 0, 5, 10, 15, m[sigma[i][ 8]], m[sigma[i][ 9]]);
|
||||
B2B_G( 1, 6, 11, 12, m[sigma[i][10]], m[sigma[i][11]]);
|
||||
B2B_G( 2, 7, 8, 13, m[sigma[i][12]], m[sigma[i][13]]);
|
||||
B2B_G( 3, 4, 9, 14, m[sigma[i][14]], m[sigma[i][15]]);
|
||||
|
||||
}
|
||||
//for (int i = 0; i < 16; ++i) printf("v[%0d]=%016llx\n", i,v[i]);
|
||||
//v15= v[15];
|
||||
//printf("the v15=%02lx\n" ,v15);
|
||||
for( i = 0; i < 8; ++i )
|
||||
ctx->h[i] ^= v[i] ^ v[i + 8];
|
||||
//v15= v[15];
|
||||
//printf("the v15=%02lx\n" ,v15);
|
||||
}
|
||||
|
||||
void blake2b_update(blake2b_ctx *ctx, const void *in, size_t inlen) // data bytes
|
||||
{
|
||||
size_t i;
|
||||
for (i = 0; i < inlen; i++) {
|
||||
if (ctx->c == 128) { // buffer full ?
|
||||
ctx->t[0] += ctx->c; // add counters
|
||||
if (ctx->t[0] < ctx->c) // carry overflow ?
|
||||
ctx->t[1]++; // high word
|
||||
blake2b_compress(ctx, 0); // compress (not last)
|
||||
ctx->c = 0; // counter to zero
|
||||
//for (int i = 0; i < 8; ++i) printf("ctx->h[%0d]=%016llx\n",i,ctx->h[i]);
|
||||
}
|
||||
ctx->b[ctx->c++] = ((const uint8_t *) in)[i];
|
||||
}
|
||||
/*
|
||||
int t0,t1;
|
||||
t0 = ctx->t[0];
|
||||
t1 = ctx->t[1];
|
||||
printf("the t[0]=%02x,the t[1]=%02x\n", t0,t1);
|
||||
*/
|
||||
//printf("the t[0]=%02x,the t[1]=%02x\n", ctx->t[0],ctx->t[1]);
|
||||
}
|
||||
|
||||
int blake2b_init(blake2b_ctx *ctx, size_t outlen) // (keylen=0: no key)
|
||||
{
|
||||
//size_t i;
|
||||
|
||||
//if (outlen == 0 || outlen > 64 || keylen > 64)
|
||||
// return -1; // illegal parameters
|
||||
//
|
||||
//for (i = 0; i < 8; i++) // state, "param block"
|
||||
// ctx->h[i] = blake2b_iv[i];
|
||||
//
|
||||
// ctx->h[0] ^= 0x01010000 ^ (keylen << 8) ^ outlen;
|
||||
// ctx->t[0] = 0; // input count low word
|
||||
// ctx->t[1] = 0; // input count high word
|
||||
// ctx->c = 0; // pointer within buffer
|
||||
// ctx->outlen = outlen;
|
||||
//
|
||||
//for (i = keylen; i < 128; i++) // zero input block
|
||||
// ctx->b[i] = 0;
|
||||
//if (keylen > 0) {
|
||||
// blake2b_update(ctx, key, keylen);
|
||||
// ctx->c = 128; // at the end
|
||||
// }
|
||||
ctx->h[0]= 0x6a09e667f2bdc93a;
|
||||
ctx->h[1]= 0xbb67ae8584caa73b;
|
||||
ctx->h[2]= 0x3c6ef372fe94f82b;
|
||||
ctx->h[3]= 0xa54ff53a5f1d36f1;
|
||||
ctx->h[4]= 0x510e527fade682d1;
|
||||
ctx->h[5]= 0x9b05688c2b3e6c1f;
|
||||
ctx->h[6]= 0x48ec89c38820de31;
|
||||
ctx->h[7]= 0x5be0cd10137e21b1;
|
||||
ctx->t[0] = 0; // input count low word
|
||||
ctx->t[1] = 0; // input count high word
|
||||
ctx->c = 0; // pointer within buffer
|
||||
ctx->outlen = outlen;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void blake2b_final(blake2b_ctx *ctx, void *out)
|
||||
{
|
||||
size_t i;
|
||||
ctx->t[0] += ctx->c; // mark last block offset
|
||||
if (ctx->t[0] < ctx->c) // carry overflow
|
||||
ctx->t[1]++; // high word
|
||||
while (ctx->c < 128) // fill up with zeros
|
||||
ctx->b[ctx->c++] = 0;
|
||||
//printf("the msg is :\n");
|
||||
//for (int i = 0; i < 128; ++i) printf("%02x",ctx->b[i]);
|
||||
//printf("\n");
|
||||
blake2b_compress(ctx, 1); // final block flag = 1
|
||||
// little endian convert and store
|
||||
/*
|
||||
int t0,t1;
|
||||
t0 = ctx->t[0];
|
||||
t1 = ctx->t[1];
|
||||
printf("the t[0]=%02x,the t[1]=%02x\n", t0,t1);
|
||||
*/
|
||||
//for (i = 0; i < 128; i++)
|
||||
// ((uint8_t *) msg_s1)[i] = ctx->b[i];
|
||||
//for (int i = 0; i < 8; ++i) printf("ctx->h[%0d]=%016llx\n",i,ctx->h[i]);
|
||||
//printf("ctx->outlen= %0d\n",ctx->outlen );
|
||||
for (i = 0; i < ctx->outlen; i++) {
|
||||
((uint8_t *) out)[i] = (ctx->h[i >> 3] >> (8 * (i & 7))) & 0xFF;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
int blake2b(void *out, size_t outlen,const void *key, size_t keylen,const void *in, size_t inlen,void *msg_s1)
|
||||
{
|
||||
blake2b_ctx ctx;
|
||||
if (blake2b_init(&ctx, outlen, key, keylen))
|
||||
return -1;
|
||||
blake2b_update(&ctx, in, inlen);
|
||||
blake2b_final(&ctx, out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
int i,j;
|
||||
uint8_t md[50];
|
||||
uint8_t msg_s1[128];
|
||||
|
||||
uint8_t in[140+4] = {
|
||||
0x04,0x00,0x00,0x00,
|
||||
0xe5,0x4c,0x27,0x54,0x40,0x50,0x66,0x8f,0x27,0x2e,0xc3,0xb4,0x60,0xe1,0xcd,0xe7,
|
||||
0x45,0xc6,0xb2,0x12,0x39,0xa8,0x1d,0xae,0x63,0x7f,0xde,0x47,0x04,0x00,0x00,0x00,
|
||||
0x84,0x4b,0xc0,0xc5,0x56,0x96,0xef,0x99,0x20,0xee,0xda,0x11,0xc1,0xeb,0x41,0xb0,
|
||||
0xc2,0xe7,0x32,0x4b,0x46,0xcc,0x2e,0x7a,0xa0,0xc2,0xaa,0x77,0x36,0x44,0x8d,0x7a,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x68,0x24,0x1a,0x58,
|
||||
0x7e,0x7e,0x06,0x1d,
|
||||
0x25,0x0e,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
|
||||
0x01,0x00,0x00,0x00
|
||||
};
|
||||
|
||||
blake2b(md, 50, NULL, 0, in, sizeof(in), msg_s1);
|
||||
|
||||
printf("the hash out is \n");
|
||||
for (i=0; i < sizeof(md); ++i) {
|
||||
printf("%02x",md[i]);
|
||||
}
|
||||
printf("\n");
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
24 blake2b/blake2b.h Normal file
@@ -0,0 +1,24 @@
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
enum blake2b_constant
|
||||
{
|
||||
BLAKE2B_BLOCKBYTES = 128,
|
||||
BLAKE2B_OUTBYTES = 64,
|
||||
BLAKE2B_KEYBYTES = 64,
|
||||
BLAKE2B_SALTBYTES = 16,
|
||||
BLAKE2B_PERSONALBYTES = 16
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
uint8_t b[128]; // input buffer
|
||||
uint64_t h[8]; // chained state
|
||||
uint64_t t[2]; // total number of bytes
|
||||
size_t c; // pointer for b[]
|
||||
size_t outlen; // digest size
|
||||
} blake2b_ctx;
|
||||
|
||||
void blake2b_update(blake2b_ctx *ctx, const void *in, size_t inlen);
|
||||
int blake2b_init(blake2b_ctx *ctx, size_t outlen);
|
||||
void blake2b_final(blake2b_ctx *ctx, void *out);
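The header exposes a bare init / update / final interface (no key, no reset). A minimal caller sketch, built only from the declarations above — the 4-byte message and 32-byte digest length are illustrative choices, not values taken from this commit:
```
#include <stdio.h>
#include "blake2b.h"

int main(void) {
    const uint8_t msg[4] = { 0x01, 0x02, 0x03, 0x04 };  // illustrative input
    uint8_t digest[32];                                 // illustrative digest length
    blake2b_ctx ctx;

    blake2b_init(&ctx, sizeof(digest));      // keyless init; outlen in bytes
    blake2b_update(&ctx, msg, sizeof(msg));  // may be called repeatedly for streamed input
    blake2b_final(&ctx, digest);             // pads, runs the last compression, writes outlen bytes

    for (size_t i = 0; i < sizeof(digest); i++)
        printf("%02x", digest[i]);
    printf("\n");
    return 0;
}
```
Note that blake2b_init in this commit hard-codes the chaining values rather than deriving them from a parameter block, so the outlen argument only controls how many state bytes are copied out at the end.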
|
||||
192 heavyHash/DiagonalMatrix.h Normal file
@@ -0,0 +1,192 @@
|
||||
#ifndef _SINGULAR_DIAGONAL_MATRIX_H
|
||||
#define _SINGULAR_DIAGONAL_MATRIX_H
|
||||
|
||||
#include "singular.h"
|
||||
|
||||
|
||||
|
||||
//#define L(M,N) (M < N ? M : N)
|
||||
#define L(M,N) (M*N)
|
||||
#if 1
|
||||
|
||||
typedef struct class_DiagonalMatrix DiagonalMatrix_t;
|
||||
struct class_DiagonalMatrix {
|
||||
double *pBlock;
|
||||
|
||||
double (*operator)(struct class_DiagonalMatrix *p, int i, int j);
|
||||
void (*release)(struct class_DiagonalMatrix *p);
|
||||
};
|
||||
|
||||
#else
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
|
||||
//namespace singular {
|
||||
|
||||
/**
|
||||
* Diagonal matrix.
|
||||
*/
|
||||
template < int M, int N >
|
||||
class DiagonalMatrix {
|
||||
public:
|
||||
enum {
|
||||
/** Number of diagonal elements. */
|
||||
L = M < N ? M : N
|
||||
};
|
||||
private:
|
||||
/**
|
||||
* Memory block for the diagonal elements.
|
||||
* The ith row and ith column is given by `elements[i]`.
|
||||
*/
|
||||
double* pBlock;
|
||||
public:
|
||||
/** Initializes a diagonal matrix filled with 0. */
|
||||
DiagonalMatrix() {
|
||||
this->pBlock = new double[L];
|
||||
std::fill(this->pBlock, this->pBlock + L, 0.0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes a diagonal matrix with given diagonal values.
|
||||
*
|
||||
* The diagonal matrix will look like,
|
||||
* \f[
|
||||
* \begin{bmatrix}
|
||||
* \text{values[0]} & & \\
|
||||
* & \ddots & \\
|
||||
* & & \text{values[min(M, N)-1]}
|
||||
* \end{bmatrix}
|
||||
* \f]
|
||||
*
|
||||
* The behavior is undefined if `values` has less than `min(M, N)`
|
||||
* elements.
|
||||
*
|
||||
* @param values
|
||||
* Diagonal values of the matrix.
|
||||
*/
|
||||
explicit DiagonalMatrix(const double values[]) {
|
||||
this->pBlock = new double[L];
|
||||
memcpy(this->pBlock, values, sizeof(double) * L);
|
||||
}
|
||||
|
||||
/**
|
||||
* Steals the memory block from a given diagonal matrix.
|
||||
*
|
||||
* @param[in,out] copyee
|
||||
* Diagonal matrix from which the memory block is to be stolen.
|
||||
* No longer valid after this call.
|
||||
*/
|
||||
#if SINGULAR_RVALUE_REFERENCE_SUPPORTED
|
||||
DiagonalMatrix(DiagonalMatrix&& copyee) : pBlock(copyee.pBlock) {
|
||||
copyee.pBlock = nullptr;
|
||||
}
|
||||
#else
|
||||
DiagonalMatrix(const DiagonalMatrix& copyee) : pBlock(copyee.pBlock) {
|
||||
const_cast< DiagonalMatrix& >(copyee).pBlock = nullptr;
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Releases the memory block of this diagonal matrix. */
|
||||
~DiagonalMatrix() {
|
||||
this->release();
|
||||
}
|
||||
|
||||
/**
|
||||
* Steals the memory block from a given diagonal matrix.
|
||||
*
|
||||
* @param[in,out] copyee
|
||||
* Diagonal matrix from which the memory block is to be stolen.
|
||||
* No longer valid after this call.
|
||||
* @return
|
||||
* Reference to this diagonal matrix.
|
||||
*/
|
||||
#if SINGULAR_RVALUE_REFERENCE_SUPPORTED
|
||||
DiagonalMatrix& operator =(DiagonalMatrix&& copyee) {
|
||||
#else
|
||||
DiagonalMatrix& operator =(const DiagonalMatrix& copyee) {
|
||||
#endif
|
||||
this->release();
|
||||
this->pBlock = copyee.pBlock;
|
||||
#if SINGULAR_RVALUE_REFERENCE_SUPPORTED
|
||||
copyee.pBlock = nullptr;
|
||||
#else
|
||||
const_cast< DiagonalMatrix& >(copyee).pBlock = nullptr;
|
||||
#endif
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a clone of this matrix.
|
||||
*
|
||||
* @return
|
||||
* Clone of this matrix.
|
||||
*/
|
||||
inline DiagonalMatrix clone() const {
|
||||
return DiagonalMatrix(this->pBlock);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the element at a given row and column.
|
||||
*
|
||||
* The behavior is undefined,
|
||||
* - if `i < 0` or `i >= M`,
|
||||
* - or if `j < 0` or `j >= N`
|
||||
*
|
||||
* @param i
|
||||
* Index of the row to be obtained.
|
||||
* @param j
|
||||
* Index of the column to be obtained.
|
||||
* @return
|
||||
* Element at the ith row and jth column.
|
||||
* 0 if `i != j`.
|
||||
*/
|
||||
double operator ()(int i, int j) const {
|
||||
assert(i >= 0 && i < M);
|
||||
assert(j >= 0 && j < N);
|
||||
if (i == j) {
|
||||
return this->pBlock[i];
|
||||
} else {
|
||||
return 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Transposes this matrix.
|
||||
*
|
||||
* @return
|
||||
* Transposed matrix.
|
||||
*/
|
||||
DiagonalMatrix< N, M > transpose() const {
|
||||
return DiagonalMatrix< N, M >(this->pBlock);
|
||||
}
|
||||
private:
|
||||
#if SINGULAR_FUNCTION_DELETION_SUPPORTED
|
||||
/** Copy constructor is not allowed. */
|
||||
DiagonalMatrix(const DiagonalMatrix& copyee) = delete;
|
||||
|
||||
/** Copy assignment is not allowed. */
|
||||
DiagonalMatrix& operator =(const DiagonalMatrix& copyee) = delete;
|
||||
#elif SINGULAR_RVALUE_REFERENCE_SUPPORTED
|
||||
/** Copy constructor is not allowed. */
|
||||
DiagonalMatrix(const DiagonalMatrix& copyee) {}
|
||||
|
||||
/** Copy assignment is not allowed. */
|
||||
DiagonalMatrix& operator =(const DiagonalMatrix& copyee) {
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Releases the memory block of this matrix.
|
||||
* Has no effect if the memory block has already been released.
|
||||
*/
|
||||
inline void release() {
|
||||
delete[] this->pBlock;
|
||||
this->pBlock = nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
//}
|
||||
#endif
|
||||
#endif
|
||||
14 heavyHash/Makefile Normal file
@@ -0,0 +1,14 @@
|
||||
SRCS = heavyhash.c obtc.c sha3.c
|
||||
|
||||
OBJS = $(SRCS:.c=.o)
|
||||
CC = gcc
|
||||
CCFLAGS = -Wall
|
||||
|
||||
libkas.a:$(OBJS)
|
||||
ar -rv libkas.a $(OBJS)
|
||||
|
||||
%.o:%.c
|
||||
$(CC) $(CCFLAGS) -c $< -o $@
|
||||
|
||||
clean:
|
||||
rm -rf *.o *.a
|
||||
25 heavyHash/Matrix.h Normal file
@@ -0,0 +1,25 @@
|
||||
#ifndef _SINGULAR_MATRIX_H
|
||||
#define _SINGULAR_MATRIX_H
|
||||
|
||||
#include "singular.h"
|
||||
#include "Vector.h"
|
||||
|
||||
//#include <algorithm>
|
||||
//#include <cstring>
|
||||
//#include <iostream>
|
||||
|
||||
|
||||
typedef struct class_Matrix Matrix_t;
|
||||
struct class_Matrix {
|
||||
double* pBlock;
|
||||
|
||||
Matrix_t (*clone)(struct class_Matrix *p);
|
||||
void (*filledwith)(struct class_Matrix *p,const double values[]);
|
||||
double (*operator)(struct class_Matrix *p, int i, int j);
|
||||
Vector_t (*row)(struct class_Matrix *p, int i);
|
||||
Vector_t (*column)(struct class_Matrix *p, int j);
|
||||
void (*release)(struct class_Matrix *p);
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
19 heavyHash/Reflector.h Normal file
@@ -0,0 +1,19 @@
|
||||
#ifndef _SINGULAR_REFLECTOR_H
|
||||
#define _SINGULAR_REFLECTOR_H
|
||||
|
||||
#include "Matrix.h"
|
||||
#include "singular.h"
|
||||
|
||||
|
||||
|
||||
typedef struct class_Reflector Reflector_t;
|
||||
struct class_Reflector {
|
||||
Vector_t u;
|
||||
double gamma;
|
||||
size_t L;
|
||||
|
||||
double* ptr;
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
17 heavyHash/Rotator.h Normal file
@@ -0,0 +1,17 @@
|
||||
#ifndef _SINGULAR_ROTATOR_H
|
||||
#define _SINGULAR_ROTATOR_H
|
||||
|
||||
#include "Matrix.h"
|
||||
#include "singular.h"
|
||||
|
||||
|
||||
typedef struct class_Rotator Rotator_t;
|
||||
struct class_Rotator {
|
||||
double elements[4];
|
||||
double (*operator)(struct class_Rotator *p, int i, int j);
|
||||
void (*applyFromLeftTo)(struct class_Rotator *p, Matrix_t rhs, int k);
|
||||
void (*applyFromRightTo)(struct class_Rotator *p, Matrix_t rhs, int k);
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
45 heavyHash/Svd.h Normal file
@@ -0,0 +1,45 @@
|
||||
#ifndef _SINGULAR_SVD_H
|
||||
#define _SINGULAR_SVD_H
|
||||
|
||||
#include "DiagonalMatrix.h"
|
||||
#include "Matrix.h"
|
||||
#include "Reflector.h"
|
||||
#include "Rotator.h"
|
||||
//#include "singular.h"
|
||||
|
||||
//#include <algorithm>
|
||||
//#include <cassert>
|
||||
//#include <tuple>
|
||||
|
||||
typedef struct Svd Svd_t;
|
||||
struct Svd {
|
||||
//USV decomposeUSV(const Matrix< M, N >& m)
|
||||
bool (*isFullRank)(Svd_t *p, DiagonalMatrix_t singularValues, const int size);
|
||||
};
|
||||
|
||||
typedef struct class_BidiagonalMatrix BidiagonalMatrix_t;
|
||||
struct class_BidiagonalMatrix {
|
||||
double* pBlock;
|
||||
double (*operator)(struct class_BidiagonalMatrix *p, int i, int j);
|
||||
double (*applyFirstRotatorFromRight)(struct class_BidiagonalMatrix *p, Rotator_t *r);
|
||||
double (*applyRotatorFromRight)(struct class_BidiagonalMatrix *p, Rotator_t *r, int n, double bulge);
|
||||
double (*applyRotatorFromLeft)(struct class_BidiagonalMatrix *p, Rotator_t *r, int n, double bulge);
|
||||
BidiagonalMatrix_t (*bidiagonalize)(struct class_BidiagonalMatrix *p, Matrix_t m);
|
||||
void (*doFrancis)(struct class_BidiagonalMatrix *m,int n);
|
||||
double (*calculateShift)(struct class_BidiagonalMatrix *m, int n);
|
||||
void (*releases)(struct class_BidiagonalMatrix *p);
|
||||
};
|
||||
|
||||
void BidiagonalMatrix_doFrancis(BidiagonalMatrix_t *m, int n);
|
||||
double BidiagonalMatrix_calculateShift(BidiagonalMatrix_t *m, int n);
|
||||
double BidiagonalMatrix_applyRotatorFromLeft(BidiagonalMatrix_t *ptr, Rotator_t *r, int n, double bulge);
|
||||
double BidiagonalMatrix_applyRotatorFromRight(BidiagonalMatrix_t *ptr, Rotator_t *r, int n, double bulge);
|
||||
double BidiagonalMatrix_applyFirstRotatorFromRight(BidiagonalMatrix_t *p, Rotator_t *r);
|
||||
double BidiagonalMatrix_operator(BidiagonalMatrix_t *p, int i, int j);
|
||||
void BidiagonalMatrix_release(BidiagonalMatrix_t *p);
|
||||
void BidiagonalMatrix_init(BidiagonalMatrix_t *p, Matrix_t *m);
|
||||
void BidiagonalMatrix_def(BidiagonalMatrix_t *p);
|
||||
BidiagonalMatrix_t BidiagonalMatrix_bidiagonalize(BidiagonalMatrix_t *p, Matrix_t m);
|
||||
|
||||
|
||||
#endif
|
||||
22 heavyHash/Vector.h Normal file
@@ -0,0 +1,22 @@
|
||||
#ifndef _SINGULAR_VECTOR_H
|
||||
#define _SINGULAR_VECTOR_H
|
||||
|
||||
|
||||
#include <stddef.h>
|
||||
#include "singular.h"
|
||||
|
||||
|
||||
typedef struct class_Vector Vector_t;
|
||||
struct class_Vector {
|
||||
double* pBlock;
|
||||
size_t len;
|
||||
ptrdiff_t delta;
|
||||
|
||||
double* ptr;
|
||||
void (*move)(struct class_Vector *p, ptrdiff_t delta);
|
||||
double (*operator)(struct class_Vector *p, size_t idx);
|
||||
Vector_t (*slice)(struct class_Vector *p, size_t start);
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
150 heavyHash/heavyhash.c Normal file
@@ -0,0 +1,150 @@
|
||||
#include "sha3.h"
|
||||
#include "obtc.h"
|
||||
|
||||
|
||||
|
||||
|
||||
void CSHA3_256_Write(CSHA3_256 *p, const unsigned char* data, size_t len) {
|
||||
sha3_update(&p->context, data, len);
|
||||
//return *this;
|
||||
}
|
||||
|
||||
void CSHA3_256_Finalize(CSHA3_256 *p, unsigned char hash[OUTPUT_SIZE]) {
|
||||
sha3_final(hash, &p->context);
|
||||
}
|
||||
|
||||
/*void CSHA3_256_Reset(Obtc_t *Obtc, CSHA3_256 *p) {
|
||||
sha3_init(Obtc,&p->context, OUTPUT_SIZE);
|
||||
//return *this;
|
||||
}*/
|
||||
|
||||
void CSHA3_256_init(Obtc_t *Obtc, CSHA3_256 *p) {
|
||||
|
||||
sha3_init(Obtc, &p->context, OUTPUT_SIZE);
|
||||
|
||||
p->Write = CSHA3_256_Write;
|
||||
p->Finalize = CSHA3_256_Finalize;
|
||||
//p->Reset = CSHA3_256_Reset;
|
||||
}
|
||||
|
||||
void CSHA3_256_CSHA3_256(Obtc_t *Obtc,CSHA3_256 *p) {
|
||||
sha3_init(Obtc,&p->context, OUTPUT_SIZE);
|
||||
|
||||
}
|
||||
|
||||
|
||||
void CHeavyHash_Write(CHeavyHash *p, const unsigned char* data, size_t len) {
|
||||
p->hasher.Write(&p->hasher,data, len);
|
||||
//sha3_update(&CSHA3_256_p.context, data, len);
|
||||
//CSHA3_256_Write(&CSHA3_256_p, data, OUTPUT_SIZE);
|
||||
}
|
||||
|
||||
void CHeavyHash_Finalize(Obtc_t *Obtc, CHeavyHash *p, unsigned char hash[OUTPUT_SIZE]) {
|
||||
uint256 hash_first;
|
||||
uint8_t a[32];
|
||||
|
||||
p->hasher.Finalize(&p->hasher,&Obtc->g_hash_first.bb.data[0]);
|
||||
memcpy(a,&Obtc->g_hash_first.bb.data[0],32);
|
||||
|
||||
uint256 product = MultiplyUsing4bitPrecision(p->matrix, Obtc->g_hash_first);
|
||||
|
||||
uint256 hash_xored;
|
||||
for (size_t i = 0; i < OUTPUT_SIZE; ++i) {
|
||||
//hash_xored.begin()[i] = hash_first.begin()[i] ^ product.begin()[i];
|
||||
hash_xored.bb.data[i] = Obtc->g_hash_first.bb.data[i] ^ product.bb.data[i];
|
||||
|
||||
|
||||
}
|
||||
|
||||
uint8_t temp[200]={
|
||||
0x16,0x19,0x32,0x7d,0x10,0xb9,0xda,0x35,0x54,0x9a,0xe0,0x31,0x2f,0x9f,0xc6,0x15,0x92,0xbb,0x39,0x9d,
|
||||
0xb5,0x29,0x0c,0x0a,0x47,0xc3,0x9f,0x67,0x51,0x12,0xc2,0x2e,0xc7,0x76,0xc5,0x04,0x84,0x81,0xb9,0x57,
|
||||
0xb9,0x92,0xf2,0xd3,0x7b,0x34,0xca,0x58,0xea,0x8f,0xdb,0x80,0xba,0xc4,0x6d,0x39,0x7e,0x8f,0x1d,0xb1,
|
||||
0x77,0x65,0xcc,0x07,0x87,0xe9,0x61,0xb0,0x36,0xbc,0x94,0x16,0x77,0x4c,0x86,0x83,0x54,0x34,0xf2,0xb0,
|
||||
0x4e,0xf7,0x4b,0x3a,0x99,0xcd,0xb0,0x44,0x2e,0xc6,0x5b,0xd3,0x56,0x24,0x93,0xe4,0x6c,0x6b,0x7d,0x01,
|
||||
0xa7,0x69,0xcc,0x3d,0xd3,0x1f,0x4c,0xc3,0x54,0xc1,0x8c,0x3f,0xf4,0x31,0xc0,0x5d,0xd0,0xa9,0xa2,0x26,
|
||||
0xa0,0xbc,0xaa,0x9f,0x79,0x2a,0x3d,0x0c,0x80,0x39,0xf9,0xa6,0x0d,0xcf,0x6a,0x48,0x5e,0x21,0x90,0x40,
|
||||
0x25,0x0f,0xc4,0x62,0xc1,0x00,0xff,0x2a,0x93,0x89,0x35,0xba,0x72,0xc7,0xd8,0x2e,0x14,0xf3,0x40,0x69,
|
||||
0xe7,0x20,0xe0,0xdf,0x44,0xee,0xce,0xde,0x11,0xa7,0x5f,0x4c,0x80,0x05,0x64,0x98,0x7a,0x14,0xff,0x48,
|
||||
0x16,0xc7,0xf8,0xee,0x79,0x62,0x9b,0x0e,0x2f,0x9f,0x42,0x16,0x3a,0xd7,0x4c,0x52,0xb2,0x24,0x85,0x09,
|
||||
};
|
||||
for(int i = 0 ;i< 200 ;i++)Obtc->const_data[i] = temp[i];
|
||||
|
||||
// CSHA3_256().Write(hash_xored.begin(), OUTPUT_SIZE).Finalize(hash);
|
||||
|
||||
CSHA3_256_CSHA3_256(Obtc, &p->hasher);
|
||||
CSHA3_256_Write(&p->hasher, &hash_xored.bb.data[0], OUTPUT_SIZE);
|
||||
CSHA3_256_Finalize(&p->hasher, hash) ;
|
||||
}
|
||||
|
||||
void CHeavyHash_Reset(CHeavyHash *p, uint64_t matrix_[64*64]) {
|
||||
for (int i = 0; i < 64*64; ++i)
|
||||
p->matrix[i] = matrix_[i];
|
||||
}
|
||||
|
||||
void CHeavyHash_init(Obtc_t *Obtc, CHeavyHash *p, uint64_t matrix_[64*64]){
|
||||
|
||||
p->Write = CHeavyHash_Write;
|
||||
p->Finalize = CHeavyHash_Finalize;
|
||||
p->Reset = CHeavyHash_Reset;
|
||||
|
||||
p->hasher.Write = CSHA3_256_Write;
|
||||
p->hasher.Finalize = CSHA3_256_Finalize;
|
||||
//p->hasher.Reset = CSHA3_256_Reset;
|
||||
|
||||
sha3_init(Obtc, &p->hasher.context, OUTPUT_SIZE);
|
||||
|
||||
for (int i = 0; i < 64*64; ++i)
|
||||
p->matrix[i] = matrix_[i];
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void MultiplyMatrices(uint64_t matrix[64*64], uint64_t vector[64], uint64_t product[64]){
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
product[i] += matrix[64*i + j]*vector[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint256 MultiplyUsing4bitPrecision(uint64_t matrix[64*64], const uint256 hash) {
|
||||
// conversion to matrix with 4 bit values
|
||||
uint64_t vector[64] = {0};
|
||||
ConvertTo4BitPrecisionVector(hash, vector);
|
||||
|
||||
// perform matrix multiplication
|
||||
uint64_t product[64] = {0};
|
||||
MultiplyMatrices(matrix, vector, product);
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
product[i] >>= 10;
|
||||
}
|
||||
return Convert4bitVectorToUint(product);
|
||||
}
|
||||
|
||||
void ConvertTo4BitPrecisionVector(uint256 bit_sequence, uint64_t vector[64]) {
|
||||
int index = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < WIDTH; i++) {
|
||||
|
||||
vector[index] = bit_sequence.bb.data[i] >> 4;
|
||||
vector[index+1] = bit_sequence.bb.data[i] & 0xF;
|
||||
index += 2;
|
||||
}
|
||||
}
|
||||
|
||||
uint256 Convert4bitVectorToUint(const uint64_t x[64]) {
|
||||
uint256 bit_sequence;
|
||||
int index = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < WIDTH; i++) {
|
||||
bit_sequence.bb.data[i] = ( x[index] << 4) | x[index+1];
|
||||
index += 2;
|
||||
}
|
||||
|
||||
return bit_sequence;
|
||||
}
|
||||
98 heavyHash/heavyhash.h Normal file
@@ -0,0 +1,98 @@
|
||||
#ifndef OPOW_CRYPTO_HEAVYHASH_H
|
||||
#define OPOW_CRYPTO_HEAVYHASH_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include "sha3.h"
|
||||
|
||||
//#include <memory>
|
||||
//#include "obtc.h"
|
||||
|
||||
|
||||
#define OUTPUT_SIZE 32
|
||||
|
||||
typedef struct class_CSHA3_256 CSHA3_256;
|
||||
|
||||
struct class_CSHA3_256
|
||||
{
|
||||
sha3_ctx_t context;
|
||||
|
||||
// static const size_t OUTPUT_SIZE = 32;
|
||||
|
||||
//CSHA3_256& Write(const unsigned char* data, size_t len);
|
||||
void (*Write)(struct class_CSHA3_256 *p, const unsigned char* data, size_t len);
|
||||
void (*Finalize)(struct class_CSHA3_256 *p, unsigned char hash[OUTPUT_SIZE]);
|
||||
//CSHA3_256& Reset();
|
||||
};
|
||||
|
||||
|
||||
typedef struct class_CHeavyHash CHeavyHash;
|
||||
struct class_CHeavyHash
|
||||
{
|
||||
|
||||
uint64_t matrix[64*64];
|
||||
CSHA3_256 hasher;
|
||||
|
||||
//static const size_t OUTPUT_SIZE = 32;
|
||||
//explicit CHeavyHash(uint64_t matrix_[64*64]);
|
||||
//CHeavyHash& Reset(uint64_t matrix_[64*64]);
|
||||
//CHeavyHash& Write(const unsigned char* data, size_t len);
|
||||
//void Finalize(unsigned char hash[OUTPUT_SIZE]);
|
||||
void (*Reset)(struct class_CHeavyHash *p, uint64_t matrix_[64*64]);
|
||||
void (*Write)(struct class_CHeavyHash *p, const unsigned char* data, size_t len);
|
||||
void (*Finalize)(struct class_CHeavyHash *p, unsigned char hash[OUTPUT_SIZE]);
|
||||
};
|
||||
|
||||
#if 0
|
||||
/** A hasher class for SHA3-256. */
|
||||
class CSHA3_256
|
||||
{
|
||||
private:
|
||||
sha3_ctx_t context;
|
||||
|
||||
public:
|
||||
static const size_t OUTPUT_SIZE = 32;
|
||||
|
||||
CSHA3_256();
|
||||
CSHA3_256& Write(const unsigned char* data, size_t len);
|
||||
void Finalize(unsigned char hash[OUTPUT_SIZE]);
|
||||
CSHA3_256& Reset();
|
||||
};
|
||||
|
||||
class CHeavyHash
|
||||
{
|
||||
private:
|
||||
uint64_t matrix[64*64];
|
||||
CSHA3_256 hasher;
|
||||
|
||||
public:
|
||||
static const size_t OUTPUT_SIZE = 32;
|
||||
explicit CHeavyHash(uint64_t matrix_[64*64]);
|
||||
CHeavyHash& Reset(uint64_t matrix_[64*64]);
|
||||
CHeavyHash& Write(const unsigned char* data, size_t len);
|
||||
void Finalize(unsigned char hash[OUTPUT_SIZE]);
|
||||
};
|
||||
#endif
|
||||
uint256 MultiplyUsing4bitPrecision(uint64_t matrix[64*64], const uint256 hash);
|
||||
|
||||
void ConvertTo4BitPrecisionVector(uint256 bit_sequence, uint64_t vector[64]);
|
||||
|
||||
uint256 Convert4bitVectorToUint(const uint64_t x[64]);
|
||||
|
||||
|
||||
//zzj add
|
||||
/*extern void CSHA3_256_init(struct Obtc_opt *Obtc, CSHA3_256 *p);
|
||||
void CSHA3_256_CSHA3_256(struct Obtc_opt *Obtc, CSHA3_256 *p);
|
||||
|
||||
void CSHA3_256_Write(CSHA3_256 *p, const unsigned char* data, size_t len);
|
||||
|
||||
void CSHA3_256_Finalize(CSHA3_256 *p, unsigned char hash[OUTPUT_SIZE]);
|
||||
//
|
||||
|
||||
void CHeavyHash_init(struct Obtc_opt *Obtc, CHeavyHash *p, uint64_t matrix_[64*64]);
|
||||
void CHeavyHash_Write(CHeavyHash *p, const unsigned char* data, size_t len);
|
||||
|
||||
void CHeavyHash_Finalize(struct Obtc_opt *Obtc, CHeavyHash *p, unsigned char hash[OUTPUT_SIZE]);
|
||||
*/
|
||||
|
||||
#endif // OPOW_CRYPTO_HEAVYHASH_H
|
||||
BIN heavyHash/heavyhash.o Normal file
Binary file not shown.
BIN heavyHash/libkas.a Normal file
Binary file not shown.
907 heavyHash/obtc.c Normal file
@@ -0,0 +1,907 @@
|
||||
//! heavyhash extracted from optical bitcoin
|
||||
//! 2022 barrystyle
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <search.h>  // qsort
#include <time.h>
|
||||
|
||||
#include "obtc.h"
|
||||
|
||||
|
||||
#define M 64
|
||||
#define N 64
|
||||
|
||||
bool Is4BitPrecision(const uint64_t matrix[64*64])
|
||||
{
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
if (matrix[ i*64 + j] > 0xF)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
double DiagonalMatrix_operator(DiagonalMatrix_t *p, int i, int j)
|
||||
{
|
||||
assert(i >= 0 && i < 64);
|
||||
assert(j >= 0 && j < 64);
|
||||
if (i == j) {
|
||||
return p->pBlock[i];
|
||||
} else {
|
||||
return 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
void DiagonalMatrix_release(DiagonalMatrix_t *p)
|
||||
{
|
||||
if (p->pBlock != NULL){
|
||||
free(p->pBlock);
|
||||
p->pBlock = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void DiagonalMatrix_init(DiagonalMatrix_t *p, const double values[])
|
||||
{
|
||||
p->pBlock = (double *)malloc(sizeof(double)*M);
|
||||
//memset(pBlock, 0.0, sizeof(double)*L(64,64));
|
||||
memcpy(p->pBlock, values, sizeof(double) * M);
|
||||
|
||||
p->operator = DiagonalMatrix_operator;
|
||||
p->release = DiagonalMatrix_release;
|
||||
|
||||
}
|
||||
|
||||
void DiagonalMatrix_DiagonalMatrix(DiagonalMatrix_t *p)
|
||||
{
|
||||
p->operator = DiagonalMatrix_operator;
|
||||
p->release = DiagonalMatrix_release;
|
||||
}
|
||||
|
||||
//-----------------------------vector-------------------------------//
|
||||
|
||||
void vector_move(Vector_t *p, ptrdiff_t delta) {
|
||||
p->ptr += delta;
|
||||
}
|
||||
|
||||
Vector_t vector_slice(Vector_t v, size_t start) {
|
||||
//assert(start >= 0 && start <= p->len);
|
||||
Vector_t v_tmp;
|
||||
v_tmp.pBlock = v.pBlock + start * v.delta;
|
||||
v_tmp.len = v.len - start;
|
||||
v_tmp.delta = v.delta;
|
||||
return v_tmp;
|
||||
}
|
||||
|
||||
double Vector_column_operator(Vector_t *p, size_t idx){
|
||||
return p->pBlock[idx * p->delta];
|
||||
}
|
||||
|
||||
double Vector_row_operator(Vector_t *p, size_t idx){
|
||||
return p->pBlock[idx * p->delta];
|
||||
}
|
||||
|
||||
void Vector_sync(Matrix_t *p, size_t idx, Vector_t vec, int offset){
|
||||
for(int i = 0; i < vec.len; i++){
|
||||
p->pBlock[idx+(offset+i)*N] = vec.pBlock[i];
|
||||
}
|
||||
}
|
||||
|
||||
void Vector_row_sync(Matrix_t *p, size_t idx, Vector_t vec, int offset){
|
||||
for(int i = 0; i < vec.len; i++){
|
||||
p->pBlock[offset+idx*N+i] = vec.pBlock[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------Martrix-------------------------------//
|
||||
Matrix_t Matrix_clone(Matrix_t *p)
|
||||
{
|
||||
Matrix_t m;
|
||||
|
||||
m.pBlock = (double *)malloc(sizeof(double)*L(64,64));
|
||||
memcpy(m.pBlock, p->pBlock, sizeof(double)*L(64,64));
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
void Matrix_filledwith(Matrix_t *p, const double values[])
|
||||
{
|
||||
//p->pBlock = (double *)malloc(sizeof(double)*L(64,64));
|
||||
//memset(pBlock, 0.0, sizeof(double)*L(64,64));
|
||||
memcpy(p->pBlock, values, sizeof(double) * L(64,64));
|
||||
}
|
||||
|
||||
double Matrix_operator(Matrix_t *p, int i, int j)
|
||||
{
|
||||
assert(i >= 0 && i < N);
|
||||
assert(j >= 0 && j < N);
|
||||
|
||||
return p->pBlock[i*N+j];
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
Vector_t Matrix_row(Matrix_t *p, int i)
|
||||
{
|
||||
Vector_t vec_tmp;
|
||||
vec_tmp.len = N;
|
||||
vec_tmp.delta = 1;
|
||||
vec_tmp.pBlock = p->pBlock + i*N;
|
||||
//return Vector< const double >(this->pBlock + i * N, N, 1);
|
||||
return vec_tmp;
|
||||
|
||||
}
|
||||
|
||||
Vector_t Matrix_column(Matrix_t *p, int j)
|
||||
{
|
||||
Vector_t vec_tmp;
|
||||
vec_tmp.len = M;
|
||||
vec_tmp.delta = N;
|
||||
vec_tmp.pBlock = p->pBlock + j;
|
||||
|
||||
return vec_tmp;
|
||||
//return Vector< double >(this->pBlock + j, M, N);
|
||||
|
||||
}
|
||||
|
||||
void Matrix_release(Matrix_t *p)
|
||||
{
|
||||
if (p->pBlock != NULL){
|
||||
free(p->pBlock);
|
||||
p->pBlock = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void Matrix_init(Matrix_t *p)
|
||||
{
|
||||
p->pBlock = (double *)malloc(sizeof(double)*L(64,64));
|
||||
memset(p->pBlock, 0.0, sizeof(double)*L(64,64));
|
||||
//memcpy(p->pBlock, values, sizeof(double) * L(64,64));
|
||||
}
|
||||
|
||||
void Matrix_def(Matrix_t *p)
|
||||
{
|
||||
//p->clone = Matrix_clone;
|
||||
p->filledwith = Matrix_filledwith;
|
||||
p->operator = Matrix_operator;
|
||||
p->row = Matrix_row;
|
||||
p->column = Matrix_column;
|
||||
p->release = Matrix_release;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//-----------------------------Rotator-------------------------------//
|
||||
|
||||
double max(double a, double b)
|
||||
{
|
||||
return a > b ? a : b;
|
||||
}
|
||||
|
||||
double Rotator_operator(Rotator_t *p, int i, int j){
|
||||
assert(0 <= i && i < 2);
|
||||
assert(0 <= j && j < 2);
|
||||
return p->elements[i * 2 + j];
|
||||
}
|
||||
|
||||
void Rotator_init(Rotator_t *p, double x1, double x2)
|
||||
{
|
||||
// normalizes by the maximum magnitude
|
||||
// to avoid harmful underflow and overflow
|
||||
double mx = max(fabs(x1), fabs(x2));
|
||||
|
||||
x1 /= mx;
|
||||
x2 /= mx;
|
||||
double norm = sqrt(x1 * x1 + x2 * x2);
|
||||
double cs = x1 / norm;
|
||||
double sn = x2 / norm;
|
||||
p->elements[0] = cs;
|
||||
p->elements[1] = -sn;
|
||||
p->elements[2] = sn;
|
||||
p->elements[3] = cs;
|
||||
|
||||
p->operator = Rotator_operator;
|
||||
}
|
||||
|
||||
//-----------------------------Reflector-------------------------------//
|
||||
|
||||
|
||||
void Reflector_transform(Reflector_t *p, double u0, size_t len){
|
||||
int i;
|
||||
for (i = 0; i < len; i++){
|
||||
p->u.pBlock[i] = p->u.pBlock[i] /u0;
|
||||
}
|
||||
}
|
||||
|
||||
void Reflector_transform_left(Reflector_t *src1, Vector_t src2, Vector_t dst, double gUM, size_t len){
|
||||
int i;
|
||||
for (i = 0; i < len; i++){
|
||||
dst.pBlock[i] = src2.pBlock[i] - src1->u.pBlock[i] * gUM;
|
||||
}
|
||||
}
|
||||
|
||||
void Reflector_transform_right(Reflector_t *src1, Vector_t src2, Vector_t dst, double gMU, size_t len){
|
||||
int i;
|
||||
for (i = 0; i < len; i++){
|
||||
dst.pBlock[i] = src2.pBlock[i] - gMU * src1->u.pBlock[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Reflector_init(Reflector_t *p, Vector_t v) {
|
||||
//assert(v.size() > 0 && v.size() <= L);
|
||||
//const size_t N = v.size();
|
||||
//const size_t p->L = sizeof(v)/sizeof(double);
|
||||
p->L = v.len;
|
||||
|
||||
p->u.pBlock = (double *)malloc(sizeof(double)*v.len);
|
||||
memcpy(p->u.pBlock, v.pBlock, sizeof(double)*v.len);
|
||||
|
||||
// normalizes elements by the maximum amplitude
|
||||
// to avoid harmful underflow and overflow
|
||||
|
||||
double mx = 0.0;
|
||||
|
||||
for (size_t i = 0; i < p->L; ++i) {
|
||||
mx = max(fabs(p->u.pBlock[i]), mx);
|
||||
}
|
||||
|
||||
if (mx > 0.0) {
|
||||
// calculates the normalized norm
|
||||
double tau = 0.0;
|
||||
for (size_t i = 0; i < p->L; ++i) {
|
||||
double x = p->u.pBlock[i] / mx;
|
||||
p->u.pBlock[i] = x;
|
||||
tau += x * x;
|
||||
}
|
||||
tau = sqrt(tau);
|
||||
// tau's sign should be the same as the first element in `u`
|
||||
if (p->u.pBlock[0] < 0.0) {
|
||||
tau = -tau;
|
||||
}
|
||||
double u0 = p->u.pBlock[0] + tau;
|
||||
p->u.pBlock[0] = u0;
|
||||
Reflector_transform(p, u0, p->L);
|
||||
|
||||
p->gamma = u0 / tau;
|
||||
} else {
|
||||
// v is a zero vector
|
||||
p->gamma = 0.0;
|
||||
memset(p->u.pBlock, 0, sizeof(double) * p->L);
|
||||
}
|
||||
}
|
||||
|
||||
void Reflector_release(Reflector_t *p){
|
||||
if (p->u.pBlock != NULL){
|
||||
free(p->u.pBlock);
|
||||
p->u.pBlock = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
double inner_product(double *a,double *b,int n){
|
||||
int i;
|
||||
double sum = 0.0;
|
||||
|
||||
for(i = 0; i < n; i++)
|
||||
{
|
||||
sum += (*(a+i))*(*(b+i));
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
Matrix_t Reflector_applyFromLeftTo(Reflector_t *p, Matrix_t m){
|
||||
// H * m = m - gamma * u * u^T * m
|
||||
Matrix_t m2 = Matrix_clone(&m);//m->clone(m);
|
||||
Vector_t vec_m;
|
||||
Vector_t vec_m2;
|
||||
|
||||
int offset = N - p->L;
|
||||
for (int i = 0; i < N; ++i) {
|
||||
// caches gamma * u^T * m
|
||||
vec_m = Matrix_column(&m, i);
|
||||
|
||||
Vector_t srcColumn = vector_slice(vec_m, offset);
|
||||
double v_src_column[srcColumn.len];
|
||||
|
||||
for(size_t i = 0; i < srcColumn.len; i++){
|
||||
v_src_column[i] = Vector_column_operator(&srcColumn, i);
|
||||
}
|
||||
srcColumn.pBlock = v_src_column;
|
||||
|
||||
double gUM = inner_product(p->u.pBlock, srcColumn.pBlock, p->L);
|
||||
//Vector< const double > srcColumn = m->column(m, i).slice(offset);
|
||||
|
||||
gUM *= p->gamma;
|
||||
// H * m = m - u * gUM
|
||||
vec_m2 = Matrix_column(&m2, i);
|
||||
Vector_t dstColumn = vector_slice(vec_m2, offset);
|
||||
double v_dstcolumn[dstColumn.len];
|
||||
|
||||
for(size_t i = 0; i < dstColumn.len; i++){
|
||||
v_dstcolumn[i] = Vector_column_operator(&dstColumn, i);
|
||||
}
|
||||
dstColumn.pBlock = v_dstcolumn;
|
||||
|
||||
Reflector_transform_left(p, srcColumn, dstColumn, gUM, p->L);
|
||||
Vector_sync(&m2, i, dstColumn, offset);
|
||||
}
|
||||
Matrix_release(&m);
|
||||
return m2;
|
||||
}
|
||||
|
||||
Matrix_t Reflector_applyFromRightTo(Reflector_t *p, Matrix_t m){
|
||||
// m * H = m - m * gamma * u * u^T
|
||||
Matrix_t m2 = Matrix_clone(&m);
|
||||
Vector_t vec_m;
|
||||
Vector_t vec_m2;
|
||||
|
||||
int offset = 64 - p->L;
|
||||
|
||||
for (int i = 0; i < M; ++i) {
|
||||
// caches gamma * m * u
|
||||
vec_m = Matrix_row(&m, i);
|
||||
Vector_t srcRow = vector_slice(vec_m, offset);
|
||||
|
||||
double v_src_row[srcRow.len];
|
||||
for(size_t j = 0; j< srcRow.len; j++){
|
||||
v_src_row[j] = Vector_row_operator(&srcRow, j);
|
||||
}
|
||||
srcRow.pBlock = v_src_row;
|
||||
|
||||
double gMU = inner_product(p->u.pBlock, srcRow.pBlock, p->L);
|
||||
|
||||
gMU *= p->gamma;
|
||||
// m * H = m - gMU * u^T
|
||||
vec_m2 = Matrix_row(&m2, i);
|
||||
|
||||
Vector_t dstRow = vector_slice(vec_m2, offset);
|
||||
|
||||
double v_dstrow[dstRow.len];
|
||||
|
||||
for(size_t j = 0; j < dstRow.len; j++){
|
||||
v_dstrow[j] = Vector_row_operator(&dstRow, j);
|
||||
}
|
||||
dstRow.pBlock = v_dstrow;
|
||||
|
||||
Reflector_transform_right(p ,srcRow, dstRow, gMU, p->L);
|
||||
Vector_row_sync(&m2, i, dstRow, offset);
|
||||
}
|
||||
Matrix_release(&m);
|
||||
return m2;
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------Svd-------------------------------//
|
||||
|
||||
int cmp_double(const void* e1, const void* e2)
|
||||
{
|
||||
if ((*(double*)e2 - *(double*)e1) > 0.00000)
|
||||
return 1;
|
||||
else if ((*(double*)e2 - *(double*)e1) == 0.000000)
|
||||
return 0;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
DiagonalMatrix_t Svd_decomposeUSV(BidiagonalMatrix_t *p, Matrix_t *m) {
|
||||
const int MAX_ITERATIONS = N * 10;
|
||||
// allocates matrices
|
||||
Matrix_t m1 = Matrix_clone(m);
|
||||
Matrix_def(&m1);
|
||||
|
||||
|
||||
|
||||
// bidiagonalizes a given matrix
|
||||
BidiagonalMatrix_t m2 = p->bidiagonalize(p, m1);
|
||||
// repeats Francis iteration
|
||||
|
||||
|
||||
int iteration = 0;
|
||||
int n = N;
|
||||
|
||||
while (n >= 2) {
|
||||
// processes the n-1 x n-1 submatrix
|
||||
// if the current n x n submatrix has converged
|
||||
double bn = m2.operator(&m2, n - 1, n - 1);
|
||||
|
||||
if (bn == 0.0 || fabs(m2.operator(&m2, n - 2, n - 1) / bn) < 1.0e-15) {
|
||||
--n;
|
||||
} else {
|
||||
// aborts if too many iterations
|
||||
++iteration;
|
||||
if (iteration > MAX_ITERATIONS) {
|
||||
break;
|
||||
}
|
||||
m2.doFrancis(&m2, n);
|
||||
}
|
||||
}
|
||||
|
||||
// copies the diagonal elements
|
||||
// and makes all singular values positive
|
||||
double ss[N];
|
||||
for (int i = 0; i < N; ++i) {
|
||||
if (m2.operator(&m2, i, i) < 0) {
|
||||
ss[i] = -m2.operator(&m2, i, i);
|
||||
// inverts the sign of the right singular vector
|
||||
//Vector< double > vi = v.column(i);
|
||||
//std::transform(
|
||||
// vi.begin(), vi.end(), vi.begin(),
|
||||
// [](double x) {
|
||||
// return -x;
|
||||
// });
|
||||
} else {
|
||||
ss[i] = m2.operator(&m2, i, i);
|
||||
}
|
||||
}
|
||||
|
||||
// sorts singular values in descending order if necessary
|
||||
int shuffle[M]; // M >= N
|
||||
bool sortNeeded = false;
|
||||
for (int i = 0; i < M; ++i) {
|
||||
shuffle[i] = i;
|
||||
sortNeeded = sortNeeded || (i < N - 1 && ss[i] < ss[i + 1]);
|
||||
}
|
||||
|
||||
m1.release(&m1);
|
||||
BidiagonalMatrix_release(p);
|
||||
|
||||
|
||||
DiagonalMatrix_t dm;
|
||||
if (sortNeeded) {
|
||||
// shuffles the N (<= M) singular values
|
||||
qsort(ss, N,sizeof(double), cmp_double);
|
||||
|
||||
double ss2[M];
|
||||
|
||||
memcpy(ss2, ss, M*sizeof(double));
|
||||
DiagonalMatrix_init(&dm, ss2);
|
||||
|
||||
return dm;
|
||||
} else {
|
||||
DiagonalMatrix_init(&dm, ss);
|
||||
return dm;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
bool Svd_isFullRank(DiagonalMatrix_t *p, const int size) {
|
||||
const double round_off = 1.000009e-12;
|
||||
for (int i = 0; i < size; ++i) {
|
||||
if (fabs( p->operator(p, i, i) ) < round_off){
|
||||
p->release(p);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
p->release(p);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------BidiagonalMatrix_t-------------------------------//
|
||||
BidiagonalMatrix_t BidiagonalMatrix_bidiagonalize(BidiagonalMatrix_t *p, Matrix_t m)
|
||||
{
|
||||
assert(M >= N);
|
||||
|
||||
Vector_t vec_m;
|
||||
Vector_t vec_m2;
|
||||
|
||||
for (int i = 0; i < N; ++i) {
|
||||
Reflector_t rU;
|
||||
|
||||
vec_m = Matrix_column(&m, i);
|
||||
Vector_t column_slice = vector_slice(vec_m, i);
|
||||
// applies a householder transform to the column vector i
|
||||
|
||||
double v_column[column_slice.len];
|
||||
|
||||
for(size_t i = 0; i < column_slice.len; i++){
|
||||
v_column[i] = Vector_column_operator(&column_slice, i);
|
||||
}
|
||||
column_slice.pBlock = v_column;
|
||||
|
||||
Reflector_init(&rU, column_slice);
|
||||
|
||||
m = Reflector_applyFromLeftTo(&rU, m);
|
||||
|
||||
Reflector_release(&rU);
|
||||
//u = rU.applyFromRightTo(u); // U1^T*U0^T = U0*U1
|
||||
if (i < N - 1) {
|
||||
// applies a householder transform to the row vector i + 1
|
||||
//Reflector< N > rV(m.row(i).slice(i + 1));
|
||||
Reflector_t rV;
|
||||
vec_m2 = Matrix_row(&m, i);
|
||||
Vector_t row_slice = vector_slice(vec_m2, i+1);
|
||||
|
||||
double v_row[row_slice.len];
|
||||
|
||||
for(size_t i = 0; i < row_slice.len; i++){
|
||||
v_row[i] = Vector_row_operator(&row_slice, i);
|
||||
}
|
||||
row_slice.pBlock = v_row;
|
||||
Reflector_init(&rV, row_slice);
|
||||
|
||||
m = Reflector_applyFromRightTo(&rV, m);
|
||||
//m = rV.applyFromRightTo(m);
|
||||
//v = rV.applyFromRightTo(v);
|
||||
|
||||
Reflector_release(&rV);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
BidiagonalMatrix_init(p, &m);
|
||||
return *p;
|
||||
}
|
||||
|
||||
void BidiagonalMatrix_release(BidiagonalMatrix_t *p)
|
||||
{
|
||||
if (p->pBlock != NULL){
|
||||
free(p->pBlock);
|
||||
p->pBlock = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
double BidiagonalMatrix_operator(BidiagonalMatrix_t *p, int i, int j)
|
||||
{
|
||||
assert(i >= 0 && i < M);
|
||||
assert(j >= 0 && j < N);
|
||||
if (i == j) {
|
||||
return p->pBlock[2 * i];
|
||||
} else if (i + 1 == j) {
|
||||
return p->pBlock[2 * i + 1];
|
||||
} else {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
double BidiagonalMatrix_applyFirstRotatorFromRight(BidiagonalMatrix_t *p, Rotator_t *r)
|
||||
{
|
||||
double b1 = p->pBlock[0];
|
||||
double g1 = p->pBlock[1];
|
||||
double b2 = p->pBlock[2];
|
||||
double r11 = Rotator_operator(r, 0, 0);//r->operator(r, 0, 0);
|
||||
double r12 = Rotator_operator(r, 0, 1);//r->operator(r, 0, 1);
|
||||
double r21 = Rotator_operator(r, 1, 0);//r->operator(r, 1, 0);
|
||||
double r22 = Rotator_operator(r, 1, 1);//r->operator(r, 1, 1);
|
||||
//Rotator_operator
|
||||
|
||||
p->pBlock[0] = b1 * r11 + g1 * r21;
|
||||
p->pBlock[1] = b1 * r12 + g1 * r22;
|
||||
p->pBlock[2] = b2 * r22;
|
||||
return b2 * r21;
|
||||
}
|
||||
|
||||
double BidiagonalMatrix_applyRotatorFromRight(BidiagonalMatrix_t *ptr, Rotator_t *r, int n, double bulge)
|
||||
{
|
||||
double* p = ptr->pBlock + n * 2;
|
||||
double g0 = p[-1];
|
||||
double b1 = p[0];
|
||||
double g1 = p[1];
|
||||
double b2 = p[2];
|
||||
double r11 = r->operator(r, 0, 0);
|
||||
double r12 = r->operator(r, 0, 1);
|
||||
double r21 = r->operator(r, 1, 0);
|
||||
double r22 = r->operator(r, 1, 1);
|
||||
p[-1] = g0 * r11 + bulge * r21;
|
||||
p[0] = b1 * r11 + g1 * r21;
|
||||
p[1] = b1 * r12 + g1 * r22;
|
||||
p[2] = b2 * r22;
|
||||
return b2 * r21;
|
||||
}
|
||||
|
||||
double BidiagonalMatrix_applyRotatorFromLeft(BidiagonalMatrix_t *ptr, Rotator_t *r, int n, double bulge)
|
||||
{
|
||||
double* p = ptr->pBlock + n * 2;
|
||||
double b1 = p[0];
|
||||
double g1 = p[1];
|
||||
double b2 = p[2];
|
||||
double r11 = r->operator(r, 0, 0);
|
||||
double r12 = r->operator(r, 0, 1);
|
||||
double r21 = r->operator(r, 1, 0);
|
||||
double r22 = r->operator(r, 1, 1);
|
||||
|
||||
p[0] = r11 * b1 + r21 * bulge;
|
||||
p[1] = r11 * g1 + r21 * b2;
|
||||
p[2] = r12 * g1 + r22 * b2;
|
||||
double newBulge;
|
||||
if (n < N - 2) {
|
||||
double g2 = p[3];
|
||||
newBulge = r21 * g2;
|
||||
p[3] = r22 * g2;
|
||||
} else {
|
||||
newBulge = 0.0;
|
||||
}
|
||||
return newBulge;
|
||||
}
|
||||
|
||||
double BidiagonalMatrix_calculateShift(BidiagonalMatrix_t *m, int n)
|
||||
{
|
||||
assert(M >= N);
|
||||
assert(n >= 2);
|
||||
double b1 = m->operator(m, n - 2, n - 2);
|
||||
double b2 = m->operator(m, n - 1, n - 1);
|
||||
double g1 = m->operator(m, n - 2, n - 1);
|
||||
|
||||
// solves lambda^4 - d*lambda^2 + e = 0
|
||||
// where
|
||||
// d = b1^2 + b2^2 + g1^2
|
||||
// e = b1^2 * b2^2
|
||||
// chooses lambda (rho) closest to b2
|
||||
double rho;
|
||||
double d = b1 * b1 + b2 * b2 + g1 * g1;
|
||||
double e = b1 * b1 * b2 * b2;
|
||||
// lambda^2 = (d +- sqrt(d^2 - 4e)) / 2
|
||||
// so, f = d^2 - 4e must be positive
|
||||
double f = d * d - 4 * e;
|
||||
|
||||
if (f >= 0) {
|
||||
f = sqrt(f);
|
||||
// lambda = +-sqrt(d +- f) (d >= 0, f >= 0)
|
||||
// if d > f, both d+f and d-f have real square roots
|
||||
// otherwise considers only d+f
|
||||
if (d > f) {
|
||||
// lets l1 > l2
|
||||
double l1 = sqrt((d + f) * 0.5);
|
||||
double l2 = sqrt((d - f) * 0.5);
|
||||
// if b2 >= 0, chooses a positive shift
|
||||
// otherwise chooses a negative shift
|
||||
if (b2 >= 0) {
|
||||
if (fabs(b2 - l1) < fabs(b2 - l2)) {
|
||||
rho = l1;
|
||||
} else {
|
||||
rho = l2;
|
||||
}
|
||||
} else {
|
||||
if (fabs(b2 + l1) < fabs(b2 + l2)) {
|
||||
rho = -l1;
|
||||
} else {
|
||||
rho = -l2;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
double l1 = sqrt((d + f) * 0.5);
|
||||
if (fabs(b2 - l1) <= fabs(b2 + l1)) {
|
||||
rho = l1;
|
||||
} else {
|
||||
rho = -l1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// no solution. chooses b2 as the shift
|
||||
rho = b2;
|
||||
}
|
||||
|
||||
return rho;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void BidiagonalMatrix_doFrancis(BidiagonalMatrix_t *m, int n)
|
||||
{
|
||||
assert(M >= N);
|
||||
assert(n >= 2);
|
||||
// calculates the shift
|
||||
double rho = m->calculateShift(m, n);
|
||||
|
||||
// applies the first right rotator
|
||||
double b1 = m->operator(m, 0, 0);
|
||||
double g1 = m->operator(m, 0, 1);
|
||||
double mx = max(fabs(rho), max(fabs(b1), fabs(g1)));
|
||||
rho /= mx;
|
||||
b1 /= mx;
|
||||
g1 /= mx;
|
||||
//Rotator_t r0(b1 * b1 - rho * rho, b1 * g1);
|
||||
|
||||
Rotator_t r0;
|
||||
Rotator_init(&r0, b1 * b1 - rho * rho, b1 * g1);
|
||||
|
||||
double bulge = m->applyFirstRotatorFromRight(m, &r0);
|
||||
//v = r0.applyFromRightTo(&r0, v, 0);
|
||||
// applies the first left rotator
|
||||
|
||||
Rotator_t r1;
|
||||
Rotator_init(&r1, m->operator(m, 0, 0), bulge);
|
||||
//Rotator_t r1(m(0, 0), bulge);
|
||||
bulge = m->applyRotatorFromLeft(m, &r1, 0, bulge);
|
||||
//u = r1.applyFromRightTo(&r1, u, 0); // U1^T*U0^T = U0*U1
|
||||
|
||||
for (int i = 1; i + 1 < n; ++i) {
|
||||
// calculates (i+1)-th right rotator
|
||||
//Rotator rV(m(i - 1, i), bulge);
|
||||
Rotator_t rV;
|
||||
Rotator_init(&rV, m->operator(m, i - 1, i), bulge);
|
||||
|
||||
bulge = m->applyRotatorFromRight(m, &rV, i, bulge);
|
||||
//v = rV.applyFromRightTo(&rV, v, i);
|
||||
// calculates (i+1)-th left rotator
|
||||
//Rotator rU(m(i, i), bulge);
|
||||
Rotator_t rU;
|
||||
Rotator_init(&rU, m->operator(m, i, i), bulge);
|
||||
|
||||
bulge = m->applyRotatorFromLeft(m, &rU, i, bulge);
|
||||
//u = rU.applyFromRightTo(rU, u, i); // U1^T*U0^T = U0*U1
|
||||
}
|
||||
}
|
||||
|
||||
void BidiagonalMatrix_def(BidiagonalMatrix_t *p)
|
||||
{
|
||||
p->applyFirstRotatorFromRight = BidiagonalMatrix_applyFirstRotatorFromRight;
|
||||
p->applyRotatorFromLeft = BidiagonalMatrix_applyRotatorFromLeft;
|
||||
p->applyRotatorFromRight = BidiagonalMatrix_applyRotatorFromRight;
|
||||
p->bidiagonalize = BidiagonalMatrix_bidiagonalize;
|
||||
p->calculateShift = BidiagonalMatrix_calculateShift;
|
||||
p->doFrancis = BidiagonalMatrix_doFrancis;
|
||||
p->operator = BidiagonalMatrix_operator;
|
||||
p->releases = BidiagonalMatrix_release;
|
||||
|
||||
}
|
||||
|
||||
void BidiagonalMatrix_init(BidiagonalMatrix_t *p, Matrix_t *m)
|
||||
{
|
||||
assert(M >= N);
|
||||
int len;
|
||||
len = 2 * N - 1;
|
||||
|
||||
p->pBlock = (double *)malloc(sizeof(double)*len);
|
||||
memset(p->pBlock, 0.0,sizeof(double)*len);
|
||||
|
||||
for (int i = 0; i < N; ++i) {
|
||||
p->pBlock[i * 2] = Matrix_operator(m, i, i);//m->operator(m, i, i);
|
||||
if (i < N - 1) {
|
||||
p->pBlock[i * 2 + 1] = Matrix_operator(m, i, i + 1);//m->operator(m, i, i + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool IsFullRank(const uint64_t matrix_[64*64])
|
||||
{
|
||||
double matrix__ [64*64];
|
||||
// Matrix<64, 64> matrix;
|
||||
|
||||
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
for (int j = 0; j < 64; ++j) {
|
||||
matrix__[64*i + j] = (double) matrix_[64*i + j];
|
||||
}
|
||||
}
|
||||
|
||||
DiagonalMatrix_t dm;
|
||||
Matrix_t mt;
|
||||
BidiagonalMatrix_t bt;
|
||||
|
||||
DiagonalMatrix_init(&dm, matrix__);
|
||||
//matrix.fill(matrix__);
|
||||
|
||||
Matrix_init(&mt);
|
||||
Matrix_def(&mt);
|
||||
mt.filledwith(&mt, matrix__);
|
||||
|
||||
BidiagonalMatrix_def(&bt);
|
||||
DiagonalMatrix_t usv = Svd_decomposeUSV(&bt, &mt);
|
||||
DiagonalMatrix_t singularValues = usv;
|
||||
mt.release(&mt);
|
||||
dm.release(&dm);
|
||||
//DiagonalMatrix_release(&dm);
|
||||
return Svd_isFullRank(&usv,64);
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
uint64_t GetUint64_t(uint8_t *data, int pos)
|
||||
{
|
||||
const uint8_t* ptr = data + pos * 8;
|
||||
return ((uint64_t)ptr[0]) | \
|
||||
((uint64_t)ptr[1]) << 8 | \
|
||||
((uint64_t)ptr[2]) << 16 | \
|
||||
((uint64_t)ptr[3]) << 24 | \
|
||||
((uint64_t)ptr[4]) << 32 | \
|
||||
((uint64_t)ptr[5]) << 40 | \
|
||||
((uint64_t)ptr[6]) << 48 | \
|
||||
((uint64_t)ptr[7]) << 56;
|
||||
}
|
||||
|
||||
void XoShiRo256PlusPlus_init(Obtc_t *Obtc, uint64_t *s, uint256 seed) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
//p->s[i] = seed.GetUint64(i);
|
||||
s[i] = GetUint64_t(Obtc->data_r,i);
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t RotateLeft64(const uint64_t x, int k) {
|
||||
return (x << k) | (x >> (64 - k));
|
||||
}
|
||||
|
||||
|
||||
uint64_t XoShiRo256PlusPlus_operator(uint64_t *s){
|
||||
const uint64_t result = RotateLeft64(s[0] + s[3], 23) + s[0];
|
||||
|
||||
const uint64_t t = s[1] << 17;
|
||||
|
||||
s[2] ^= s[0];
|
||||
s[3] ^= s[1];
|
||||
s[1] ^= s[2];
|
||||
s[0] ^= s[3];
|
||||
|
||||
s[2] ^= t;
|
||||
|
||||
s[3] = RotateLeft64(s[3], 45);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void GenerateHeavyHashMatrix_t(Obtc_t *Obtc, uint256 matrix_seed, uint64_t matrix[64*64])
|
||||
{
|
||||
XoShiRo256PlusPlus_init(Obtc, Obtc->ss, matrix_seed);
|
||||
|
||||
do {
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
for (int j = 0; j < 64; j += 16) {
|
||||
uint64_t value = XoShiRo256PlusPlus_operator(Obtc->ss);//generator();
|
||||
for (int shift = 0; shift < 16; ++shift) {
|
||||
matrix[64*i + j + shift] = (value >> (4 * shift)) & 0xF;
|
||||
}
|
||||
}
|
||||
}
|
||||
//} while (!Is4BitPrecision(matrix) || !IsFullRank(matrix));
|
||||
}while(!Is4BitPrecision(matrix));
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void serialize_heavyhash(Obtc_t *Obtc, uint64_t matrix[64*64], const char* in, char* out, int len)
|
||||
{
|
||||
uint8_t temp[200]={
|
||||
0x02,0xb9,0x7c,0x78,0x6f,0x82,0x43,0x83,0x5d,0x11,0x29,0xcf,0x82,0xaf,0xa5,0xbc,0xb1,0xfc,0xce,0x9c,
|
||||
0xe7,0x8b,0x52,0x72,0x48,0xb0,0x94,0x27,0xa8,0x74,0x2e,0xdb,0x89,0xca,0x4e,0x84,0x9b,0xce,0xcf,0x4a,
|
||||
0xd1,0x02,0x57,0x41,0x05,0x09,0x5f,0x8d,0xba,0x1d,0xe5,0xe4,0x45,0x16,0x68,0xe4,0xc1,0xa2,0x02,0x1d,
|
||||
0x56,0x3b,0xb1,0x42,0x8f,0x06,0xdd,0x1c,0x7a,0x2f,0x85,0x1a,0x34,0x85,0x54,0x90,0x64,0xa3,0x6a,0x46,
|
||||
0xb2,0x1a,0x60,0x1f,0x85,0xb4,0xb2,0x23,0xe6,0xc8,0x5d,0x8f,0x82,0xe9,0xda,0x89,0xec,0x70,0xf1,0xa4,
|
||||
0x25,0xb1,0x37,0x15,0x44,0xe3,0x67,0x87,0x5b,0x29,0x91,0x52,0x0f,0x96,0x07,0x05,0x40,0xf1,0x4a,0x0e,
|
||||
0x2e,0x65,0x1c,0x3c,0x43,0x28,0x5f,0xf0,0xf8,0xeb,0xf1,0x33,0x88,0x66,0x31,0x40,0x77,0x6b,0xf6,0x0c,
|
||||
0x78,0x9b,0xc2,0x9c,0x18,0x3a,0x98,0x1e,0xad,0x41,0x5b,0x10,0x4a,0xef,0x61,0xd6,0x29,0xdc,0xe2,0x46,
|
||||
0x7b,0x2f,0xaf,0xca,0x87,0x5e,0x2d,0x65,0x1b,0xa5,0xa4,0xa3,0xf5,0x98,0x69,0xa0,0x1e,0x5f,0x2e,0x72,
|
||||
0x0e,0xfb,0x44,0xd2,0x29,0xbf,0x88,0x55,0xb7,0x02,0x7e,0x3c,0x11,0x3c,0xff,0x0d,0xa1,0xf6,0xd8,0x3d
|
||||
};
|
||||
for(int i = 0 ;i< 200 ;i++)Obtc->const_data[i] = temp[i];
|
||||
|
||||
CHeavyHash_init(Obtc, &Obtc->CHeavyHash_p, matrix);
|
||||
CHeavyHash_Write(&Obtc->CHeavyHash_p, (const unsigned char*)in, len);
|
||||
CHeavyHash_Finalize(Obtc, &Obtc->CHeavyHash_p, (unsigned char*)out);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void opticalbtc_hash(const char* in, char* out, int len)
|
||||
{
|
||||
uint8_t *ptr = (uint8_t*) in;
|
||||
uint256 seed, hashprev;
|
||||
uint64_t matrix[64*64];
|
||||
|
||||
Obtc_t Obtc;
|
||||
|
||||
CSHA3_256_init(&Obtc, &Obtc.CSHA3_256_p);
|
||||
memcpy(Obtc.data_r,ptr, 32);
|
||||
GenerateHeavyHashMatrix_t(&Obtc, seed, matrix);
|
||||
serialize_heavyhash(&Obtc, matrix, in, out, len);
|
||||
|
||||
}
|
||||
|
||||
52 heavyHash/obtc.h Normal file
@@ -0,0 +1,52 @@
|
||||
#ifndef OBTC_H
|
||||
#define OBTC_H
|
||||
|
||||
|
||||
#include "uint256.h"
|
||||
#include "xoshiro256pp.h"
|
||||
#include "Svd.h"
|
||||
#include "DiagonalMatrix.h"
|
||||
#include "Matrix.h"
|
||||
#include "Rotator.h"
|
||||
#include "heavyhash.h"
|
||||
|
||||
|
||||
typedef struct Obtc_opt Obtc_t;
|
||||
struct Obtc_opt{
|
||||
uint8_t data_r[32];
|
||||
uint64_t ss[4];
|
||||
uint8_t const_data[200];
|
||||
CSHA3_256 CSHA3_256_p;
|
||||
CHeavyHash CHeavyHash_p;
|
||||
uint256 g_hash_first;
|
||||
XoShiRo256PlusPlus_t *xo;
|
||||
DiagonalMatrix_t g_DiagonalMatrix;
|
||||
|
||||
};
|
||||
|
||||
//struct Obtc_opt;
|
||||
|
||||
|
||||
bool Is4BitPrecision(const uint64_t matrix[64*64]);
|
||||
bool IsFullRank(const uint64_t matrix_[64*64]);
|
||||
void GenerateHeavyHashMatrix(uint256 matrix_seed, uint64_t matrix[64*64]);
|
||||
void serialize_heavyhash(Obtc_t *Obtc, uint64_t matrix[64*64], const char* in, char* out, int len);
|
||||
void opticalbtc_hash(const char* in, char* out, int len);
|
||||
|
||||
extern void CSHA3_256_init(Obtc_t *Obtc, CSHA3_256 *p);
|
||||
extern void CSHA3_256_CSHA3_256(Obtc_t *Obtc, CSHA3_256 *p);
|
||||
|
||||
extern void CSHA3_256_Write(CSHA3_256 *p, const unsigned char* data, size_t len);
|
||||
|
||||
extern void CSHA3_256_Finalize(CSHA3_256 *p, unsigned char hash[OUTPUT_SIZE]);
|
||||
//extern void CSHA3_256_Reset(Obtc_t *Obtc, CSHA3_256 *p);
|
||||
|
||||
extern void CHeavyHash_init(Obtc_t *Obtc, CHeavyHash *p, uint64_t matrix_[64*64]);
|
||||
extern void CHeavyHash_Write(CHeavyHash *p, const unsigned char* data, size_t len);
|
||||
|
||||
extern void CHeavyHash_Finalize(Obtc_t *Obtc, CHeavyHash *p, unsigned char hash[OUTPUT_SIZE]);
|
||||
|
||||
extern int sha3_init(Obtc_t *Obtc,sha3_ctx_t *c, int mdlen); // mdlen = hash output in bytes
|
||||
|
||||
|
||||
#endif // OBTC_H
|
||||
BIN
heavyHash/obtc.o
Normal file
Binary file not shown.
199
heavyHash/sha3.c
Normal file
@@ -0,0 +1,199 @@
|
||||
// sha3.c
|
||||
// 19-Nov-11 Markku-Juhani O. Saarinen <mjos@iki.fi>
|
||||
|
||||
// Revised 07-Aug-15 to match with official release of FIPS PUB 202 "SHA3"
|
||||
// Revised 03-Sep-15 for portability + OpenSSL - style API
|
||||
#include <stdio.h>
|
||||
#include "sha3.h"
|
||||
#include "obtc.h"
|
||||
|
||||
|
||||
|
||||
|
||||
// update the state with given number of rounds
|
||||
|
||||
void sha3_keccakf(uint64_t st[25])
|
||||
{
|
||||
// constants
|
||||
const uint64_t keccakf_rndc[24] = {
|
||||
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
|
||||
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
|
||||
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
|
||||
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
|
||||
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
|
||||
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
|
||||
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
|
||||
0x8000000000008080, 0x0000000080000001, 0x8000000080008008
|
||||
};
|
||||
const int keccakf_rotc[24] = {
|
||||
1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
|
||||
27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44
|
||||
};
|
||||
const int keccakf_piln[24] = {
|
||||
10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
|
||||
15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1
|
||||
};
|
||||
|
||||
// variables
|
||||
int i, j, r;
|
||||
uint64_t t, bc[5];
|
||||
|
||||
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
|
||||
uint8_t *v;
|
||||
|
||||
// endianness conversion; redundant on little-endian targets
|
||||
for (i = 0; i < 25; i++) {
|
||||
v = (uint8_t *) &st[i];
|
||||
st[i] = ((uint64_t) v[0]) | (((uint64_t) v[1]) << 8) |
|
||||
(((uint64_t) v[2]) << 16) | (((uint64_t) v[3]) << 24) |
|
||||
(((uint64_t) v[4]) << 32) | (((uint64_t) v[5]) << 40) |
|
||||
(((uint64_t) v[6]) << 48) | (((uint64_t) v[7]) << 56);
|
||||
}
|
||||
#endif
|
||||
|
||||
// actual iteration
|
||||
for (r = 0; r < KECCAKF_ROUNDS; r++) {
|
||||
|
||||
// Theta
|
||||
for (i = 0; i < 5; i++)
|
||||
bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20];
|
||||
|
||||
for (i = 0; i < 5; i++) {
|
||||
t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
|
||||
for (j = 0; j < 25; j += 5)
|
||||
st[j + i] ^= t;
|
||||
}
|
||||
|
||||
// Rho Pi
|
||||
t = st[1];
|
||||
for (i = 0; i < 24; i++) {
|
||||
j = keccakf_piln[i];
|
||||
bc[0] = st[j];
|
||||
st[j] = ROTL64(t, keccakf_rotc[i]);
|
||||
t = bc[0];
|
||||
}
|
||||
|
||||
// Chi
|
||||
for (j = 0; j < 25; j += 5) {
|
||||
for (i = 0; i < 5; i++)
|
||||
bc[i] = st[j + i];
|
||||
for (i = 0; i < 5; i++)
|
||||
st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5];
|
||||
}
|
||||
|
||||
// Iota
|
||||
st[0] ^= keccakf_rndc[r];
|
||||
}
|
||||
|
||||
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
|
||||
// endianness conversion; redundant on little-endian targets
|
||||
for (i = 0; i < 25; i++) {
|
||||
v = (uint8_t *) &st[i];
|
||||
t = st[i];
|
||||
v[0] = t & 0xFF;
|
||||
v[1] = (t >> 8) & 0xFF;
|
||||
v[2] = (t >> 16) & 0xFF;
|
||||
v[3] = (t >> 24) & 0xFF;
|
||||
v[4] = (t >> 32) & 0xFF;
|
||||
v[5] = (t >> 40) & 0xFF;
|
||||
v[6] = (t >> 48) & 0xFF;
|
||||
v[7] = (t >> 56) & 0xFF;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Initialize the context for SHA3
|
||||
|
||||
int sha3_init(Obtc_t *Obtc, sha3_ctx_t *c, int mdlen)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 200; i++){
|
||||
c->st.b[i] = Obtc->const_data[199-i];
|
||||
}
|
||||
|
||||
c->mdlen = mdlen;
|
||||
c->rsiz = 200 - 2 * mdlen;
|
||||
c->pt = 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// update state with more data
|
||||
|
||||
int sha3_update(sha3_ctx_t *c, const void *data, size_t len)
|
||||
{
|
||||
size_t i;
|
||||
int j;
|
||||
|
||||
j = c->pt;
|
||||
|
||||
for (i = 0; i < len; i++) {
|
||||
c->st.b[j++] ^= ((const uint8_t *) data)[i];
|
||||
if (j >= c->rsiz) {
|
||||
sha3_keccakf(c->st.q);
|
||||
j = 0;
|
||||
}
|
||||
}
|
||||
c->pt = j;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// finalize and output a hash
|
||||
|
||||
int sha3_final(void *md, sha3_ctx_t *c)
|
||||
{
|
||||
int i;
|
||||
|
||||
// c->st.b[c->pt] ^= 0x06;
|
||||
c->st.b[c->pt] ^= 0x04;    // domain byte: this variant XORs 0x04 instead of SHA-3's standard 0x06
|
||||
c->st.b[c->rsiz - 1] ^= 0x80;
|
||||
sha3_keccakf(c->st.q);
|
||||
|
||||
for (i = 0; i < c->mdlen; i++) {
|
||||
((uint8_t *) md)[i] = c->st.b[i];
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
// compute a SHA-3 hash (md) of given byte length from "in"
|
||||
|
||||
/*void *sha3(const void *in, size_t inlen, void *md, int mdlen)
|
||||
{
|
||||
sha3_ctx_t sha3;
|
||||
|
||||
sha3_init(&sha3, mdlen);
|
||||
sha3_update(&sha3, in, inlen);
|
||||
sha3_final(md, &sha3);
|
||||
|
||||
return md;
|
||||
}*/
|
||||
|
||||
// SHAKE128 and SHAKE256 extensible-output functionality
|
||||
|
||||
void shake_xof(sha3_ctx_t *c)
|
||||
{
|
||||
c->st.b[c->pt] ^= 0x1F;
|
||||
c->st.b[c->rsiz - 1] ^= 0x80;
|
||||
sha3_keccakf(c->st.q);
|
||||
c->pt = 0;
|
||||
}
|
||||
|
||||
void shake_out(sha3_ctx_t *c, void *out, size_t len)
|
||||
{
|
||||
size_t i;
|
||||
int j;
|
||||
|
||||
j = c->pt;
|
||||
for (i = 0; i < len; i++) {
|
||||
if (j >= c->rsiz) {
|
||||
sha3_keccakf(c->st.q);
|
||||
j = 0;
|
||||
}
|
||||
((uint8_t *) out)[i] = c->st.b[j++];
|
||||
}
|
||||
c->pt = j;
|
||||
}
|
||||
|
||||
51
heavyHash/sha3.h
Normal file
@@ -0,0 +1,51 @@
|
||||
// sha3.h
|
||||
// 19-Nov-11 Markku-Juhani O. Saarinen <mjos@iki.fi>
|
||||
|
||||
#ifndef SHA3_H
|
||||
#define SHA3_H
|
||||
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
|
||||
|
||||
|
||||
#ifndef KECCAKF_ROUNDS
|
||||
#define KECCAKF_ROUNDS 24
|
||||
#endif
|
||||
|
||||
#ifndef ROTL64
|
||||
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
|
||||
#endif
|
||||
|
||||
// state context
|
||||
typedef struct {
|
||||
union { // state:
|
||||
uint8_t b[200]; // 8-bit bytes
|
||||
uint64_t q[25]; // 64-bit words
|
||||
} st;
|
||||
int pt, rsiz, mdlen; // these don't overflow
|
||||
} sha3_ctx_t;
|
||||
|
||||
// Compression function.
|
||||
void sha3_keccakf(uint64_t st[25]);
|
||||
|
||||
// OpenSSL-like interface
|
||||
|
||||
int sha3_update(sha3_ctx_t *c, const void *data, size_t len);
|
||||
int sha3_final(void *md, sha3_ctx_t *c); // digest goes to md
|
||||
|
||||
// compute a sha3 hash (md) of given byte length from "in"
|
||||
void *sha3(const void *in, size_t inlen, void *md, int mdlen);
|
||||
|
||||
// SHAKE128 and SHAKE256 extensible-output functions
|
||||
//#define shake128_init(c) sha3_init(c, 16)
|
||||
//#define shake256_init(c) sha3_init(c, 32)
|
||||
//#define shake_update sha3_update
|
||||
|
||||
void shake_xof(sha3_ctx_t *c);
|
||||
void shake_out(sha3_ctx_t *c, void *out, size_t len);
|
||||
|
||||
#endif
|
||||
|
||||
BIN
heavyHash/sha3.o
Normal file
Binary file not shown.
42
heavyHash/singular.h
Normal file
@@ -0,0 +1,42 @@
|
||||
#ifndef _SINGULAR_SINGULAR_H
|
||||
#define _SINGULAR_SINGULAR_H
|
||||
|
||||
/** The version of the singular library. */
|
||||
#define SINGULAR_VERSION "@PROJECT_VERSION@"
|
||||
|
||||
/**
|
||||
* Whether rvalue references are supported.
|
||||
*
|
||||
 * Visual Studio 2010 and lower do not support rvalue references.
|
||||
*/
|
||||
#if defined(_MSC_VER) && _MSC_VER < 1700
|
||||
#define SINGULAR_RVALUE_REFERENCE_SUPPORTED 0
|
||||
#else
|
||||
#define SINGULAR_RVALUE_REFERENCE_SUPPORTED 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Whether function deletions are supported.
|
||||
*
|
||||
 * Visual Studio 2012 and lower do not support deleted functions ("= delete").
|
||||
*/
|
||||
#if defined(_MSC_VER) && _MSC_VER < 1800
|
||||
#define SINGULAR_FUNCTION_DELETION_SUPPORTED 0
|
||||
#else
|
||||
#define SINGULAR_FUNCTION_DELETION_SUPPORTED 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
 * Whether template friend operator overloadings are supported.
|
||||
*
|
||||
 * Visual Studio 2012 and lower do not like overloading template friend
|
||||
* operators.
|
||||
* Neither does GCC.
|
||||
*/
|
||||
#if (defined(_MSC_VER) && _MSC_VER < 1800) || (defined(__GNUC__) && !defined(__clang__))
|
||||
#define SINGULAR_TEMPLATE_FRIEND_OPERATOR_OVERLOADING_SUPPORTED 0
|
||||
#else
|
||||
#define SINGULAR_TEMPLATE_FRIEND_OPERATOR_OVERLOADING_SUPPORTED 1
|
||||
#endif
|
||||
|
||||
#endif
|
||||
183
heavyHash/test.c
Normal file
@@ -0,0 +1,183 @@
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "obtc.h"
|
||||
#include "singular.h"
|
||||
#include <time.h>
|
||||
|
||||
|
||||
//uint8_t const_data[200];
|
||||
|
||||
static const int hex2bin_tbl[256] = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
|
||||
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
};
|
||||
|
||||
bool hex2bin(unsigned char *p, const char *hexstr, size_t len)
|
||||
{
|
||||
int nibble1, nibble2;
|
||||
unsigned char idx;
|
||||
bool ret = false;
|
||||
|
||||
while (*hexstr && len) {
|
||||
if ((!hexstr[1])) {
|
||||
printf("hex2bin str truncated");
|
||||
return ret;
|
||||
}
|
||||
|
||||
idx = *hexstr++;
|
||||
nibble1 = hex2bin_tbl[idx];
|
||||
idx = *hexstr++;
|
||||
nibble2 = hex2bin_tbl[idx];
|
||||
|
||||
if (((nibble1 < 0) || (nibble2 < 0))) {
|
||||
printf("hex2bin scan failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
*p++ = (((unsigned char)nibble1) << 4) | ((unsigned char)nibble2);
|
||||
--len;
|
||||
}
|
||||
|
||||
if ((len == 0 && *hexstr == 0))
|
||||
ret = true;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
uint8_t genesis_block[80];
|
||||
uint8_t hash[32];
|
||||
|
||||
uint8_t last_prehash[32];
|
||||
uint8_t last_prehash2[32];
|
||||
uint8_t prehash_tab[32];
|
||||
uint8_t nonce_tab[8];
|
||||
char *prehash_str = "d76ffb1d8e31ec04579b0452b52bde7dbd088e912ab1b11ba924ff309ab44a43";//argv[1];
|
||||
char *nonce_str = "80aa59a7901f2502";//argv[2];
|
||||
//char *last_prehash_str = argv[3];
|
||||
//char *last_prehash_str2 = argv[4];
|
||||
|
||||
hex2bin(prehash_tab, prehash_str, strlen(prehash_str)/2);
|
||||
hex2bin(nonce_tab, nonce_str, strlen(nonce_str)/2);
|
||||
//hex2bin(last_prehash, last_prehash_str, strlen(last_prehash_str)/2);
|
||||
//hex2bin(last_prehash2, last_prehash_str2, strlen(last_prehash_str2)/2);
|
||||
/*for (uint8_t i = 0; i<32;i++){
|
||||
printf("0x%x, ",prehash_tab[i]);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
for (uint8_t i = 0; i<8;i++){
|
||||
printf("0x%x, ",nonce_tab[i]);
|
||||
}
|
||||
printf("\n");*/
|
||||
|
||||
//uint8_t prehash[32] = {0x81,0x55,0x3a,0x69,0x5a,0x05,0x88,0x99,0x8c,0x41,0x37,0x92,0xe7,0x4c,0xe8,0xb8,0xf8,0xa0,0x96,0xd6,0x4b,0x3e,0xe4,0x73,0x87,0x37,0x24,0x34,0x48,0x5c,0x0b,0x6f};
|
||||
//uint8_t utime[8] = {0x00,0x00,0x01,0x84,0x8c,0xa8,0x7c,0x49};
|
||||
uint8_t pad[32] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
|
||||
//uint8_t nonce[8] = {0x2f,0x84,0x00,0x00,0x0e,0xba,0x16,0x7c};
|
||||
|
||||
#if 0
|
||||
//uint8_t prehash[32] = {0xa4,0x8f,0xae,0x69,0xeb,0x28,0xc7,0xe0,0x14,0x11,0x4f,0x01,0xae,0x60,0xc8,0xc3,0x82,0x73,0xc4,0x60,0x66,0xcf,0x95,0xd6,0x77,0x1a,0x55,0xd6,0x16,0xd7,0xa1,0x9a};// big-endian
|
||||
//uint8_t utime[8] = {0x00,0x00,0x01,0x87,0x22,0x1e,0xad,0x44};
|
||||
//uint8_t nonce[8] = {0x8e,0xd4,0x00,0x10,0x6b,0xe7,0xe4,0x00};
|
||||
//uint8_t nonce[8] = {0x8e,0xd4,0x00,0x12,0x27,0xc6,0x90,0xa0};
|
||||
//uint8_t nonce[8] = {0x8e,0xd4,0x00,0x32,0x0b,0x6b,0xd6,0xd1};
|
||||
|
||||
|
||||
//3f 9a aa c6 32 af 1a 4e 0e 1f ea 8a f8 e3 d5 32 b7 5a a4 71 b2 e4 ef fe a5 bd cc fa 3b dd b6 61
|
||||
uint8_t prehash[32] = {0x3f,0x9a,0xaa,0xc6,0x32,0xaf,0x1a,0x4e,0x0e,0x1f,0xea,0x8a,0xf8,0xe3,0xd5,0x32,0xb7,0x5a,0xa4,0x71,0xb2,0xe4,0xef,0xfe,0xa5,0xbd,0xcc,0xfa,0x3b,0xdd,0xb6,0x61};// big-endian
|
||||
uint8_t utime[8] = {0x00,0x00,0x01,0x87,0x21,0xeb,0x73,0x79};
|
||||
uint8_t nonce[8] = {0xa3,0xdd,0x02,0x10,0x1a,0x87,0xb4,0x70};
|
||||
|
||||
|
||||
|
||||
|
||||
#else
|
||||
|
||||
|
||||
/*443e01000000ffff00000000
|
||||
e0af2a3ba173157d3f70c94aad742fdf16d9930fdfc9d6301e869bcef04ced6c
|
||||
e0af2a3ba173157d3f70c94aad742fdf16d9930fdfc9d6301e869bcef04ced6c
|
||||
dbee84288701000000000000901f25020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
|
||||
|
||||
[2023-03-28 22:00:46.549] 00 cc 01 11 70 83 85 16 90 1f 25 02
|
||||
|
||||
kas_pow_hash: in:e0af2a3ba173157d3f70c94aad742fdf16d9930fdfc9d6301e869bcef04ced6cdbee842887010000000000000000000000000000000000000000000000000000000000000000000070838516901f2502
|
||||
kas_pow_hash: out:dae78f5008d3b66f
|
||||
01a740ce33c812ba
|
||||
772b3f5763da7bc6
|
||||
da24cb6c00000000*/
|
||||
|
||||
uint8_t prehash[32] = {0xe0,0xaf,0x2a,0x3b,0xa1,0x73,0x15,0x7d,0x3f,0x70,0xc9,0x4a,0xad,0x74,0x2f,0xdf,0x16,0xd9,0x93,0x0f,0xdf,0xc9,0xd6,0x30,0x1e,0x86,0x9b,0xce,0xf0,0x4c,0xed,0x6c};
|
||||
//uint8_t utime[8] = {0x00,0x00,0x01,0x87,0x28,0x84,0xee,0xdb};
|
||||
uint8_t nonce[8] = {0x02,0x25,0x1f,0x90,0x16,0x85,0x83,0x70};
|
||||
|
||||
#endif
|
||||
|
||||
/*for (int i = 0; i < 32; ++i) genesis_block[i] = prehash[i];
|
||||
for (int i = 0; i < 8; ++i) genesis_block[i+32] = utime[7-i];
|
||||
for (int i = 0; i < 32; ++i) genesis_block[i+40] = pad[31-i];
|
||||
for (int i = 0; i < 8; ++i) genesis_block[i+72] = nonce[7-i];*/
|
||||
//uint8_t utime[8] = {0x00,0x00,0x01,0x87,0x21,0xeb,0x73,0x79};
|
||||
//dbee8428870100000
|
||||
uint8_t utime[8] = {0x00,0x00,0x01,0x87,0x28,0x84,0xee,0xdb};
|
||||
|
||||
for (int i = 0; i < 32; ++i) genesis_block[i] = prehash_tab[i];
|
||||
for (int i = 0; i < 8; ++i) genesis_block[i+32] = utime[7-i];
|
||||
for (int i = 0; i < 32; ++i) genesis_block[i+40] = pad[31-i];
|
||||
for (int i = 0; i < 8; ++i) genesis_block[i+72] = nonce_tab[i];
|
||||
|
||||
clock_t start, finish;
|
||||
double Total_time;
|
||||
uint32_t cnt = 0;
|
||||
//while(1)
|
||||
{
|
||||
start = clock();
|
||||
opticalbtc_hash((const char*)&genesis_block, (char*)&hash, sizeof(genesis_block));
|
||||
finish = clock();
|
||||
Total_time = (double)(finish-start) / CLOCKS_PER_SEC;
|
||||
printf( "\n cnt = %d, opticalbtc_hash run times %f seconds\n", cnt++, Total_time);
|
||||
|
||||
for (int i=31; i>-1; i--) {
|
||||
printf("%02hhx", hash[i]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
|
||||
//if (hash[31] != 0 || hash[30] != 0){
|
||||
// for (int i = 0; i < 32; ++i) genesis_block[i] = last_prehash[i];
|
||||
// opticalbtc_hash((const char*)&genesis_block, (char*)&hash, sizeof(genesis_block));
|
||||
//}
|
||||
|
||||
//if (hash[31] != 0 || hash[30] != 0){
|
||||
// for (int i = 0; i < 32; ++i) genesis_block[i] = last_prehash2[i];
|
||||
// opticalbtc_hash((const char*)&genesis_block, (char*)&hash, sizeof(genesis_block));
|
||||
//}
|
||||
|
||||
if (hash[31] != 0 && hash[30] != 0){
|
||||
printf("reject\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
//g++ -std=c++11 *.cpp
|
||||
44
heavyHash/uint256.h
Normal file
@@ -0,0 +1,44 @@
|
||||
// Copyright (c) 2009-2010 Satoshi Nakamoto
|
||||
// Copyright (c) 2009-2016 The Bitcoin Core developers
|
||||
// Distributed under the MIT software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
#ifndef BITCOIN_UINT256_H
|
||||
#define BITCOIN_UINT256_H
|
||||
|
||||
#include <assert.h>
|
||||
//#include <cstring>
|
||||
//#include <stdexcept>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
//#include <vector>
|
||||
#include <stdbool.h>
|
||||
|
||||
|
||||
|
||||
/** 256-bit opaque blob.
|
||||
* @note This type is called uint256 for historical reasons only. It is an
|
||||
* opaque blob of 256 bits and has no integer operations. Use arith_uint256 if
|
||||
* those are required.
|
||||
*/
|
||||
#define UPPER_P(x) x->elements[0]
|
||||
#define LOWER_P(x) x->elements[1]
|
||||
#define UPPER(x) x.elements[0]
|
||||
#define LOWER(x) x.elements[1]
|
||||
#define WIDTH 32
|
||||
|
||||
typedef struct class_base_blob base_blob_t;
|
||||
struct class_base_blob{
|
||||
uint8_t data[WIDTH];
|
||||
};
|
||||
|
||||
|
||||
typedef struct uint128_t { uint64_t elements[2]; } uint128_t;
|
||||
typedef struct uint256_t {
|
||||
uint128_t elements[2];
|
||||
base_blob_t bb;
|
||||
} uint256;
|
||||
|
||||
|
||||
|
||||
#endif // BITCOIN_UINT256_H
|
||||
15
heavyHash/xoshiro256pp.h
Normal file
@@ -0,0 +1,15 @@
|
||||
#ifndef OPOW_CRYPTO_XOSHIRO256PP_H
|
||||
#define OPOW_CRYPTO_XOSHIRO256PP_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "uint256.h"
|
||||
|
||||
|
||||
typedef struct class_XoShiRo256PlusPlus XoShiRo256PlusPlus_t;
|
||||
struct class_XoShiRo256PlusPlus{
|
||||
uint64_t s[4];
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif //OPOW_CRYPTO_XOSHIRO256PP_H
|
||||
BIN
randomx/a.exe
Normal file
Binary file not shown.
241
randomx/aes_hash.cpp
Normal file
@@ -0,0 +1,241 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "soft_aes.h"
|
||||
#include <cassert>
|
||||
|
||||
//NOTE: The functions below were tuned for maximum performance
|
||||
//and are not cryptographically secure outside of the scope of RandomX.
|
||||
//It's not recommended to use them as general hash functions and PRNGs.
|
||||
|
||||
//AesHash1R:
|
||||
//state0, state1, state2, state3 = Blake2b-512("RandomX AesHash1R state")
|
||||
//xkey0, xkey1 = Blake2b-256("RandomX AesHash1R xkeys")
|
||||
|
||||
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
|
||||
#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e
|
||||
#define AES_HASH_1R_STATE2 0xe8a07ce4, 0x5079506b, 0xae62c7d0, 0x6a770017
|
||||
#define AES_HASH_1R_STATE3 0x7e994948, 0x79a10005, 0x07ad828d, 0x630a240c
|
||||
|
||||
#define AES_HASH_1R_XKEY0 0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389
|
||||
#define AES_HASH_1R_XKEY1 0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1
|
||||
|
||||
/*
|
||||
Calculate a 512-bit hash of 'input' using 4 lanes of AES.
|
||||
The input is treated as a set of round keys for the encryption
|
||||
of the initial state.
|
||||
|
||||
'inputSize' must be a multiple of 64.
|
||||
|
||||
For a 2 MiB input, this has the same security as 32768-round
|
||||
AES encryption.
|
||||
|
||||
Hashing throughput: >20 GiB/s per CPU core with hardware AES
|
||||
*/
|
||||
template<bool softAes>
|
||||
void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
|
||||
assert(inputSize % 64 == 0);
|
||||
const uint8_t* inptr = (uint8_t*)input;
|
||||
const uint8_t* inputEnd = inptr + inputSize;
|
||||
|
||||
rx_vec_i128 state0, state1, state2, state3;
|
||||
rx_vec_i128 in0, in1, in2, in3;
|
||||
|
||||
//initial state
|
||||
state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
|
||||
state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
|
||||
state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
|
||||
state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3);
|
||||
|
||||
//process 64 bytes at a time in 4 lanes
|
||||
while (inptr < inputEnd) {
|
||||
in0 = rx_load_vec_i128((rx_vec_i128*)inptr + 0);
|
||||
in1 = rx_load_vec_i128((rx_vec_i128*)inptr + 1);
|
||||
in2 = rx_load_vec_i128((rx_vec_i128*)inptr + 2);
|
||||
in3 = rx_load_vec_i128((rx_vec_i128*)inptr + 3);
|
||||
|
||||
state0 = aesenc<softAes>(state0, in0);
|
||||
state1 = aesdec<softAes>(state1, in1);
|
||||
state2 = aesenc<softAes>(state2, in2);
|
||||
state3 = aesdec<softAes>(state3, in3);
|
||||
|
||||
inptr += 64;
|
||||
}
|
||||
|
||||
//two extra rounds to achieve full diffusion
|
||||
rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0);
|
||||
rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1);
|
||||
|
||||
state0 = aesenc<softAes>(state0, xkey0);
|
||||
state1 = aesdec<softAes>(state1, xkey0);
|
||||
state2 = aesenc<softAes>(state2, xkey0);
|
||||
state3 = aesdec<softAes>(state3, xkey0);
|
||||
|
||||
state0 = aesenc<softAes>(state0, xkey1);
|
||||
state1 = aesdec<softAes>(state1, xkey1);
|
||||
state2 = aesenc<softAes>(state2, xkey1);
|
||||
state3 = aesdec<softAes>(state3, xkey1);
|
||||
|
||||
//output hash
|
||||
rx_store_vec_i128((rx_vec_i128*)hash + 0, state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)hash + 1, state1);
|
||||
rx_store_vec_i128((rx_vec_i128*)hash + 2, state2);
|
||||
rx_store_vec_i128((rx_vec_i128*)hash + 3, state3);
|
||||
}
|
||||
|
||||
template void hashAes1Rx4<false>(const void *input, size_t inputSize, void *hash);
|
||||
template void hashAes1Rx4<true>(const void *input, size_t inputSize, void *hash);
|
||||
|
||||
//AesGenerator1R:
|
||||
//key0, key1, key2, key3 = Blake2b-512("RandomX AesGenerator1R keys")
|
||||
|
||||
#define AES_GEN_1R_KEY0 0xb4f44917, 0xdbb5552b, 0x62716609, 0x6daca553
|
||||
#define AES_GEN_1R_KEY1 0x0da1dc4e, 0x1725d378, 0x846a710d, 0x6d7caf07
|
||||
#define AES_GEN_1R_KEY2 0x3e20e345, 0xf4c0794f, 0x9f947ec6, 0x3f1262f1
|
||||
#define AES_GEN_1R_KEY3 0x49169154, 0x16314c88, 0xb1ba317c, 0x6aef8135
|
||||
|
||||
/*
|
||||
Fill 'buffer' with pseudorandom data based on 512-bit 'state'.
|
||||
The state is encrypted using a single AES round per 16 bytes of output
|
||||
in 4 lanes.
|
||||
|
||||
'outputSize' must be a multiple of 64.
|
||||
|
||||
The modified state is written back to 'state' to allow multiple
|
||||
calls to this function.
|
||||
*/
|
||||
template<bool softAes>
|
||||
void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
|
||||
assert(outputSize % 64 == 0);
|
||||
const uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
|
||||
rx_vec_i128 state0, state1, state2, state3;
|
||||
rx_vec_i128 key0, key1, key2, key3;
|
||||
|
||||
key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0);
|
||||
key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1);
|
||||
key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2);
|
||||
key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3);
|
||||
|
||||
state0 = rx_load_vec_i128((rx_vec_i128*)state + 0);
|
||||
state1 = rx_load_vec_i128((rx_vec_i128*)state + 1);
|
||||
state2 = rx_load_vec_i128((rx_vec_i128*)state + 2);
|
||||
state3 = rx_load_vec_i128((rx_vec_i128*)state + 3);
|
||||
|
||||
while (outptr < outputEnd) {
|
||||
state0 = aesdec<softAes>(state0, key0);
|
||||
state1 = aesenc<softAes>(state1, key1);
|
||||
state2 = aesdec<softAes>(state2, key2);
|
||||
state3 = aesenc<softAes>(state3, key3);
|
||||
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2);
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3);
|
||||
|
||||
outptr += 64;
|
||||
}
|
||||
|
||||
rx_store_vec_i128((rx_vec_i128*)state + 0, state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)state + 1, state1);
|
||||
rx_store_vec_i128((rx_vec_i128*)state + 2, state2);
|
||||
rx_store_vec_i128((rx_vec_i128*)state + 3, state3);
|
||||
}
|
||||
|
||||
template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||
template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
//AesGenerator4R:
|
||||
//key0, key1, key2, key3 = Blake2b-512("RandomX AesGenerator4R keys 0-3")
|
||||
//key4, key5, key6, key7 = Blake2b-512("RandomX AesGenerator4R keys 4-7")
|
||||
|
||||
#define AES_GEN_4R_KEY0 0x99e5d23f, 0x2f546d2b, 0xd1833ddb, 0x6421aadd
|
||||
#define AES_GEN_4R_KEY1 0xa5dfcde5, 0x06f79d53, 0xb6913f55, 0xb20e3450
|
||||
#define AES_GEN_4R_KEY2 0x171c02bf, 0x0aa4679f, 0x515e7baf, 0x5c3ed904
|
||||
#define AES_GEN_4R_KEY3 0xd8ded291, 0xcd673785, 0xe78f5d08, 0x85623763
|
||||
#define AES_GEN_4R_KEY4 0x229effb4, 0x3d518b6d, 0xe3d6a7a6, 0xb5826f73
|
||||
#define AES_GEN_4R_KEY5 0xb272b7d2, 0xe9024d4e, 0x9c10b3d9, 0xc7566bf3
|
||||
#define AES_GEN_4R_KEY6 0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7
|
||||
#define AES_GEN_4R_KEY7 0xc0b0762d, 0x0c06d1fd, 0x915839de, 0x7a7cd609
|
||||
|
||||
template<bool softAes>
|
||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
|
||||
assert(outputSize % 64 == 0);
|
||||
const uint8_t* outptr = (uint8_t*)buffer;
|
||||
const uint8_t* outputEnd = outptr + outputSize;
|
||||
//printf("outputSize= %zu\n",outputSize); //outputSize =2176 这里填充program 空间,总计128+256x8 byte
|
||||
rx_vec_i128 state0, state1, state2, state3;
|
||||
rx_vec_i128 key0, key1, key2, key3, key4, key5, key6, key7;
|
||||
|
||||
key0 = rx_set_int_vec_i128(AES_GEN_4R_KEY0);
|
||||
key1 = rx_set_int_vec_i128(AES_GEN_4R_KEY1);
|
||||
key2 = rx_set_int_vec_i128(AES_GEN_4R_KEY2);
|
||||
key3 = rx_set_int_vec_i128(AES_GEN_4R_KEY3);
|
||||
key4 = rx_set_int_vec_i128(AES_GEN_4R_KEY4);
|
||||
key5 = rx_set_int_vec_i128(AES_GEN_4R_KEY5);
|
||||
key6 = rx_set_int_vec_i128(AES_GEN_4R_KEY6);
|
||||
key7 = rx_set_int_vec_i128(AES_GEN_4R_KEY7);
|
||||
|
||||
state0 = rx_load_vec_i128((rx_vec_i128*)state + 0);
|
||||
state1 = rx_load_vec_i128((rx_vec_i128*)state + 1);
|
||||
state2 = rx_load_vec_i128((rx_vec_i128*)state + 2);
|
||||
state3 = rx_load_vec_i128((rx_vec_i128*)state + 3);
|
||||
|
||||
while (outptr < outputEnd) {
|
||||
state0 = aesdec<softAes>(state0, key0);
|
||||
state1 = aesenc<softAes>(state1, key0);
|
||||
state2 = aesdec<softAes>(state2, key4);
|
||||
state3 = aesenc<softAes>(state3, key4);
|
||||
|
||||
state0 = aesdec<softAes>(state0, key1);
|
||||
state1 = aesenc<softAes>(state1, key1);
|
||||
state2 = aesdec<softAes>(state2, key5);
|
||||
state3 = aesenc<softAes>(state3, key5);
|
||||
|
||||
state0 = aesdec<softAes>(state0, key2);
|
||||
state1 = aesenc<softAes>(state1, key2);
|
||||
state2 = aesdec<softAes>(state2, key6);
|
||||
state3 = aesenc<softAes>(state3, key6);
|
||||
|
||||
state0 = aesdec<softAes>(state0, key3);
|
||||
state1 = aesenc<softAes>(state1, key3);
|
||||
state2 = aesdec<softAes>(state2, key7);
|
||||
state3 = aesenc<softAes>(state3, key7);
|
||||
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2);
|
||||
rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3);
|
||||
|
||||
outptr += 64;
|
||||
}
|
||||
}
|
||||
|
||||
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
|
||||
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);
|
||||
40
randomx/aes_hash.hpp
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
template<bool softAes>
|
||||
void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
|
||||
|
||||
template<bool softAes>
|
||||
void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
|
||||
|
||||
template<bool softAes>
|
||||
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);
|
||||
60
randomx/allocator.cpp
Normal file
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <new>
|
||||
#include "allocator.hpp"
|
||||
#include "intrin_portable.h"
|
||||
#include "virtual_memory.hpp"
|
||||
#include "common.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<size_t alignment>
|
||||
void* AlignedAllocator<alignment>::allocMemory(size_t count) {
|
||||
void *mem = rx_aligned_alloc(count, alignment);
|
||||
if (mem == nullptr)
|
||||
throw std::bad_alloc();
|
||||
return mem;
|
||||
}
|
||||
|
||||
template<size_t alignment>
|
||||
void AlignedAllocator<alignment>::freeMemory(void* ptr, size_t count) {
|
||||
rx_aligned_free(ptr);
|
||||
}
|
||||
|
||||
template class AlignedAllocator<CacheLineSize>;
|
||||
|
||||
void* LargePageAllocator::allocMemory(size_t count) {
|
||||
return allocLargePagesMemory(count);
|
||||
}
|
||||
|
||||
void LargePageAllocator::freeMemory(void* ptr, size_t count) {
|
||||
freePagedMemory(ptr, count);
|
||||
}
|
||||
|
||||
}
|
||||
46
randomx/allocator.hpp
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<size_t alignment>
|
||||
struct AlignedAllocator {
|
||||
static void* allocMemory(size_t);
|
||||
static void freeMemory(void*, size_t);
|
||||
};
|
||||
|
||||
struct LargePageAllocator {
|
||||
static void* allocMemory(size_t);
|
||||
static void freeMemory(void*, size_t);
|
||||
};
|
||||
|
||||
}
|
||||
261
randomx/argon2.h
Normal file
@@ -0,0 +1,261 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <limits.h>
|
||||
|
||||
/*
|
||||
* Argon2 input parameter restrictions
|
||||
*/
|
||||
|
||||
/* Minimum and maximum number of lanes (degree of parallelism) */
|
||||
#define ARGON2_MIN_LANES UINT32_C(1)
|
||||
#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF)
|
||||
|
||||
/* Minimum and maximum number of threads */
|
||||
#define ARGON2_MIN_THREADS UINT32_C(1)
|
||||
#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF)
|
||||
|
||||
/* Number of synchronization points between lanes per pass */
|
||||
#define ARGON2_SYNC_POINTS UINT32_C(4)
|
||||
|
||||
/* Minimum and maximum digest size in bytes */
|
||||
#define ARGON2_MIN_OUTLEN UINT32_C(4)
|
||||
#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */
|
||||
#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */
|
||||
|
||||
#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||
/* Max memory size is addressing-space/2, topping at 2^32 blocks (4 TB) */
|
||||
#define ARGON2_MAX_MEMORY_BITS \
|
||||
ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1))
|
||||
#define ARGON2_MAX_MEMORY \
|
||||
ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS)
|
||||
|
||||
/* Minimum and maximum number of passes */
|
||||
#define ARGON2_MIN_TIME UINT32_C(1)
|
||||
#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum password length in bytes */
|
||||
#define ARGON2_MIN_PWD_LENGTH UINT32_C(0)
|
||||
#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum associated data length in bytes */
|
||||
#define ARGON2_MIN_AD_LENGTH UINT32_C(0)
|
||||
#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum salt length in bytes */
|
||||
#define ARGON2_MIN_SALT_LENGTH UINT32_C(8)
|
||||
#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Minimum and maximum key length in bytes */
|
||||
#define ARGON2_MIN_SECRET UINT32_C(0)
|
||||
#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF)
|
||||
|
||||
/* Flags to determine which fields are securely wiped (default = no wipe). */
|
||||
#define ARGON2_DEFAULT_FLAGS UINT32_C(0)
|
||||
#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0)
|
||||
#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1)
|
||||
|
||||
|
||||
/* Error codes */
|
||||
typedef enum Argon2_ErrorCodes {
|
||||
ARGON2_OK = 0,
|
||||
|
||||
ARGON2_OUTPUT_PTR_NULL = -1,
|
||||
|
||||
ARGON2_OUTPUT_TOO_SHORT = -2,
|
||||
ARGON2_OUTPUT_TOO_LONG = -3,
|
||||
|
||||
ARGON2_PWD_TOO_SHORT = -4,
|
||||
ARGON2_PWD_TOO_LONG = -5,
|
||||
|
||||
ARGON2_SALT_TOO_SHORT = -6,
|
||||
ARGON2_SALT_TOO_LONG = -7,
|
||||
|
||||
ARGON2_AD_TOO_SHORT = -8,
|
||||
ARGON2_AD_TOO_LONG = -9,
|
||||
|
||||
ARGON2_SECRET_TOO_SHORT = -10,
|
||||
ARGON2_SECRET_TOO_LONG = -11,
|
||||
|
||||
ARGON2_TIME_TOO_SMALL = -12,
|
||||
ARGON2_TIME_TOO_LARGE = -13,
|
||||
|
||||
ARGON2_MEMORY_TOO_LITTLE = -14,
|
||||
ARGON2_MEMORY_TOO_MUCH = -15,
|
||||
|
||||
ARGON2_LANES_TOO_FEW = -16,
|
||||
ARGON2_LANES_TOO_MANY = -17,
|
||||
|
||||
ARGON2_PWD_PTR_MISMATCH = -18, /* NULL ptr with non-zero length */
|
||||
ARGON2_SALT_PTR_MISMATCH = -19, /* NULL ptr with non-zero length */
|
||||
ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */
|
||||
ARGON2_AD_PTR_MISMATCH = -21, /* NULL ptr with non-zero length */
|
||||
|
||||
ARGON2_MEMORY_ALLOCATION_ERROR = -22,
|
||||
|
||||
ARGON2_FREE_MEMORY_CBK_NULL = -23,
|
||||
ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24,
|
||||
|
||||
ARGON2_INCORRECT_PARAMETER = -25,
|
||||
ARGON2_INCORRECT_TYPE = -26,
|
||||
|
||||
ARGON2_OUT_PTR_MISMATCH = -27,
|
||||
|
||||
ARGON2_THREADS_TOO_FEW = -28,
|
||||
ARGON2_THREADS_TOO_MANY = -29,
|
||||
|
||||
ARGON2_MISSING_ARGS = -30,
|
||||
|
||||
ARGON2_ENCODING_FAIL = -31,
|
||||
|
||||
ARGON2_DECODING_FAIL = -32,
|
||||
|
||||
ARGON2_THREAD_FAIL = -33,
|
||||
|
||||
ARGON2_DECODING_LENGTH_FAIL = -34,
|
||||
|
||||
ARGON2_VERIFY_MISMATCH = -35
|
||||
} argon2_error_codes;
|
||||
|
||||
/* Memory allocator types --- for external allocation */
|
||||
typedef int(*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate);
|
||||
typedef void(*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate);
|
||||
|
||||
/* Argon2 external data structures */
|
||||
|
||||
/*
|
||||
*****
|
||||
* Context: structure to hold Argon2 inputs:
|
||||
* output array and its length,
|
||||
* password and its length,
|
||||
* salt and its length,
|
||||
* secret and its length,
|
||||
* associated data and its length,
|
||||
* number of passes, amount of used memory (in KBytes, can be rounded up a bit)
|
||||
* number of parallel threads that will be run.
|
||||
* All the parameters above affect the output hash value.
|
||||
* Additionally, two function pointers can be provided to allocate and
|
||||
* deallocate the memory (if NULL, memory will be allocated internally).
|
||||
* Also, three flags indicate whether to erase password, secret as soon as they
|
||||
* are pre-hashed (and thus not needed anymore), and the entire memory
|
||||
*****
|
||||
* Simplest situation: you have output array out[8], password is stored in
|
||||
* pwd[32], salt is stored in salt[16], you do not have keys nor associated
|
||||
* data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with
|
||||
* 4 parallel lanes.
|
||||
* You want to erase the password, but you're OK with last pass not being
|
||||
* erased. You want to use the default memory allocator.
|
||||
* Then you initialize:
|
||||
Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false)
|
||||
*/
|
||||
typedef struct Argon2_Context {
|
||||
uint8_t *out; /* output array */
|
||||
uint32_t outlen; /* digest length */
|
||||
|
||||
uint8_t *pwd; /* password array */
|
||||
uint32_t pwdlen; /* password length */
|
||||
|
||||
uint8_t *salt; /* salt array */
|
||||
uint32_t saltlen; /* salt length */
|
||||
|
||||
uint8_t *secret; /* key array */
|
||||
uint32_t secretlen; /* key length */
|
||||
|
||||
uint8_t *ad; /* associated data array */
|
||||
uint32_t adlen; /* associated data length */
|
||||
|
||||
uint32_t t_cost; /* number of passes */
|
||||
uint32_t m_cost; /* amount of memory requested (KB) */
|
||||
uint32_t lanes; /* number of lanes */
|
||||
uint32_t threads; /* maximum number of threads */
|
||||
|
||||
uint32_t version; /* version number */
|
||||
|
||||
allocate_fptr allocate_cbk; /* pointer to memory allocator */
|
||||
deallocate_fptr free_cbk; /* pointer to memory deallocator */
|
||||
|
||||
uint32_t flags; /* array of bool options */
|
||||
} argon2_context;
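The "simplest situation" described in the comment block above can be written out as a field-by-field initializer. The sketch below only mirrors that example (8-byte output, 32-byte password, 16-byte salt, 5 passes over 1 GiB with 4 lanes and threads); `make_example_context` is a hypothetical helper, and RandomX itself sets up its Argon2 context elsewhere.
```
/* Hypothetical helper mirroring the example above; not part of RandomX. */
#include <string.h>
#include "argon2.h"

static argon2_context make_example_context(uint8_t *out, uint8_t *pwd, uint8_t *salt)
{
    argon2_context ctx;
    memset(&ctx, 0, sizeof(ctx));                   /* secret, ad, callbacks = NULL */
    ctx.out = out;         ctx.outlen = 8;          /* 8-byte digest */
    ctx.pwd = pwd;         ctx.pwdlen = 32;         /* 32-byte password */
    ctx.salt = salt;       ctx.saltlen = 16;        /* 16-byte salt */
    ctx.t_cost = 5;                                 /* 5 passes */
    ctx.m_cost = 1 << 20;                           /* 1 GiB, given in KiB */
    ctx.lanes = 4;         ctx.threads = 4;
    ctx.version = ARGON2_VERSION_NUMBER;
    ctx.flags = ARGON2_FLAG_CLEAR_PASSWORD;         /* erase pwd after prehash */
    return ctx;
}
```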
|
||||
|
||||
/* Argon2 primitive type */
|
||||
typedef enum Argon2_type {
|
||||
Argon2_d = 0,
|
||||
Argon2_i = 1,
|
||||
Argon2_id = 2
|
||||
} argon2_type;
|
||||
|
||||
/* Version of the algorithm */
|
||||
typedef enum Argon2_version {
|
||||
ARGON2_VERSION_10 = 0x10,
|
||||
ARGON2_VERSION_13 = 0x13,
|
||||
ARGON2_VERSION_NUMBER = ARGON2_VERSION_13
|
||||
} argon2_version;
|
||||
|
||||
//Argon2 instance - forward declaration
|
||||
typedef struct Argon2_instance_t argon2_instance_t;
|
||||
|
||||
//Argon2 position - forward declaration
|
||||
typedef struct Argon2_position_t argon2_position_t;
|
||||
|
||||
//Argon2 implementation function
|
||||
typedef void randomx_argon2_impl(const argon2_instance_t* instance,
|
||||
argon2_position_t position);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Function that fills the segment using previous segments also from other
|
||||
* threads
|
||||
* @param context current context
|
||||
* @param instance Pointer to the current instance
|
||||
* @param position Current position
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void randomx_argon2_fill_segment_ref(const argon2_instance_t* instance,
|
||||
argon2_position_t position);
|
||||
|
||||
randomx_argon2_impl *randomx_argon2_impl_ssse3();
|
||||
randomx_argon2_impl *randomx_argon2_impl_avx2();
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
175
randomx/argon2_avx2.c
Normal file
@@ -0,0 +1,175 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "argon2.h"
|
||||
|
||||
void randomx_argon2_fill_segment_avx2(const argon2_instance_t* instance,
|
||||
argon2_position_t position);
|
||||
|
||||
randomx_argon2_impl* randomx_argon2_impl_avx2() {
|
||||
#if defined(__AVX2__)
|
||||
return &randomx_argon2_fill_segment_avx2;
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
#include "argon2_core.h"
|
||||
|
||||
#include "blake2/blamka-round-avx2.h"
|
||||
#include "blake2/blake2-impl.h"
|
||||
#include "blake2/blake2.h"
|
||||
|
||||
static void fill_block(__m256i* state, const block* ref_block,
|
||||
block* next_block, int with_xor) {
|
||||
__m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
|
||||
unsigned int i;
|
||||
|
||||
if (with_xor) {
|
||||
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm256_xor_si256(
|
||||
state[i], _mm256_loadu_si256((const __m256i*)ref_block->v + i));
|
||||
block_XY[i] = _mm256_xor_si256(
|
||||
state[i], _mm256_loadu_si256((const __m256i*)next_block->v + i));
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
|
||||
block_XY[i] = state[i] = _mm256_xor_si256(
|
||||
state[i], _mm256_loadu_si256((const __m256i*)ref_block->v + i));
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5],
|
||||
state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
BLAKE2_ROUND_2(state[0 + i], state[4 + i], state[8 + i], state[12 + i],
|
||||
state[16 + i], state[20 + i], state[24 + i], state[28 + i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm256_xor_si256(state[i], block_XY[i]);
|
||||
_mm256_storeu_si256((__m256i*)next_block->v + i, state[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void randomx_argon2_fill_segment_avx2(const argon2_instance_t* instance,
|
||||
argon2_position_t position) {
|
||||
// printf("randomx_argon2_fill_segment_avx2\n");
|
||||
block* ref_block = NULL, * curr_block = NULL;
|
||||
block address_block, input_block;
|
||||
uint64_t pseudo_rand, ref_index, ref_lane;
|
||||
uint32_t prev_offset, curr_offset;
|
||||
uint32_t starting_index, i;
|
||||
__m256i state[ARGON2_HWORDS_IN_BLOCK];
|
||||
|
||||
if (instance == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
starting_index = 0;
|
||||
|
||||
if ((0 == position.pass) && (0 == position.slice)) {
|
||||
starting_index = 2; /* we have already generated the first two blocks */
|
||||
}
|
||||
|
||||
/* Offset of the current block */
|
||||
curr_offset = position.lane * instance->lane_length +
|
||||
position.slice * instance->segment_length + starting_index;
|
||||
|
||||
if (0 == curr_offset % instance->lane_length) {
|
||||
/* Last block in this lane */
|
||||
prev_offset = curr_offset + instance->lane_length - 1;
|
||||
}
|
||||
else {
|
||||
/* Previous block */
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
|
||||
|
||||
for (i = starting_index; i < instance->segment_length;
|
||||
++i, ++curr_offset, ++prev_offset) {
|
||||
/*1.1 Rotating prev_offset if needed */
|
||||
if (curr_offset % instance->lane_length == 1) {
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
/* 1.2 Computing the index of the reference block */
|
||||
/* 1.2.1 Taking pseudo-random value from the previous block */
|
||||
pseudo_rand = instance->memory[prev_offset].v[0];
|
||||
|
||||
/* 1.2.2 Computing the lane of the reference block */
|
||||
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
|
||||
|
||||
if ((position.pass == 0) && (position.slice == 0)) {
|
||||
/* Can not reference other lanes yet */
|
||||
ref_lane = position.lane;
|
||||
}
|
||||
|
||||
/* 1.2.3 Computing the number of possible reference block within the
|
||||
* lane.
|
||||
*/
|
||||
position.index = i;
|
||||
ref_index = randomx_argon2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
|
||||
ref_lane == position.lane);
|
||||
|
||||
/* 2 Creating a new block */
|
||||
ref_block =
|
||||
instance->memory + instance->lane_length * ref_lane + ref_index;
|
||||
curr_block = instance->memory + curr_offset;
|
||||
if (ARGON2_VERSION_10 == instance->version) {
|
||||
/* version 1.2.1 and earlier: overwrite, not XOR */
|
||||
fill_block(state, ref_block, curr_block, 0);
|
||||
}
|
||||
else {
|
||||
if (0 == position.pass) {
|
||||
fill_block(state, ref_block, curr_block, 0);
|
||||
}
|
||||
else {
|
||||
fill_block(state, ref_block, curr_block, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
396
randomx/argon2_core.c
Normal file
@@ -0,0 +1,396 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
/*For memory wiping*/
|
||||
#ifdef _MSC_VER
|
||||
#include <windows.h>
|
||||
#include <winbase.h> /* For SecureZeroMemory */
|
||||
#endif
|
||||
#if defined __STDC_LIB_EXT1__
|
||||
#define __STDC_WANT_LIB_EXT1__ 1
|
||||
#endif
|
||||
#define VC_GE_2005(version) (version >= 1400)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "argon2_core.h"
|
||||
#include "blake2/blake2.h"
|
||||
#include "blake2/blake2-impl.h"
|
||||
|
||||
#ifdef GENKAT
|
||||
#include "genkat.h"
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
#if __has_attribute(optnone)
|
||||
#define NOT_OPTIMIZED __attribute__((optnone))
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#define GCC_VERSION \
|
||||
(__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
|
||||
#if GCC_VERSION >= 40400
|
||||
#define NOT_OPTIMIZED __attribute__((optimize("O0")))
|
||||
#endif
|
||||
#endif
|
||||
#ifndef NOT_OPTIMIZED
|
||||
#define NOT_OPTIMIZED
|
||||
#endif
|
||||
|
||||
/***************Instance and Position constructors**********/
|
||||
|
||||
static void load_block(block *dst, const void *input) {
|
||||
unsigned i;
|
||||
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
|
||||
dst->v[i] = load64((const uint8_t *)input + i * sizeof(dst->v[i]));
|
||||
}
|
||||
}
|
||||
|
||||
static void store_block(void *output, const block *src) {
|
||||
unsigned i;
|
||||
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
|
||||
store64((uint8_t *)output + i * sizeof(src->v[i]), src->v[i]);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t randomx_argon2_index_alpha(const argon2_instance_t *instance,const argon2_position_t *position, uint32_t pseudo_rand,int same_lane) {
|
||||
/*
|
||||
* Pass 0:
|
||||
* This lane : all already finished segments plus already constructed
|
||||
* blocks in this segment
|
||||
* Other lanes : all already finished segments
|
||||
* Pass 1+:
|
||||
* This lane : (SYNC_POINTS - 1) last segments plus already constructed
|
||||
* blocks in this segment
|
||||
* Other lanes : (SYNC_POINTS - 1) last segments
|
||||
*/
|
||||
uint32_t reference_area_size;
|
||||
uint64_t relative_position;
|
||||
uint32_t start_position, absolute_position;
|
||||
|
||||
if (0 == position->pass) {
|
||||
/* First pass */
|
||||
if (0 == position->slice) {
|
||||
/* First slice */
|
||||
reference_area_size = position->index - 1; /* all but the previous */
|
||||
}
|
||||
else {
|
||||
if (same_lane) {
|
||||
/* The same lane => add current segment */
|
||||
reference_area_size =position->slice * instance->segment_length +position->index - 1;
|
||||
}
|
||||
else {
|
||||
reference_area_size =position->slice * instance->segment_length +((position->index == 0) ? (-1) : 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Second pass */
|
||||
if (same_lane) {
|
||||
reference_area_size = instance->lane_length -
|
||||
instance->segment_length + position->index -
|
||||
1;
|
||||
}
|
||||
else {
|
||||
reference_area_size = instance->lane_length -
|
||||
instance->segment_length +
|
||||
((position->index == 0) ? (-1) : 0);
|
||||
}
|
||||
}
|
||||
|
||||
/* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
|
||||
* relative position */
|
||||
relative_position = pseudo_rand;
|
||||
relative_position = relative_position * relative_position >> 32;
|
||||
relative_position = reference_area_size - 1 - (reference_area_size * relative_position >> 32);
|
||||
|
||||
/* 1.2.5 Computing starting position */
|
||||
start_position = 0;
|
||||
|
||||
if (0 != position->pass) {
|
||||
start_position = (position->slice == ARGON2_SYNC_POINTS - 1)? 0 : (position->slice + 1) * instance->segment_length;
|
||||
}
|
||||
|
||||
/* 1.2.6. Computing absolute position */
|
||||
absolute_position = (start_position + relative_position) % instance->lane_length; /* absolute position */
|
||||
return absolute_position;
|
||||
}
|
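For illustration, here is a standalone sketch of just the mapping from step 1.2.4 above (the helper name is ours, not part of the source): squaring the 32-bit pseudo-random value biases the chosen relative position toward recently written blocks, so older blocks in the reference area are picked less often.

```c
#include <stdint.h>
#include <stdio.h>

/* Standalone sketch of the index mapping used in randomx_argon2_index_alpha:
   small results (blocks far in the past) are produced less often. */
static uint32_t map_alpha(uint32_t pseudo_rand, uint32_t reference_area_size) {
    uint64_t rel = pseudo_rand;
    rel = rel * rel >> 32;                                   /* J^2 / 2^32 */
    rel = reference_area_size - 1 - (reference_area_size * rel >> 32);
    return (uint32_t)rel;                                    /* 0 .. reference_area_size-1 */
}

int main(void) {
    /* example values only */
    printf("%u\n", map_alpha(0x80000000u, 1000));  /* 749: about 3/4 into the area */
    printf("%u\n", map_alpha(0xFFFFFFFFu, 1000));  /* 0: the oldest block, rarely hit */
    return 0;
}
```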
||||
|
||||
/* Single-threaded version for p=1 case */
|
||||
static int fill_memory_blocks_st(argon2_instance_t *instance) {
|
||||
uint32_t r, s, l;
|
||||
|
||||
for (r = 0; r < instance->passes; ++r) { /* 3 passes with the RandomX parameters */
|
||||
for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { /* 4 sync points (slices) */
|
||||
for (l = 0; l < instance->lanes; ++l) { /* 1 lane */
|
||||
argon2_position_t position = { r, l, (uint8_t)s, 0 };
|
||||
//fill the segment using the selected implementation
|
||||
instance->impl(instance, position);
|
||||
}
|
||||
}
|
||||
}
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
int randomx_argon2_fill_memory_blocks(argon2_instance_t *instance) {
|
||||
if (instance == NULL || instance->lanes == 0) {
|
||||
return ARGON2_INCORRECT_PARAMETER;
|
||||
}
|
||||
return fill_memory_blocks_st(instance);
|
||||
}
|
||||
|
||||
int randomx_argon2_validate_inputs(const argon2_context *context) {
|
||||
if (NULL == context) {
|
||||
return ARGON2_INCORRECT_PARAMETER;
|
||||
}
|
||||
|
||||
/* Validate password (required param) */
|
||||
if (NULL == context->pwd) {
|
||||
if (0 != context->pwdlen) {
|
||||
return ARGON2_PWD_PTR_MISMATCH;
|
||||
}
|
||||
}
|
||||
|
||||
if (ARGON2_MIN_PWD_LENGTH > context->pwdlen) {
|
||||
return ARGON2_PWD_TOO_SHORT;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_PWD_LENGTH < context->pwdlen) {
|
||||
return ARGON2_PWD_TOO_LONG;
|
||||
}
|
||||
|
||||
/* Validate salt (required param) */
|
||||
if (NULL == context->salt) {
|
||||
if (0 != context->saltlen) {
|
||||
return ARGON2_SALT_PTR_MISMATCH;
|
||||
}
|
||||
}
|
||||
|
||||
if (ARGON2_MIN_SALT_LENGTH > context->saltlen) {
|
||||
return ARGON2_SALT_TOO_SHORT;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_SALT_LENGTH < context->saltlen) {
|
||||
return ARGON2_SALT_TOO_LONG;
|
||||
}
|
||||
|
||||
/* Validate secret (optional param) */
|
||||
if (NULL == context->secret) {
|
||||
if (0 != context->secretlen) {
|
||||
return ARGON2_SECRET_PTR_MISMATCH;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (ARGON2_MIN_SECRET > context->secretlen) {
|
||||
return ARGON2_SECRET_TOO_SHORT;
|
||||
}
|
||||
if (ARGON2_MAX_SECRET < context->secretlen) {
|
||||
return ARGON2_SECRET_TOO_LONG;
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate associated data (optional param) */
|
||||
if (NULL == context->ad) {
|
||||
if (0 != context->adlen) {
|
||||
return ARGON2_AD_PTR_MISMATCH;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (ARGON2_MIN_AD_LENGTH > context->adlen) {
|
||||
return ARGON2_AD_TOO_SHORT;
|
||||
}
|
||||
if (ARGON2_MAX_AD_LENGTH < context->adlen) {
|
||||
return ARGON2_AD_TOO_LONG;
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate memory cost */
|
||||
if (ARGON2_MIN_MEMORY > context->m_cost) {
|
||||
return ARGON2_MEMORY_TOO_LITTLE;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_MEMORY < context->m_cost) {
|
||||
return ARGON2_MEMORY_TOO_MUCH;
|
||||
}
|
||||
|
||||
if (context->m_cost < 8 * context->lanes) {
|
||||
return ARGON2_MEMORY_TOO_LITTLE;
|
||||
}
|
||||
|
||||
/* Validate time cost */
|
||||
if (ARGON2_MIN_TIME > context->t_cost) {
|
||||
return ARGON2_TIME_TOO_SMALL;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_TIME < context->t_cost) {
|
||||
return ARGON2_TIME_TOO_LARGE;
|
||||
}
|
||||
|
||||
/* Validate lanes */
|
||||
if (ARGON2_MIN_LANES > context->lanes) {
|
||||
return ARGON2_LANES_TOO_FEW;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_LANES < context->lanes) {
|
||||
return ARGON2_LANES_TOO_MANY;
|
||||
}
|
||||
|
||||
/* Validate threads */
|
||||
if (ARGON2_MIN_THREADS > context->threads) {
|
||||
return ARGON2_THREADS_TOO_FEW;
|
||||
}
|
||||
|
||||
if (ARGON2_MAX_THREADS < context->threads) {
|
||||
return ARGON2_THREADS_TOO_MANY;
|
||||
}
|
||||
|
||||
if (NULL != context->allocate_cbk && NULL == context->free_cbk) {
|
||||
return ARGON2_FREE_MEMORY_CBK_NULL;
|
||||
}
|
||||
|
||||
if (NULL == context->allocate_cbk && NULL != context->free_cbk) {
|
||||
return ARGON2_ALLOCATE_MEMORY_CBK_NULL;
|
||||
}
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
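As a usage illustration only, the sketch below validates a context filled with the RandomX-style parameters listed in asm/configuration.asm later in this commit (262144 one-KiB blocks, 3 iterations, 1 lane, salt "RandomX\x03"). The field names follow the code above; the complete argon2_context definition lives in argon2.h, so treat the exact initialisation as an assumption.

```c
#include <string.h>
#include <stdint.h>
#include "argon2.h"
#include "argon2_core.h"

/* Illustration: fill a context with RandomX-style parameters and validate it. */
static int validate_randomx_style_params(const uint8_t *key, uint32_t keylen) {
    argon2_context ctx;
    memset(&ctx, 0, sizeof(ctx));
    ctx.pwd     = (uint8_t *)key;
    ctx.pwdlen  = keylen;
    ctx.salt    = (uint8_t *)"RandomX\x03";
    ctx.saltlen = 8;
    ctx.m_cost  = 262144;   /* memory cost in 1 KiB blocks */
    ctx.t_cost  = 3;        /* passes */
    ctx.lanes   = 1;
    ctx.threads = 1;
    return randomx_argon2_validate_inputs(&ctx);   /* ARGON2_OK on success */
}
```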
||||
|
||||
void rxa2_fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) {
|
||||
uint32_t l;
|
||||
/* Make the first and second block in each lane as G(H0||0||i) or
|
||||
G(H0||1||i) */
|
||||
uint8_t blockhash_bytes[ARGON2_BLOCK_SIZE]; //ARGON2_BLOCK_SIZE=1024
|
||||
for (l = 0; l < instance->lanes; ++l) {
|
||||
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, l);
|
||||
blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,ARGON2_PREHASH_SEED_LENGTH);
|
||||
load_block(&instance->memory[l * instance->lane_length + 0],blockhash_bytes);
|
||||
|
||||
store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
|
||||
blake2b_long(blockhash_bytes, ARGON2_BLOCK_SIZE, blockhash,ARGON2_PREHASH_SEED_LENGTH);
|
||||
load_block(&instance->memory[l * instance->lane_length + 1],blockhash_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
void rxa2_initial_hash(uint8_t *blockhash, argon2_context *context, argon2_type type) {
|
||||
blake2b_state BlakeHash;
|
||||
uint8_t value[sizeof(uint32_t)];
|
||||
|
||||
if (NULL == context || NULL == blockhash) {
|
||||
return;
|
||||
}
|
||||
|
||||
blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH);
|
||||
|
||||
store32(&value, context->lanes);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->outlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->m_cost);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->t_cost);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->version);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, (uint32_t)type);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
store32(&value, context->pwdlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
if (context->pwd != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->pwd,
|
||||
context->pwdlen);
|
||||
}
|
||||
|
||||
store32(&value, context->saltlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
if (context->salt != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->salt, context->saltlen);
|
||||
}
|
||||
|
||||
store32(&value, context->secretlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
if (context->secret != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->secret,
|
||||
context->secretlen);
|
||||
}
|
||||
|
||||
store32(&value, context->adlen);
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
|
||||
|
||||
if (context->ad != NULL) {
|
||||
blake2b_update(&BlakeHash, (const uint8_t *)context->ad,
|
||||
context->adlen);
|
||||
}
|
||||
|
||||
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); //ARGON2_PREHASH_DIGEST_LENGTH=64
|
||||
}
|
||||
|
||||
int randomx_argon2_initialize(argon2_instance_t *instance, argon2_context *context) {
|
||||
uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; //ARGON2_PREHASH_SEED_LENGTH=72
|
||||
int result = ARGON2_OK;
|
||||
|
||||
if (instance == NULL || context == NULL)
|
||||
return ARGON2_INCORRECT_PARAMETER;
|
||||
instance->context_ptr = context;
|
||||
|
||||
/* 1. Memory allocation */
|
||||
//RandomX takes care of memory allocation
|
||||
|
||||
/* 2. Initial hashing */
|
||||
/* H_0 + 8 extra bytes to produce the first blocks */
|
||||
/* uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH]; */
|
||||
/* Hashing all inputs */
|
||||
rxa2_initial_hash(blockhash, context, instance->type);
|
||||
/* Zeroing 8 extra bytes */
|
||||
/*rxa2_clear_internal_memory(blockhash + ARGON2_PREHASH_DIGEST_LENGTH,
|
||||
ARGON2_PREHASH_SEED_LENGTH -
|
||||
ARGON2_PREHASH_DIGEST_LENGTH);*/
|
||||
|
||||
/* 3. Creating first blocks, we always have at least two blocks in a slice
|
||||
*/
|
||||
rxa2_fill_first_blocks(blockhash, instance);
|
||||
|
||||
return ARGON2_OK;
|
||||
}
|
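For orientation, a short sketch of the order in which the functions in this file are meant to be called once the caller (the RandomX cache setup, per the comment above) has pointed instance->memory at an allocated block array and filled in the derived lengths. This illustrates the flow; it is not code from the commit.

```c
#include "argon2.h"
#include "argon2_core.h"

/* Sketch: validate the context, hash it into the first two blocks of each
   lane, then run the t_cost passes over the whole memory. Assumes
   instance->memory, the lane/segment lengths and instance->impl are set. */
static int run_argon2_fill(argon2_instance_t *instance, argon2_context *context) {
    int err = randomx_argon2_validate_inputs(context);
    if (err != ARGON2_OK)
        return err;
    err = randomx_argon2_initialize(instance, context);    /* H0 + first two blocks */
    if (err != ARGON2_OK)
        return err;
    return randomx_argon2_fill_memory_blocks(instance);     /* passes x slices x lanes */
}
```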
||||
163 randomx/argon2_core.h Normal file
@@ -0,0 +1,163 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#ifndef ARGON2_CORE_H
|
||||
#define ARGON2_CORE_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "argon2.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define CONST_CAST(x) (x)(uintptr_t)
|
||||
|
||||
/**********************Argon2 internal constants*******************************/
|
||||
|
||||
enum argon2_core_constants {
|
||||
/* Memory block size in bytes */
|
||||
ARGON2_BLOCK_SIZE = 1024,
|
||||
ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8,
|
||||
ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16,
|
||||
ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32,
|
||||
ARGON2_512BIT_WORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 64,
|
||||
|
||||
/* Number of pseudo-random values generated by one call to Blake in Argon2i
|
||||
to
|
||||
generate reference block positions */
|
||||
ARGON2_ADDRESSES_IN_BLOCK = 128,
|
||||
|
||||
/* Pre-hashing digest length and its extension*/
|
||||
ARGON2_PREHASH_DIGEST_LENGTH = 64,
|
||||
ARGON2_PREHASH_SEED_LENGTH = 72
|
||||
};
|
||||
|
||||
/*************************Argon2 internal data types***********************/
|
||||
|
||||
/*
|
||||
* Structure for the (1KB) memory block implemented as 128 64-bit words.
|
||||
* Memory blocks can be copied, XORed. Internal words can be accessed by [] (no
|
||||
* bounds checking).
|
||||
*/
|
||||
typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block;
|
||||
|
||||
/*
|
||||
* Argon2 instance: memory pointer, number of passes, amount of memory, type,
|
||||
* and derived values.
|
||||
* Used to evaluate the number and location of blocks to construct in each
|
||||
* thread
|
||||
*/
|
||||
typedef struct Argon2_instance_t {
|
||||
block *memory; /* Memory pointer */
|
||||
uint32_t version;
|
||||
uint32_t passes; /* Number of passes */
|
||||
uint32_t memory_blocks; /* Number of blocks in memory */
|
||||
uint32_t segment_length;
|
||||
uint32_t lane_length;
|
||||
uint32_t lanes;
|
||||
uint32_t threads;
|
||||
argon2_type type;
|
||||
int print_internals; /* whether to print the memory blocks */
|
||||
argon2_context *context_ptr; /* points back to original context */
|
||||
randomx_argon2_impl *impl;
|
||||
} argon2_instance_t;
|
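The derived fields above (memory_blocks, segment_length, lane_length) are filled in by the caller. The sketch below shows how they are typically computed from the context, following the reference Argon2 rules; this is an assumption about code outside this commit, shown only to make the field relationships explicit.

```c
#include <stdint.h>
#include "argon2_core.h"

/* Assumed derivation, mirroring reference Argon2; the actual assignment is
   done by the RandomX cache initialisation, which is not part of this file. */
static void derive_lengths(argon2_instance_t *inst, uint32_t m_cost,
                           uint32_t lanes, uint32_t passes) {
    uint32_t memory_blocks = m_cost;                          /* 1 KiB blocks */
    if (memory_blocks < 2 * ARGON2_SYNC_POINTS * lanes)
        memory_blocks = 2 * ARGON2_SYNC_POINTS * lanes;       /* >= 2 blocks per segment */
    inst->segment_length = memory_blocks / (lanes * ARGON2_SYNC_POINTS);
    inst->memory_blocks  = inst->segment_length * lanes * ARGON2_SYNC_POINTS;
    inst->lane_length    = inst->segment_length * ARGON2_SYNC_POINTS;
    inst->lanes          = lanes;
    inst->passes         = passes;
    /* RandomX defaults (m_cost = 262144, lanes = 1, passes = 3)
       give segment_length = 65536 and lane_length = 262144. */
}
```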
||||
|
||||
/*
|
||||
* Argon2 position: where we construct the block right now. Used to distribute
|
||||
* work between threads.
|
||||
*/
|
||||
typedef struct Argon2_position_t {
|
||||
uint32_t pass;
|
||||
uint32_t lane;
|
||||
uint8_t slice;
|
||||
uint32_t index;
|
||||
} argon2_position_t;
|
||||
|
||||
/*Struct that holds the inputs for thread handling FillSegment*/
|
||||
typedef struct Argon2_thread_data {
|
||||
argon2_instance_t *instance_ptr;
|
||||
argon2_position_t pos;
|
||||
} argon2_thread_data;
|
||||
|
||||
/*************************Argon2 core functions********************************/
|
||||
|
||||
/*
|
||||
* Computes absolute position of reference block in the lane following a skewed
|
||||
* distribution and using a pseudo-random value as input
|
||||
* @param instance Pointer to the current instance
|
||||
* @param position Pointer to the current position
|
||||
* @param pseudo_rand 32-bit pseudo-random value used to determine the position
|
||||
* @param same_lane Indicates if the block will be taken from the current lane.
|
||||
* If so we can reference the current segment
|
||||
* @pre All pointers must be valid
|
||||
*/
|
||||
uint32_t randomx_argon2_index_alpha(const argon2_instance_t *instance,
|
||||
const argon2_position_t *position, uint32_t pseudo_rand,
|
||||
int same_lane);
|
||||
|
||||
/*
|
||||
* Function that validates all inputs against predefined restrictions and return
|
||||
* an error code
|
||||
* @param context Pointer to current Argon2 context
|
||||
* @return ARGON2_OK if everything is all right, otherwise one of error codes
|
||||
* (all defined in <argon2.h>)
|
||||
*/
|
||||
int randomx_argon2_validate_inputs(const argon2_context *context);
|
||||
|
||||
/*
|
||||
* Function allocates memory, hashes the inputs with Blake, and creates first
|
||||
* two blocks. Returns the pointer to the main memory with 2 blocks per lane
|
||||
* initialized
|
||||
* @param context Pointer to the Argon2 internal structure containing memory
|
||||
* pointer, and parameters for time and space requirements.
|
||||
* @param instance Current Argon2 instance
|
||||
* @return Zero if successful, -1 if memory failed to allocate. @context->state
|
||||
* will be modified if successful.
|
||||
*/
|
||||
int randomx_argon2_initialize(argon2_instance_t *instance, argon2_context *context);
|
||||
|
||||
/*
|
||||
* Function that fills the entire memory t_cost times based on the first two
|
||||
* blocks in each lane
|
||||
* @param instance Pointer to the current instance
|
||||
* @return ARGON2_OK if successful
|
||||
*/
|
||||
int randomx_argon2_fill_memory_blocks(argon2_instance_t* instance);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
181 randomx/argon2_ref.c Normal file
@@ -0,0 +1,181 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "argon2.h"
|
||||
#include "argon2_core.h"
|
||||
|
||||
#include "blake2/blamka-round-ref.h"
|
||||
#include "blake2/blake2-impl.h"
|
||||
#include "blake2/blake2.h"
|
||||
|
||||
static void copy_block(block* dst, const block* src) {
|
||||
memcpy(dst->v, src->v, sizeof(uint64_t) * ARGON2_QWORDS_IN_BLOCK);
|
||||
}
|
||||
|
||||
static void xor_block(block* dst, const block* src) {
|
||||
int i;
|
||||
for (i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
|
||||
dst->v[i] ^= src->v[i];
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Function fills a new memory block and optionally XORs the old block over the new one.
|
||||
* @next_block must be initialized.
|
||||
* @param prev_block Pointer to the previous block
|
||||
* @param ref_block Pointer to the reference block
|
||||
* @param next_block Pointer to the block to be constructed
|
||||
* @param with_xor Whether to XOR into the new block (1) or just overwrite (0)
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
static void fill_block(const block *prev_block, const block *ref_block,block *next_block, int with_xor) {
|
||||
block blockR, block_tmp;
|
||||
unsigned i;
|
||||
|
||||
copy_block(&blockR, ref_block);
|
||||
xor_block(&blockR, prev_block);
|
||||
copy_block(&block_tmp, &blockR);
|
||||
/* Now blockR = ref_block + prev_block and block_tmp = ref_block + prev_block */
|
||||
if (with_xor) {
|
||||
/* Saving the next block contents for XOR over: */
|
||||
xor_block(&block_tmp, next_block);
|
||||
/* Now blockR = ref_block + prev_block and
|
||||
block_tmp = ref_block + prev_block + next_block */
|
||||
}
|
||||
|
||||
/* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then
|
||||
(16,17,..31)... finally (112,113,...127) */
|
||||
for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND_NOMSG(
|
||||
blockR.v[16 * i], blockR.v[16 * i + 1], blockR.v[16 * i + 2],
|
||||
blockR.v[16 * i + 3], blockR.v[16 * i + 4], blockR.v[16 * i + 5],
|
||||
blockR.v[16 * i + 6], blockR.v[16 * i + 7], blockR.v[16 * i + 8],
|
||||
blockR.v[16 * i + 9], blockR.v[16 * i + 10], blockR.v[16 * i + 11],
|
||||
blockR.v[16 * i + 12], blockR.v[16 * i + 13], blockR.v[16 * i + 14],
|
||||
blockR.v[16 * i + 15]);
|
||||
}
|
||||
|
||||
/* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then
|
||||
(2,3,18,19,...,114,115).. finally (14,15,30,31,...,126,127) */
|
||||
for (i = 0; i < 8; i++) {
|
||||
BLAKE2_ROUND_NOMSG(
|
||||
blockR.v[2 * i], blockR.v[2 * i + 1], blockR.v[2 * i + 16],
|
||||
blockR.v[2 * i + 17], blockR.v[2 * i + 32], blockR.v[2 * i + 33],
|
||||
blockR.v[2 * i + 48], blockR.v[2 * i + 49], blockR.v[2 * i + 64],
|
||||
blockR.v[2 * i + 65], blockR.v[2 * i + 80], blockR.v[2 * i + 81],
|
||||
blockR.v[2 * i + 96], blockR.v[2 * i + 97], blockR.v[2 * i + 112],
|
||||
blockR.v[2 * i + 113]);
|
||||
}
|
||||
|
||||
copy_block(next_block, &block_tmp);
|
||||
xor_block(next_block, &blockR);
|
||||
}
|
||||
|
||||
void randomx_argon2_fill_segment_ref(const argon2_instance_t *instance,argon2_position_t position) {
|
||||
printf("randomx_argon2_fill_segment_ref\n");
|
||||
block *ref_block = NULL, *curr_block = NULL;
|
||||
block address_block, input_block, zero_block;
|
||||
uint64_t pseudo_rand, ref_index, ref_lane;
|
||||
uint32_t prev_offset, curr_offset;
|
||||
uint32_t starting_index;
|
||||
uint32_t i;
|
||||
|
||||
if (instance == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
starting_index = 0;
|
||||
|
||||
if ((0 == position.pass) && (0 == position.slice)) {
|
||||
starting_index = 2; /* we have already generated the first two blocks */
|
||||
}
|
||||
|
||||
/* Offset of the current block */
|
||||
curr_offset = position.lane * instance->lane_length + position.slice * instance->segment_length + starting_index;
|
||||
|
||||
if (0 == curr_offset % instance->lane_length) {
|
||||
/* Last block in this lane */
|
||||
prev_offset = curr_offset + instance->lane_length - 1;
|
||||
}
|
||||
else {
|
||||
/* Previous block */
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
for (i = starting_index; i < instance->segment_length; ++i, ++curr_offset, ++prev_offset) {
|
||||
/*1.1 Rotating prev_offset if needed */
|
||||
if (curr_offset % instance->lane_length == 1) {
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
/* 1.2 Computing the index of the reference block */
|
||||
/* 1.2.1 Taking pseudo-random value from the previous block */
|
||||
pseudo_rand = instance->memory[prev_offset].v[0];
|
||||
|
||||
/* 1.2.2 Computing the lane of the reference block */
|
||||
ref_lane = ((pseudo_rand >> 32)) % instance->lanes; //0
|
||||
|
||||
if ((position.pass == 0) && (position.slice == 0)) {
|
||||
/* Can not reference other lanes yet */
|
||||
ref_lane = position.lane;
|
||||
}
|
||||
|
||||
/* 1.2.3 Computing the number of possible reference block within the
|
||||
* lane.
|
||||
*/
|
||||
position.index = i;
|
||||
ref_index = randomx_argon2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,ref_lane == position.lane);
|
||||
|
||||
/* 2 Creating a new block */
|
||||
ref_block = instance->memory + instance->lane_length * ref_lane + ref_index;
|
||||
curr_block = instance->memory + curr_offset;
|
||||
if (i == starting_index && 0 == position.pass && 0 == position.slice) printf("ref_index=%llu, curr_offset=%u, prev_offset=%u, ref_lane=%llu\n", (unsigned long long)ref_index, curr_offset, prev_offset, (unsigned long long)ref_lane);
|
||||
if (ARGON2_VERSION_10 == instance->version) {
|
||||
/* version 1.2.1 and earlier: overwrite, not XOR */
|
||||
fill_block(instance->memory + prev_offset, ref_block, curr_block, 0);
|
||||
}
|
||||
else {
|
||||
if (0 == position.pass) {
|
||||
fill_block(instance->memory + prev_offset, ref_block, curr_block, 0);
|
||||
}
|
||||
else {
|
||||
fill_block(instance->memory + prev_offset, ref_block, curr_block, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
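The two prev_offset branches above plus the in-loop "rotation" implement one simple rule; the helper below restates it on its own (the name is ours, for illustration only).

```c
#include <stdint.h>

/* Previous-block rule used by fill_segment: within a lane, block j reads
   block j-1, and block 0 wraps around to the last block of the same lane. */
static uint32_t prev_offset_in_lane(uint32_t lane, uint32_t j, uint32_t lane_length) {
    uint32_t prev_j = (j == 0) ? (lane_length - 1) : (j - 1);
    return lane * lane_length + prev_j;
}
```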
||||
183 randomx/argon2_ssse3.c Normal file
@@ -0,0 +1,183 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "argon2.h"
|
||||
|
||||
#if defined(_MSC_VER) //MSVC doesn't define SSSE3
|
||||
#define __SSSE3__
|
||||
#endif
|
||||
|
||||
void randomx_argon2_fill_segment_ssse3(const argon2_instance_t* instance,
|
||||
argon2_position_t position);
|
||||
|
||||
randomx_argon2_impl* randomx_argon2_impl_ssse3() {
|
||||
#if defined(__SSSE3__)
|
||||
return &randomx_argon2_fill_segment_ssse3;
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
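A sketch of how a caller could use this selector, falling back to the reference implementation from argon2_ref.c when SSSE3 is not compiled in. The randomx_argon2_impl function typedef is assumed to come from the Argon2 headers included in this commit.

```c
#include <stddef.h>
#include "argon2.h"
#include "argon2_core.h"

void randomx_argon2_fill_segment_ref(const argon2_instance_t *instance,
                                     argon2_position_t position);

/* Pick the segment-fill routine to store in instance->impl. */
static randomx_argon2_impl *select_argon2_impl(void) {
    randomx_argon2_impl *impl = randomx_argon2_impl_ssse3();
    if (impl == NULL) {
        impl = &randomx_argon2_fill_segment_ref;   /* portable fallback */
    }
    return impl;
}
```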
||||
|
||||
#if defined(__SSSE3__)
|
||||
|
||||
#include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
|
||||
|
||||
#include "argon2_core.h"
|
||||
|
||||
#include "blake2/blamka-round-ssse3.h"
|
||||
#include "blake2/blake2-impl.h"
|
||||
#include "blake2/blake2.h"
|
||||
|
||||
static void fill_block(__m128i* state, const block* ref_block,
|
||||
block* next_block, int with_xor) {
|
||||
__m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
|
||||
unsigned int i;
|
||||
|
||||
if (with_xor) {
|
||||
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm_xor_si128(
|
||||
state[i], _mm_loadu_si128((const __m128i*)ref_block->v + i));
|
||||
block_XY[i] = _mm_xor_si128(
|
||||
state[i], _mm_loadu_si128((const __m128i*)next_block->v + i));
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
|
||||
block_XY[i] = state[i] = _mm_xor_si128(
|
||||
state[i], _mm_loadu_si128((const __m128i*)ref_block->v + i));
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
|
||||
state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
|
||||
state[8 * i + 6], state[8 * i + 7]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
|
||||
state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
|
||||
state[8 * 6 + i], state[8 * 7 + i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm_xor_si128(state[i], block_XY[i]);
|
||||
_mm_storeu_si128((__m128i*)next_block->v + i, state[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void randomx_argon2_fill_segment_ssse3(const argon2_instance_t* instance,
|
||||
argon2_position_t position) {
|
||||
// printf("randomx_argon2_fill_segment_ssse3\n");
|
||||
block* ref_block = NULL, * curr_block = NULL;
|
||||
block address_block, input_block;
|
||||
uint64_t pseudo_rand, ref_index, ref_lane;
|
||||
uint32_t prev_offset, curr_offset;
|
||||
uint32_t starting_index, i;
|
||||
__m128i state[ARGON2_OWORDS_IN_BLOCK];
|
||||
|
||||
if (instance == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
starting_index = 0;
|
||||
|
||||
if ((0 == position.pass) && (0 == position.slice)) {
|
||||
starting_index = 2; /* we have already generated the first two blocks */
|
||||
}
|
||||
|
||||
/* Offset of the current block */
|
||||
curr_offset = position.lane * instance->lane_length +
|
||||
position.slice * instance->segment_length + starting_index;
|
||||
|
||||
if (0 == curr_offset % instance->lane_length) {
|
||||
/* Last block in this lane */
|
||||
prev_offset = curr_offset + instance->lane_length - 1;
|
||||
}
|
||||
else {
|
||||
/* Previous block */
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
|
||||
|
||||
for (i = starting_index; i < instance->segment_length;
|
||||
++i, ++curr_offset, ++prev_offset) {
|
||||
/*1.1 Rotating prev_offset if needed */
|
||||
if (curr_offset % instance->lane_length == 1) {
|
||||
prev_offset = curr_offset - 1;
|
||||
}
|
||||
|
||||
/* 1.2 Computing the index of the reference block */
|
||||
/* 1.2.1 Taking pseudo-random value from the previous block */
|
||||
pseudo_rand = instance->memory[prev_offset].v[0];
|
||||
|
||||
/* 1.2.2 Computing the lane of the reference block */
|
||||
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
|
||||
|
||||
if ((position.pass == 0) && (position.slice == 0)) {
|
||||
/* Can not reference other lanes yet */
|
||||
ref_lane = position.lane;
|
||||
}
|
||||
|
||||
/* 1.2.3 Computing the number of possible reference block within the
|
||||
* lane.
|
||||
*/
|
||||
position.index = i;
|
||||
ref_index = randomx_argon2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
|
||||
ref_lane == position.lane);
|
||||
|
||||
/* 2 Creating a new block */
|
||||
ref_block =
|
||||
instance->memory + instance->lane_length * ref_lane + ref_index;
|
||||
curr_block = instance->memory + curr_offset;
|
||||
if (ARGON2_VERSION_10 == instance->version) {
|
||||
/* version 1.2.1 and earlier: overwrite, not XOR */
|
||||
fill_block(state, ref_block, curr_block, 0);
|
||||
}
|
||||
else {
|
||||
if (0 == position.pass) {
|
||||
fill_block(state, ref_block, curr_block, 0);
|
||||
}
|
||||
else {
|
||||
fill_block(state, ref_block, curr_block, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
48 randomx/asm/configuration.asm Normal file
@@ -0,0 +1,48 @@
|
||||
; File start: ..\src\configuration.h
|
||||
RANDOMX_ARGON_MEMORY EQU 262144t
|
||||
RANDOMX_ARGON_ITERATIONS EQU 3t
|
||||
RANDOMX_ARGON_LANES EQU 1t
|
||||
RANDOMX_ARGON_SALT TEXTEQU <"RandomX\x03">
|
||||
RANDOMX_CACHE_ACCESSES EQU 8t
|
||||
RANDOMX_SUPERSCALAR_LATENCY EQU 170t
|
||||
RANDOMX_DATASET_BASE_SIZE EQU 2147483648t
|
||||
RANDOMX_DATASET_EXTRA_SIZE EQU 33554368t
|
||||
RANDOMX_PROGRAM_SIZE EQU 256t
|
||||
RANDOMX_PROGRAM_ITERATIONS EQU 2048t
|
||||
RANDOMX_PROGRAM_COUNT EQU 8t
|
||||
RANDOMX_SCRATCHPAD_L3 EQU 2097152t
|
||||
RANDOMX_SCRATCHPAD_L2 EQU 262144t
|
||||
RANDOMX_SCRATCHPAD_L1 EQU 16384t
|
||||
RANDOMX_JUMP_BITS EQU 8t
|
||||
RANDOMX_JUMP_OFFSET EQU 8t
|
||||
RANDOMX_FREQ_IADD_RS EQU 16t
|
||||
RANDOMX_FREQ_IADD_M EQU 7t
|
||||
RANDOMX_FREQ_ISUB_R EQU 16t
|
||||
RANDOMX_FREQ_ISUB_M EQU 7t
|
||||
RANDOMX_FREQ_IMUL_R EQU 16t
|
||||
RANDOMX_FREQ_IMUL_M EQU 4t
|
||||
RANDOMX_FREQ_IMULH_R EQU 4t
|
||||
RANDOMX_FREQ_IMULH_M EQU 1t
|
||||
RANDOMX_FREQ_ISMULH_R EQU 4t
|
||||
RANDOMX_FREQ_ISMULH_M EQU 1t
|
||||
RANDOMX_FREQ_IMUL_RCP EQU 8t
|
||||
RANDOMX_FREQ_INEG_R EQU 2t
|
||||
RANDOMX_FREQ_IXOR_R EQU 15t
|
||||
RANDOMX_FREQ_IXOR_M EQU 5t
|
||||
RANDOMX_FREQ_IROR_R EQU 8t
|
||||
RANDOMX_FREQ_IROL_R EQU 2t
|
||||
RANDOMX_FREQ_ISWAP_R EQU 4t
|
||||
RANDOMX_FREQ_FSWAP_R EQU 4t
|
||||
RANDOMX_FREQ_FADD_R EQU 16t
|
||||
RANDOMX_FREQ_FADD_M EQU 5t
|
||||
RANDOMX_FREQ_FSUB_R EQU 16t
|
||||
RANDOMX_FREQ_FSUB_M EQU 5t
|
||||
RANDOMX_FREQ_FSCAL_R EQU 6t
|
||||
RANDOMX_FREQ_FMUL_R EQU 32t
|
||||
RANDOMX_FREQ_FDIV_M EQU 4t
|
||||
RANDOMX_FREQ_FSQRT_R EQU 6t
|
||||
RANDOMX_FREQ_CBRANCH EQU 25t
|
||||
RANDOMX_FREQ_CFROUND EQU 1t
|
||||
RANDOMX_FREQ_ISTORE EQU 16t
|
||||
RANDOMX_FREQ_NOP EQU 0t
|
||||
; File end: ..\src\configuration.h
|
||||
10 randomx/asm/program_epilogue_linux.inc Normal file
@@ -0,0 +1,10 @@
|
||||
;# restore callee-saved registers - System V AMD64 ABI
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rbp
|
||||
pop rbx
|
||||
|
||||
;# program finished
|
||||
ret 0
|
||||
19 randomx/asm/program_epilogue_store.inc Normal file
@@ -0,0 +1,19 @@
|
||||
;# save VM register values
|
||||
pop rcx
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
mov qword ptr [rcx+16], r10
|
||||
mov qword ptr [rcx+24], r11
|
||||
mov qword ptr [rcx+32], r12
|
||||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
||||
movdqa xmmword ptr [rcx+64], xmm0
|
||||
movdqa xmmword ptr [rcx+80], xmm1
|
||||
movdqa xmmword ptr [rcx+96], xmm2
|
||||
movdqa xmmword ptr [rcx+112], xmm3
|
||||
lea rcx, [rcx+64]
|
||||
movdqa xmmword ptr [rcx+64], xmm4
|
||||
movdqa xmmword ptr [rcx+80], xmm5
|
||||
movdqa xmmword ptr [rcx+96], xmm6
|
||||
movdqa xmmword ptr [rcx+112], xmm7
|
||||
24 randomx/asm/program_epilogue_win64.inc Normal file
@@ -0,0 +1,24 @@
|
||||
;# restore callee-saved registers - Microsoft x64 calling convention
|
||||
movdqu xmm15, xmmword ptr [rsp]
|
||||
movdqu xmm14, xmmword ptr [rsp+16]
|
||||
movdqu xmm13, xmmword ptr [rsp+32]
|
||||
movdqu xmm12, xmmword ptr [rsp+48]
|
||||
movdqu xmm11, xmmword ptr [rsp+64]
|
||||
add rsp, 80
|
||||
movdqu xmm10, xmmword ptr [rsp]
|
||||
movdqu xmm9, xmmword ptr [rsp+16]
|
||||
movdqu xmm8, xmmword ptr [rsp+32]
|
||||
movdqu xmm7, xmmword ptr [rsp+48]
|
||||
movdqu xmm6, xmmword ptr [rsp+64]
|
||||
add rsp, 80
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rsi
|
||||
pop rdi
|
||||
pop rbp
|
||||
pop rbx
|
||||
|
||||
;# program finished
|
||||
ret
|
||||
28 randomx/asm/program_loop_load.inc Normal file
@@ -0,0 +1,28 @@
|
||||
lea rcx, [rsi+rax]
|
||||
push rcx
|
||||
xor r8, qword ptr [rcx+0]
|
||||
xor r9, qword ptr [rcx+8]
|
||||
xor r10, qword ptr [rcx+16]
|
||||
xor r11, qword ptr [rcx+24]
|
||||
xor r12, qword ptr [rcx+32]
|
||||
xor r13, qword ptr [rcx+40]
|
||||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
||||
lea rcx, [rsi+rdx]
|
||||
push rcx
|
||||
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||
cvtdq2pd xmm1, qword ptr [rcx+8]
|
||||
cvtdq2pd xmm2, qword ptr [rcx+16]
|
||||
cvtdq2pd xmm3, qword ptr [rcx+24]
|
||||
cvtdq2pd xmm4, qword ptr [rcx+32]
|
||||
cvtdq2pd xmm5, qword ptr [rcx+40]
|
||||
cvtdq2pd xmm6, qword ptr [rcx+48]
|
||||
cvtdq2pd xmm7, qword ptr [rcx+56]
|
||||
andps xmm4, xmm13
|
||||
andps xmm5, xmm13
|
||||
andps xmm6, xmm13
|
||||
andps xmm7, xmm13
|
||||
orps xmm4, xmm14
|
||||
orps xmm5, xmm14
|
||||
orps xmm6, xmm14
|
||||
orps xmm7, xmm14
|
||||
18 randomx/asm/program_loop_store.inc Normal file
@@ -0,0 +1,18 @@
|
||||
pop rcx
|
||||
mov qword ptr [rcx+0], r8
|
||||
mov qword ptr [rcx+8], r9
|
||||
mov qword ptr [rcx+16], r10
|
||||
mov qword ptr [rcx+24], r11
|
||||
mov qword ptr [rcx+32], r12
|
||||
mov qword ptr [rcx+40], r13
|
||||
mov qword ptr [rcx+48], r14
|
||||
mov qword ptr [rcx+56], r15
|
||||
pop rcx
|
||||
xorpd xmm0, xmm4
|
||||
xorpd xmm1, xmm5
|
||||
xorpd xmm2, xmm6
|
||||
xorpd xmm3, xmm7
|
||||
movapd xmmword ptr [rcx+0], xmm0
|
||||
movapd xmmword ptr [rcx+16], xmm1
|
||||
movapd xmmword ptr [rcx+32], xmm2
|
||||
movapd xmmword ptr [rcx+48], xmm3
|
||||
34 randomx/asm/program_prologue_linux.inc Normal file
@@ -0,0 +1,34 @@
|
||||
;# callee-saved registers - System V AMD64 ABI
|
||||
push rbx
|
||||
push rbp
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
|
||||
;# function arguments
|
||||
mov rbx, rcx ;# loop counter
|
||||
push rdi ;# RegisterFile& registerFile
|
||||
mov rcx, rdi
|
||||
mov rbp, qword ptr [rsi] ;# "mx", "ma"
|
||||
mov rdi, qword ptr [rsi+8] ;# uint8_t* dataset
|
||||
mov rsi, rdx ;# uint8_t* scratchpad
|
||||
|
||||
mov rax, rbp
|
||||
|
||||
;# zero integer registers
|
||||
xor r8, r8
|
||||
xor r9, r9
|
||||
xor r10, r10
|
||||
xor r11, r11
|
||||
xor r12, r12
|
||||
xor r13, r13
|
||||
xor r14, r14
|
||||
xor r15, r15
|
||||
|
||||
;# load constant registers
|
||||
lea rcx, [rcx+120]
|
||||
movapd xmm8, xmmword ptr [rcx+72]
|
||||
movapd xmm9, xmmword ptr [rcx+88]
|
||||
movapd xmm10, xmmword ptr [rcx+104]
|
||||
movapd xmm11, xmmword ptr [rcx+120]
|
||||
47 randomx/asm/program_prologue_win64.inc Normal file
@@ -0,0 +1,47 @@
|
||||
;# callee-saved registers - Microsoft x64 calling convention
|
||||
push rbx
|
||||
push rbp
|
||||
push rdi
|
||||
push rsi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 80
|
||||
movdqu xmmword ptr [rsp+64], xmm6
|
||||
movdqu xmmword ptr [rsp+48], xmm7
|
||||
movdqu xmmword ptr [rsp+32], xmm8
|
||||
movdqu xmmword ptr [rsp+16], xmm9
|
||||
movdqu xmmword ptr [rsp+0], xmm10
|
||||
sub rsp, 80
|
||||
movdqu xmmword ptr [rsp+64], xmm11
|
||||
movdqu xmmword ptr [rsp+48], xmm12
|
||||
movdqu xmmword ptr [rsp+32], xmm13
|
||||
movdqu xmmword ptr [rsp+16], xmm14
|
||||
movdqu xmmword ptr [rsp+0], xmm15
|
||||
|
||||
;# function arguments
|
||||
push rcx ;# RegisterFile& registerFile
|
||||
mov rbp, qword ptr [rdx] ;# "mx", "ma"
|
||||
mov rdi, qword ptr [rdx+8] ;# uint8_t* dataset
|
||||
mov rsi, r8 ;# uint8_t* scratchpad
|
||||
mov rbx, r9 ;# loop counter
|
||||
|
||||
mov rax, rbp
|
||||
|
||||
;# zero integer registers
|
||||
xor r8, r8
|
||||
xor r9, r9
|
||||
xor r10, r10
|
||||
xor r11, r11
|
||||
xor r12, r12
|
||||
xor r13, r13
|
||||
xor r14, r14
|
||||
xor r15, r15
|
||||
|
||||
;# load constant registers
|
||||
lea rcx, [rcx+120]
|
||||
movapd xmm8, xmmword ptr [rcx+72]
|
||||
movapd xmm9, xmmword ptr [rcx+88]
|
||||
movapd xmm10, xmmword ptr [rcx+104]
|
||||
movapd xmm11, xmmword ptr [rcx+120]
|
||||
17 randomx/asm/program_read_dataset.inc Normal file
@@ -0,0 +1,17 @@
|
||||
xor rbp, rax ;# modify "mx"
|
||||
mov edx, ebp ;# edx = mx
|
||||
and edx, RANDOMX_DATASET_BASE_MASK
|
||||
prefetchnta byte ptr [rdi+rdx]
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
mov edx, ebp ;# edx = ma
|
||||
and edx, RANDOMX_DATASET_BASE_MASK
|
||||
lea rcx, [rdi+rdx] ;# dataset cache line
|
||||
xor r8, qword ptr [rcx+0]
|
||||
xor r9, qword ptr [rcx+8]
|
||||
xor r10, qword ptr [rcx+16]
|
||||
xor r11, qword ptr [rcx+24]
|
||||
xor r12, qword ptr [rcx+32]
|
||||
xor r13, qword ptr [rcx+40]
|
||||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
||||
|
||||
10 randomx/asm/program_read_dataset_sshash_fin.inc Normal file
@@ -0,0 +1,10 @@
|
||||
mov rbx, qword ptr [rsp+64]
|
||||
xor r8, qword ptr [rsp+56]
|
||||
xor r9, qword ptr [rsp+48]
|
||||
xor r10, qword ptr [rsp+40]
|
||||
xor r11, qword ptr [rsp+32]
|
||||
xor r12, qword ptr [rsp+24]
|
||||
xor r13, qword ptr [rsp+16]
|
||||
xor r14, qword ptr [rsp+8]
|
||||
xor r15, qword ptr [rsp+0]
|
||||
add rsp, 72
|
||||
17 randomx/asm/program_read_dataset_sshash_init.inc Normal file
@@ -0,0 +1,17 @@
|
||||
sub rsp, 72
|
||||
mov qword ptr [rsp+64], rbx
|
||||
mov qword ptr [rsp+56], r8
|
||||
mov qword ptr [rsp+48], r9
|
||||
mov qword ptr [rsp+40], r10
|
||||
mov qword ptr [rsp+32], r11
|
||||
mov qword ptr [rsp+24], r12
|
||||
mov qword ptr [rsp+16], r13
|
||||
mov qword ptr [rsp+8], r14
|
||||
mov qword ptr [rsp+0], r15
|
||||
xor rbp, rax ;# modify "mx"
|
||||
ror rbp, 32 ;# swap "ma" and "mx"
|
||||
mov ebx, ebp ;# ebx = ma
|
||||
and ebx, RANDOMX_DATASET_BASE_MASK
|
||||
shr ebx, 6 ;# ebx = Dataset block number
|
||||
;# add ebx, datasetOffset / 64
|
||||
;# call 32768
|
||||
24 randomx/asm/program_sshash_constants.inc Normal file
@@ -0,0 +1,24 @@
|
||||
r0_mul:
|
||||
;#/ 6364136223846793005
|
||||
db 45, 127, 149, 76, 45, 244, 81, 88
|
||||
r1_add:
|
||||
;#/ 9298411001130361340
|
||||
db 252, 161, 245, 89, 138, 151, 10, 129
|
||||
r2_add:
|
||||
;#/ 12065312585734608966
|
||||
db 70, 216, 194, 56, 223, 153, 112, 167
|
||||
r3_add:
|
||||
;#/ 9306329213124626780
|
||||
db 92, 73, 34, 191, 28, 185, 38, 129
|
||||
r4_add:
|
||||
;#/ 5281919268842080866
|
||||
db 98, 138, 159, 23, 151, 37, 77, 73
|
||||
r5_add:
|
||||
;#/ 10536153434571861004
|
||||
db 12, 236, 170, 206, 185, 239, 55, 146
|
||||
r6_add:
|
||||
;#/ 3398623926847679864
|
||||
db 120, 45, 230, 108, 116, 86, 42, 47
|
||||
r7_add:
|
||||
;#/ 9549104520008361294
|
||||
db 78, 229, 44, 182, 247, 59, 133, 132
|
||||
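The db lines above are simply the little-endian byte encodings of the 64-bit constants named in the comments. A quick self-check in C (illustration only):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
    /* bytes of r0_mul as listed above */
    const uint8_t r0_mul_bytes[8] = { 45, 127, 149, 76, 45, 244, 81, 88 };
    uint64_t r0_mul;
    memcpy(&r0_mul, r0_mul_bytes, 8);              /* little-endian host assumed */
    printf("%llu\n", (unsigned long long)r0_mul);  /* prints 6364136223846793005 */
    return 0;
}
```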
8 randomx/asm/program_sshash_load.inc Normal file
@@ -0,0 +1,8 @@
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]
||||
4 randomx/asm/program_sshash_prefetch.inc Normal file
@@ -0,0 +1,4 @@
and rbx, RANDOMX_CACHE_MASK
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]
||||
6 randomx/asm/program_xmm_constants.inc Normal file
@@ -0,0 +1,6 @@
mantissaMask:
db 255, 255, 255, 255, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 0
exp240:
db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
scaleMask:
db 0, 0, 0, 0, 0, 0, 240, 128, 0, 0, 0, 0, 0, 0, 240, 128
||||
7 randomx/asm/randomx_reciprocal.inc Normal file
@@ -0,0 +1,7 @@
mov edx, 1
mov r8, rcx
xor eax, eax
bsr rcx, rcx
shl rdx, cl
div r8
ret
|
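The seven instructions above compute floor(2^(64 + floor(log2 divisor)) / divisor) with the hardware 128-by-64-bit divide (the quotient only fits in 64 bits when the divisor is not a power of two). A portable C sketch of the same computation, for illustration only:

```c
#include <stdint.h>

/* Sketch of the reciprocal computed by randomx_reciprocal.inc:
   the largest power of two not above the divisor, scaled by 2^64,
   divided by the divisor. */
uint64_t reciprocal_sketch(uint64_t divisor) {
    const uint64_t p2exp63 = 1ULL << 63;
    uint64_t quotient = p2exp63 / divisor, remainder = p2exp63 % divisor;
    unsigned bsr = 0;                       /* index of the highest set bit */
    for (uint64_t bit = divisor; bit > 1; bit >>= 1)
        bsr++;
    /* shift in bsr+1 more quotient bits: 2^63 -> 2^(64+bsr) */
    for (unsigned shift = 0; shift <= bsr; shift++) {
        quotient <<= 1;
        remainder <<= 1;
        if (remainder >= divisor) {
            remainder -= divisor;
            quotient++;
        }
    }
    return quotient;
}
```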
||||
612 randomx/assembly_generator_x86.cpp Normal file
@@ -0,0 +1,612 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <climits>
|
||||
#include "assembly_generator_x86.hpp"
|
||||
#include "common.hpp"
|
||||
#include "reciprocal.h"
|
||||
#include "program.hpp"
|
||||
#include "superscalar.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
static const char* regR[] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" };
|
||||
static const char* regR32[] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" };
|
||||
static const char* regFE[] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" };
|
||||
static const char* regF[] = { "xmm0", "xmm1", "xmm2", "xmm3" };
|
||||
static const char* regE[] = { "xmm4", "xmm5", "xmm6", "xmm7" };
|
||||
static const char* regA[] = { "xmm8", "xmm9", "xmm10", "xmm11" };
|
||||
|
||||
static const char* tempRegx = "xmm12";
|
||||
static const char* mantissaMaskReg = "xmm13";
|
||||
static const char* exponentMaskReg = "xmm14";
|
||||
static const char* scaleMaskReg = "xmm15";
|
||||
static const char* regIc = "rbx";
|
||||
static const char* regIc32 = "ebx";
|
||||
static const char* regIc8 = "bl";
|
||||
static const char* regScratchpadAddr = "rsi";
|
||||
|
||||
void AssemblyGeneratorX86::generateProgram(Program& prog) {
|
||||
//printf("---\n");
|
||||
for (unsigned i = 0; i < RegistersCount; ++i) {
|
||||
registerUsage[i] = -1;
|
||||
}
|
||||
asmCode.str(std::string()); //clear
|
||||
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
||||
asmCode << "randomx_isn_" << i << ":" << std::endl;
|
||||
Instruction& instr = prog(i);
|
||||
instr.src %= RegistersCount;
|
||||
instr.dst %= RegistersCount;
|
||||
generateCode(instr, i);
|
||||
}
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::generateAsm(SuperscalarProgram& prog) {
|
||||
asmCode.str(std::string()); //clear
|
||||
#ifdef RANDOMX_ALIGN
|
||||
asmCode << "ALIGN 16" << std::endl;
|
||||
#endif
|
||||
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
||||
Instruction& instr = prog(i);
|
||||
switch ((SuperscalarInstructionType)instr.opcode)
|
||||
{
|
||||
case SuperscalarInstructionType::ISUB_R:
|
||||
asmCode << "sub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::IXOR_R:
|
||||
asmCode << "xor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::IADD_RS:
|
||||
asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << "]" << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::IMUL_R:
|
||||
asmCode << "imul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::IROR_C:
|
||||
asmCode << "ror " << regR[instr.dst] << ", " << instr.getImm32() << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::IADD_C7:
|
||||
asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::IXOR_C7:
|
||||
asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::IADD_C8:
|
||||
asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
||||
#ifdef RANDOMX_ALIGN
|
||||
asmCode << "nop" << std::endl;
|
||||
#endif
|
||||
break;
|
||||
case SuperscalarInstructionType::IXOR_C8:
|
||||
asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
||||
#ifdef RANDOMX_ALIGN
|
||||
asmCode << "nop" << std::endl;
|
||||
#endif
|
||||
break;
|
||||
case SuperscalarInstructionType::IADD_C9:
|
||||
asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
||||
#ifdef RANDOMX_ALIGN
|
||||
asmCode << "xchg ax, ax ;nop" << std::endl;
|
||||
#endif
|
||||
break;
|
||||
case SuperscalarInstructionType::IXOR_C9:
|
||||
asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
|
||||
#ifdef RANDOMX_ALIGN
|
||||
asmCode << "xchg ax, ax ;nop" << std::endl;
|
||||
#endif
|
||||
break;
|
||||
case SuperscalarInstructionType::IMULH_R:
|
||||
asmCode << "mov rax, " << regR[instr.dst] << std::endl;
|
||||
asmCode << "mul " << regR[instr.src] << std::endl;
|
||||
asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::ISMULH_R:
|
||||
asmCode << "mov rax, " << regR[instr.dst] << std::endl;
|
||||
asmCode << "imul " << regR[instr.src] << std::endl;
|
||||
asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::IMUL_RCP:
|
||||
asmCode << "mov rax, " << (int64_t)randomx_reciprocal(instr.getImm32()) << std::endl;
|
||||
asmCode << "imul " << regR[instr.dst] << ", rax" << std::endl;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AssemblyGeneratorX86::generateC(SuperscalarProgram& prog) {
|
||||
asmCode.str(std::string()); //clear
|
||||
asmCode << "#include <stdint.h>" << std::endl;
|
||||
asmCode << "#if defined(__SIZEOF_INT128__)" << std::endl;
|
||||
asmCode << " static inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl;
|
||||
asmCode << " return ((unsigned __int128)a * b) >> 64;" << std::endl;
|
||||
asmCode << " }" << std::endl;
|
||||
asmCode << " static inline int64_t smulh(int64_t a, int64_t b) {" << std::endl;
|
||||
asmCode << " return ((__int128)a * b) >> 64;" << std::endl;
|
||||
asmCode << " }" << std::endl;
|
||||
asmCode << " #define HAVE_MULH" << std::endl;
|
||||
asmCode << " #define HAVE_SMULH" << std::endl;
|
||||
asmCode << "#endif" << std::endl;
|
||||
asmCode << "#if defined(_MSC_VER)" << std::endl;
|
||||
asmCode << " #define HAS_VALUE(X) X ## 0" << std::endl;
|
||||
asmCode << " #define EVAL_DEFINE(X) HAS_VALUE(X)" << std::endl;
|
||||
asmCode << " #include <intrin.h>" << std::endl;
|
||||
asmCode << " #include <stdlib.h>" << std::endl;
|
||||
asmCode << " static __inline uint64_t rotr(uint64_t x , int c) {" << std::endl;
|
||||
asmCode << " return _rotr64(x, c);" << std::endl;
|
||||
asmCode << " }" << std::endl;
|
||||
asmCode << " #define HAVE_ROTR" << std::endl;
|
||||
asmCode << " #if EVAL_DEFINE(__MACHINEARM64_X64(1))" << std::endl;
|
||||
asmCode << " static __inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl;
|
||||
asmCode << " return __umulh(a, b);" << std::endl;
|
||||
asmCode << " }" << std::endl;
|
||||
asmCode << " #define HAVE_MULH" << std::endl;
|
||||
asmCode << " #endif" << std::endl;
|
||||
asmCode << " #if EVAL_DEFINE(__MACHINEX64(1))" << std::endl;
|
||||
asmCode << " static __inline int64_t smulh(int64_t a, int64_t b) {" << std::endl;
|
||||
asmCode << " int64_t hi;" << std::endl;
|
||||
asmCode << " _mul128(a, b, &hi);" << std::endl;
|
||||
asmCode << " return hi;" << std::endl;
|
||||
asmCode << " }" << std::endl;
|
||||
asmCode << " #define HAVE_SMULH" << std::endl;
|
||||
asmCode << " #endif" << std::endl;
|
||||
asmCode << "#endif" << std::endl;
|
||||
asmCode << "#ifndef HAVE_ROTR" << std::endl;
|
||||
asmCode << " static inline uint64_t rotr(uint64_t a, int b) {" << std::endl;
|
||||
asmCode << " return (a >> b) | (a << (64 - b));" << std::endl;
|
||||
asmCode << " }" << std::endl;
|
||||
asmCode << " #define HAVE_ROTR" << std::endl;
|
||||
asmCode << "#endif" << std::endl;
|
||||
asmCode << "#if !defined(HAVE_MULH) || !defined(HAVE_SMULH) || !defined(HAVE_ROTR)" << std::endl;
|
||||
asmCode << " #error \"Required functions are not defined\"" << std::endl;
|
||||
asmCode << "#endif" << std::endl;
|
||||
asmCode << "void superScalar(uint64_t r[8]) {" << std::endl;
|
||||
asmCode << "uint64_t r8 = r[0], r9 = r[1], r10 = r[2], r11 = r[3], r12 = r[4], r13 = r[5], r14 = r[6], r15 = r[7];" << std::endl;
|
||||
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
||||
Instruction& instr = prog(i);
|
||||
switch ((SuperscalarInstructionType)instr.opcode)
|
||||
{
|
||||
case SuperscalarInstructionType::ISUB_R:
|
||||
asmCode << regR[instr.dst] << " -= " << regR[instr.src] << ";" << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::IXOR_R:
|
||||
asmCode << regR[instr.dst] << " ^= " << regR[instr.src] << ";" << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::IADD_RS:
|
||||
asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.getModShift())) << ";" << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::IMUL_R:
|
||||
asmCode << regR[instr.dst] << " *= " << regR[instr.src] << ";" << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::IROR_C:
|
||||
asmCode << regR[instr.dst] << " = rotr(" << regR[instr.dst] << ", " << instr.getImm32() << ");" << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::IADD_C7:
|
||||
case SuperscalarInstructionType::IADD_C8:
|
||||
case SuperscalarInstructionType::IADD_C9:
|
||||
asmCode << regR[instr.dst] << " += " << (int32_t)instr.getImm32() << ";" << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::IXOR_C7:
|
||||
case SuperscalarInstructionType::IXOR_C8:
|
||||
case SuperscalarInstructionType::IXOR_C9:
|
||||
asmCode << regR[instr.dst] << " ^= " << (int32_t)instr.getImm32() << ";" << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::IMULH_R:
|
||||
asmCode << regR[instr.dst] << " = mulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::ISMULH_R:
|
||||
asmCode << regR[instr.dst] << " = smulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl;
|
||||
break;
|
||||
case SuperscalarInstructionType::IMUL_RCP:
|
||||
asmCode << regR[instr.dst] << " *= " << (int64_t)randomx_reciprocal(instr.getImm32()) << ";" << std::endl;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
asmCode << "r[0] = r8; r[1] = r9; r[2] = r10; r[3] = r11; r[4] = r12; r[5] = r13; r[6] = r14; r[7] = r15;" << std::endl;
|
||||
asmCode << "}" << std::endl;
|
||||
}
|
||||
|
||||
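For orientation, the C file emitted by generateC has roughly the following shape; the prelude is abbreviated and the two instructions inside superScalar are hypothetical, since the real body depends entirely on the generated SuperscalarProgram:

```
#include <stdint.h>

/* abbreviated prelude: the real output also selects mulh/smulh per compiler */
static inline uint64_t rotr(uint64_t a, int b) {
	return (a >> b) | (a << (64 - b));
}

void superScalar(uint64_t r[8]) {
	uint64_t r8 = r[0], r9 = r[1], r10 = r[2], r11 = r[3], r12 = r[4], r13 = r[5], r14 = r[6], r15 = r[7];
	r8 -= r9;            /* ISUB_R, hypothetical instruction */
	r10 = rotr(r10, 29); /* IROR_C, hypothetical instruction */
	r[0] = r8; r[1] = r9; r[2] = r10; r[3] = r11; r[4] = r12; r[5] = r13; r[6] = r14; r[7] = r15;
}
```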
void AssemblyGeneratorX86::traceint(Instruction& instr) {
	if (trace) {
		asmCode << "\tpush " << regR[instr.dst] << std::endl;
	}
}

void AssemblyGeneratorX86::traceflt(Instruction& instr) {
	if (trace) {
		asmCode << "\tpush 0" << std::endl;
	}
}

void AssemblyGeneratorX86::tracenop(Instruction& instr) {
	if (trace) {
		asmCode << "\tpush 0" << std::endl;
	}
}

void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) {
	asmCode << "\t; " << instr;
	auto generator = engine[instr.opcode];
	(this->*generator)(instr, i);
}

void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") {
	asmCode << "\tlea " << reg << ", [" << regR32[instr.src] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
	asmCode << "\tand " << reg << ", " << ((instr.getModMem()) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl;
}

void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) {
	asmCode << "\tlea eax, [" << regR32[instr.dst] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
	int mask;
	if (instr.getModCond() < StoreL3Condition) {
		mask = instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask;
	}
	else {
		mask = ScratchpadL3Mask;
	}
	asmCode << "\tand eax" << ", " << (mask & (-maskAlign)) << std::endl;
}

int32_t AssemblyGeneratorX86::genAddressImm(Instruction& instr) {
	return (int32_t)instr.getImm32() & ScratchpadL3Mask;
}

void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	if(instr.dst == RegisterNeedsDisplacement)
		asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
	else
		asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << "]" << std::endl;
	traceint(instr);
}

void AssemblyGeneratorX86::h_IADD_M(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	if (instr.src != instr.dst) {
		genAddressReg(instr);
		asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
	}
	else {
		asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
	}
	traceint(instr);
}

void AssemblyGeneratorX86::h_ISUB_R(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	if (instr.src != instr.dst) {
		asmCode << "\tsub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
	}
	else {
		asmCode << "\tsub " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
	}
	traceint(instr);
}

void AssemblyGeneratorX86::h_ISUB_M(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	if (instr.src != instr.dst) {
		genAddressReg(instr);
		asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
	}
	else {
		asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
	}
	traceint(instr);
}

void AssemblyGeneratorX86::h_IMUL_R(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	if (instr.src != instr.dst) {
		asmCode << "\timul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
	}
	else {
		asmCode << "\timul " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
	}
	traceint(instr);
}

void AssemblyGeneratorX86::h_IMUL_M(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	if (instr.src != instr.dst) {
		genAddressReg(instr);
		asmCode << "\timul " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
	}
	else {
		asmCode << "\timul " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
	}
	traceint(instr);
}

void AssemblyGeneratorX86::h_IMULH_R(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
	asmCode << "\tmul " << regR[instr.src] << std::endl;
	asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
	traceint(instr);
}

void AssemblyGeneratorX86::h_IMULH_M(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	if (instr.src != instr.dst) {
		genAddressReg(instr, "ecx");
		asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
		asmCode << "\tmul qword ptr [" << regScratchpadAddr << "+rcx]" << std::endl;
	}
	else {
		asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
		asmCode << "\tmul qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
	}
	asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
	traceint(instr);
}

void AssemblyGeneratorX86::h_ISMULH_R(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
	asmCode << "\timul " << regR[instr.src] << std::endl;
	asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
	traceint(instr);
}

void AssemblyGeneratorX86::h_ISMULH_M(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	if (instr.src != instr.dst) {
		genAddressReg(instr, "ecx");
		asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
		asmCode << "\timul qword ptr [" << regScratchpadAddr << "+rcx]" << std::endl;
	}
	else {
		asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
		asmCode << "\timul qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
	}
	asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
	traceint(instr);
}

void AssemblyGeneratorX86::h_INEG_R(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	asmCode << "\tneg " << regR[instr.dst] << std::endl;
	traceint(instr);
}
void AssemblyGeneratorX86::h_IXOR_R(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	if (instr.src != instr.dst) {
		asmCode << "\txor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
	}
	else {
		asmCode << "\txor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
	}
	traceint(instr);
}

void AssemblyGeneratorX86::h_IXOR_M(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	if (instr.src != instr.dst) {
		genAddressReg(instr);
		asmCode << "\txor " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
	}
	else {
		asmCode << "\txor " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
	}
	traceint(instr);
}

void AssemblyGeneratorX86::h_IROR_R(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	if (instr.src != instr.dst) {
		asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl;
		asmCode << "\tror " << regR[instr.dst] << ", cl" << std::endl;
	}
	else {
		asmCode << "\tror " << regR[instr.dst] << ", " << (instr.getImm32() & 63) << std::endl;
	}
	traceint(instr);
}

void AssemblyGeneratorX86::h_IROL_R(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	if (instr.src != instr.dst) {
		asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl;
		asmCode << "\trol " << regR[instr.dst] << ", cl" << std::endl;
	}
	else {
		asmCode << "\trol " << regR[instr.dst] << ", " << (instr.getImm32() & 63) << std::endl;
	}
	traceint(instr);
}

void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
	uint64_t divisor = instr.getImm32();
	if (!isZeroOrPowerOf2(divisor)) {
		registerUsage[instr.dst] = i;
		asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl;
		asmCode << "\timul " << regR[instr.dst] << ", rax" << std::endl;
		traceint(instr);
	}
	else {
		tracenop(instr);
	}
}

void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) {
	if (instr.src != instr.dst) {
		registerUsage[instr.dst] = i;
		registerUsage[instr.src] = i;
		asmCode << "\txchg " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
		traceint(instr);
	}
	else {
		tracenop(instr);
	}
}

void AssemblyGeneratorX86::h_FSWAP_R(Instruction& instr, int i) {
	asmCode << "\tshufpd " << regFE[instr.dst] << ", " << regFE[instr.dst] << ", 1" << std::endl;
	traceflt(instr);
}

void AssemblyGeneratorX86::h_FADD_R(Instruction& instr, int i) {
	instr.dst %= RegisterCountFlt;
	instr.src %= RegisterCountFlt;
	asmCode << "\taddpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
	traceflt(instr);
}

void AssemblyGeneratorX86::h_FADD_M(Instruction& instr, int i) {
	instr.dst %= RegisterCountFlt;
	genAddressReg(instr);
	asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
	asmCode << "\taddpd " << regF[instr.dst] << ", " << tempRegx << std::endl;
	traceflt(instr);
}

void AssemblyGeneratorX86::h_FSUB_R(Instruction& instr, int i) {
	instr.dst %= RegisterCountFlt;
	instr.src %= RegisterCountFlt;
	asmCode << "\tsubpd " << regF[instr.dst] << ", " << regA[instr.src] << std::endl;
	traceflt(instr);
}

void AssemblyGeneratorX86::h_FSUB_M(Instruction& instr, int i) {
	instr.dst %= RegisterCountFlt;
	genAddressReg(instr);
	asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
	asmCode << "\tsubpd " << regF[instr.dst] << ", " << tempRegx << std::endl;
	traceflt(instr);
}

void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) {
	instr.dst %= RegisterCountFlt;
	asmCode << "\txorps " << regF[instr.dst] << ", " << scaleMaskReg << std::endl;
	traceflt(instr);
}

void AssemblyGeneratorX86::h_FMUL_R(Instruction& instr, int i) {
	instr.dst %= RegisterCountFlt;
	instr.src %= RegisterCountFlt;
	asmCode << "\tmulpd " << regE[instr.dst] << ", " << regA[instr.src] << std::endl;
	traceflt(instr);
}

void AssemblyGeneratorX86::h_FDIV_M(Instruction& instr, int i) {
	instr.dst %= RegisterCountFlt;
	genAddressReg(instr);
	asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
	asmCode << "\tandps " << tempRegx << ", " << mantissaMaskReg << std::endl;
	asmCode << "\torps " << tempRegx << ", " << exponentMaskReg << std::endl;
	asmCode << "\tdivpd " << regE[instr.dst] << ", " << tempRegx << std::endl;
	traceflt(instr);
}

void AssemblyGeneratorX86::h_FSQRT_R(Instruction& instr, int i) {
	instr.dst %= RegisterCountFlt;
	asmCode << "\tsqrtpd " << regE[instr.dst] << ", " << regE[instr.dst] << std::endl;
	traceflt(instr);
}

void AssemblyGeneratorX86::h_CFROUND(Instruction& instr, int i) {
	asmCode << "\tmov rax, " << regR[instr.src] << std::endl;
	int rotate = (13 - (instr.getImm32() & 63)) & 63;
	if (rotate != 0)
		asmCode << "\trol rax, " << rotate << std::endl;
	asmCode << "\tand eax, 24576" << std::endl;
	asmCode << "\tor eax, 40896" << std::endl;
	asmCode << "\tpush rax" << std::endl;
	asmCode << "\tldmxcsr dword ptr [rsp]" << std::endl;
	asmCode << "\tpop rax" << std::endl;
	tracenop(instr);
}

void AssemblyGeneratorX86::h_CBRANCH(Instruction& instr, int i) {
	int reg = instr.dst;
	int target = registerUsage[reg] + 1;
	int shift = instr.getModCond() + ConditionOffset;
	int32_t imm = instr.getImm32() | (1L << shift);
	if (ConditionOffset > 0 || shift > 0)
		imm &= ~(1L << (shift - 1));
	asmCode << "\tadd " << regR[reg] << ", " << imm << std::endl;
	asmCode << "\ttest " << regR[reg] << ", " << (ConditionMask << shift) << std::endl;
	asmCode << "\tjz randomx_isn_" << target << std::endl;
	//mark all registers as used
	for (unsigned j = 0; j < RegistersCount; ++j) {
		registerUsage[j] = i;
	}
}

void AssemblyGeneratorX86::h_ISTORE(Instruction& instr, int i) {
	genAddressRegDst(instr);
	asmCode << "\tmov qword ptr [" << regScratchpadAddr << "+rax], " << regR[instr.src] << std::endl;
	tracenop(instr);
}

void AssemblyGeneratorX86::h_NOP(Instruction& instr, int i) {
	asmCode << "\tnop" << std::endl;
	tracenop(instr);
}
#include "instruction_weights.hpp"
|
||||
#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x))
|
||||
|
||||
InstructionGenerator AssemblyGeneratorX86::engine[256] = {
|
||||
INST_HANDLE(IADD_RS)
|
||||
INST_HANDLE(IADD_M)
|
||||
INST_HANDLE(ISUB_R)
|
||||
INST_HANDLE(ISUB_M)
|
||||
INST_HANDLE(IMUL_R)
|
||||
INST_HANDLE(IMUL_M)
|
||||
INST_HANDLE(IMULH_R)
|
||||
INST_HANDLE(IMULH_M)
|
||||
INST_HANDLE(ISMULH_R)
|
||||
INST_HANDLE(ISMULH_M)
|
||||
INST_HANDLE(IMUL_RCP)
|
||||
INST_HANDLE(INEG_R)
|
||||
INST_HANDLE(IXOR_R)
|
||||
INST_HANDLE(IXOR_M)
|
||||
INST_HANDLE(IROR_R)
|
||||
INST_HANDLE(IROL_R)
|
||||
INST_HANDLE(ISWAP_R)
|
||||
INST_HANDLE(FSWAP_R)
|
||||
INST_HANDLE(FADD_R)
|
||||
INST_HANDLE(FADD_M)
|
||||
INST_HANDLE(FSUB_R)
|
||||
INST_HANDLE(FSUB_M)
|
||||
INST_HANDLE(FSCAL_R)
|
||||
INST_HANDLE(FMUL_R)
|
||||
INST_HANDLE(FDIV_M)
|
||||
INST_HANDLE(FSQRT_R)
|
||||
INST_HANDLE(CBRANCH)
|
||||
INST_HANDLE(CFROUND)
|
||||
INST_HANDLE(ISTORE)
|
||||
INST_HANDLE(NOP)
|
||||
};
|
||||
}
|
||||
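The engine[256] table above gives every opcode byte a handler, with INST_HANDLE/REPN repeating each handler pointer according to its weight from instruction_weights.hpp. A minimal standalone sketch of that frequency-weighted dispatch idea (the weights and handler names below are made up, not the RandomX values):

```
#include <stdio.h>

typedef void (*Handler)(void);

static void h_iadd(void) { puts("IADD handler"); }   /* hypothetical handler */
static void h_imul(void) { puts("IMUL handler"); }   /* hypothetical handler */

static Handler engine[256];

int main(void) {
	/* assumed weights: opcodes 0..159 map to h_iadd, 160..255 to h_imul */
	for (int op = 0; op < 256; ++op)
		engine[op] = (op < 160) ? h_iadd : h_imul;
	engine[200]();  /* a random opcode byte picks its handler with the intended frequency */
	return 0;
}
```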
94
randomx/assembly_generator_x86.hpp
Normal file
94
randomx/assembly_generator_x86.hpp
Normal file
@@ -0,0 +1,94 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
	* Redistributions of source code must retain the above copyright
	notice, this list of conditions and the following disclaimer.
	* Redistributions in binary form must reproduce the above copyright
	notice, this list of conditions and the following disclaimer in the
	documentation and/or other materials provided with the distribution.
	* Neither the name of the copyright holder nor the
	names of its contributors may be used to endorse or promote products
	derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#pragma once

#include "common.hpp"
#include <sstream>

namespace randomx {

	class Program;
	class SuperscalarProgram;
	class AssemblyGeneratorX86;
	class Instruction;

	typedef void(AssemblyGeneratorX86::*InstructionGenerator)(Instruction&, int);

	class AssemblyGeneratorX86 {
	public:
		void generateProgram(Program& prog);
		void generateAsm(SuperscalarProgram& prog);
		void generateC(SuperscalarProgram& prog);
		void printCode(std::ostream& os) {
			os << asmCode.rdbuf();
		}
	private:
		void genAddressReg(Instruction&, const char*);
		void genAddressRegDst(Instruction&, int);
		int32_t genAddressImm(Instruction&);
		void generateCode(Instruction&, int);
		void traceint(Instruction&);
		void traceflt(Instruction&);
		void tracenop(Instruction&);
		void h_IADD_RS(Instruction&, int);
		void h_IADD_M(Instruction&, int);
		void h_ISUB_R(Instruction&, int);
		void h_ISUB_M(Instruction&, int);
		void h_IMUL_R(Instruction&, int);
		void h_IMUL_M(Instruction&, int);
		void h_IMULH_R(Instruction&, int);
		void h_IMULH_M(Instruction&, int);
		void h_ISMULH_R(Instruction&, int);
		void h_ISMULH_M(Instruction&, int);
		void h_IMUL_RCP(Instruction&, int);
		void h_INEG_R(Instruction&, int);
		void h_IXOR_R(Instruction&, int);
		void h_IXOR_M(Instruction&, int);
		void h_IROR_R(Instruction&, int);
		void h_IROL_R(Instruction&, int);
		void h_ISWAP_R(Instruction&, int);
		void h_FSWAP_R(Instruction&, int);
		void h_FADD_R(Instruction&, int);
		void h_FADD_M(Instruction&, int);
		void h_FSUB_R(Instruction&, int);
		void h_FSUB_M(Instruction&, int);
		void h_FSCAL_R(Instruction&, int);
		void h_FMUL_R(Instruction&, int);
		void h_FDIV_M(Instruction&, int);
		void h_FSQRT_R(Instruction&, int);
		void h_CBRANCH(Instruction&, int);
		void h_CFROUND(Instruction&, int);
		void h_ISTORE(Instruction&, int);
		void h_NOP(Instruction&, int);

		static InstructionGenerator engine[256];
		std::stringstream asmCode;
		int registerUsage[RegistersCount];
	};
}
76
randomx/blake2/blake2-impl.h
Normal file
76
randomx/blake2/blake2-impl.h
Normal file
@@ -0,0 +1,76 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
	* Redistributions of source code must retain the above copyright
	notice, this list of conditions and the following disclaimer.
	* Redistributions in binary form must reproduce the above copyright
	notice, this list of conditions and the following disclaimer in the
	documentation and/or other materials provided with the distribution.
	* Neither the name of the copyright holder nor the
	names of its contributors may be used to endorse or promote products
	derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Original code from Argon2 reference source code package used under CC0 Licence
 * https://github.com/P-H-C/phc-winner-argon2
 * Copyright 2015
 * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*/

#ifndef PORTABLE_BLAKE2_IMPL_H
#define PORTABLE_BLAKE2_IMPL_H

#include <stdint.h>

#include "endian.h"

static FORCE_INLINE uint64_t load48(const void *src) {
	const uint8_t *p = (const uint8_t *)src;
	uint64_t w = *p++;
	w |= (uint64_t)(*p++) << 8;
	w |= (uint64_t)(*p++) << 16;
	w |= (uint64_t)(*p++) << 24;
	w |= (uint64_t)(*p++) << 32;
	w |= (uint64_t)(*p++) << 40;
	return w;
}

static FORCE_INLINE void store48(void *dst, uint64_t w) {
	uint8_t *p = (uint8_t *)dst;
	*p++ = (uint8_t)w;
	w >>= 8;
	*p++ = (uint8_t)w;
	w >>= 8;
	*p++ = (uint8_t)w;
	w >>= 8;
	*p++ = (uint8_t)w;
	w >>= 8;
	*p++ = (uint8_t)w;
	w >>= 8;
	*p++ = (uint8_t)w;
}

static FORCE_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) {
	return (w >> c) | (w << (32 - c));
}

static FORCE_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) {
	return (w >> c) | (w << (64 - c));
}

#endif
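A quick sanity check of the 48-bit little-endian helpers above, written as a standalone program (the value is arbitrary and the two functions are local copies of the same byte layout):

```
#include <stdint.h>
#include <stdio.h>

/* local copies of the load48/store48 byte layout from blake2-impl.h */
static uint64_t load48_copy(const uint8_t *p) {
	uint64_t w = 0;
	for (int i = 0; i < 6; ++i)
		w |= (uint64_t)p[i] << (8 * i);
	return w;
}

static void store48_copy(uint8_t *p, uint64_t w) {
	for (int i = 0; i < 6; ++i)
		p[i] = (uint8_t)(w >> (8 * i));
}

int main(void) {
	uint8_t buf[6];
	uint64_t v = UINT64_C(0xAABBCCDDEEFF);  /* arbitrary 48-bit value */
	store48_copy(buf, v);
	printf("round trip ok: %d\n", load48_copy(buf) == v);
	return 0;
}
```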
116
randomx/blake2/blake2.h
Normal file
116
randomx/blake2/blake2.h
Normal file
@@ -0,0 +1,116 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#ifndef PORTABLE_BLAKE2_H
|
||||
#define PORTABLE_BLAKE2_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <limits.h>
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum blake2b_constant {
|
||||
BLAKE2B_BLOCKBYTES = 128,
|
||||
BLAKE2B_OUTBYTES = 64,
|
||||
BLAKE2B_KEYBYTES = 64,
|
||||
BLAKE2B_SALTBYTES = 16,
|
||||
BLAKE2B_PERSONALBYTES = 16
|
||||
};
|
||||
|
||||
#pragma pack(push, 1)
|
||||
typedef struct __blake2b_param {
|
||||
uint8_t digest_length; /* 1 */
|
||||
uint8_t key_length; /* 2 */
|
||||
uint8_t fanout; /* 3 */
|
||||
uint8_t depth; /* 4 */
|
||||
uint32_t leaf_length; /* 8 */
|
||||
uint64_t node_offset; /* 16 */
|
||||
uint8_t node_depth; /* 17 */
|
||||
uint8_t inner_length; /* 18 */
|
||||
uint8_t reserved[14]; /* 32 */
|
||||
uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
|
||||
uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
|
||||
} blake2b_param;
|
||||
#pragma pack(pop)
|
||||
|
||||
typedef struct __blake2b_state {
|
||||
uint64_t h[8];
|
||||
uint64_t t[2];
|
||||
uint64_t f[2];
|
||||
uint8_t buf[BLAKE2B_BLOCKBYTES];
|
||||
unsigned buflen;
|
||||
unsigned outlen;
|
||||
uint8_t last_node;
|
||||
} blake2b_state;
|
||||
|
||||
/* Ensure param structs have not been wrongly padded */
|
||||
/* Poor man's static_assert */
|
||||
enum {
|
||||
blake2_size_check_0 = 1 / !!(CHAR_BIT == 8),
|
||||
blake2_size_check_2 =
|
||||
1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT)
|
||||
};
|
||||
|
||||
//randomx namespace
|
||||
#define blake2b_init randomx_blake2b_init
|
||||
#define blake2b_init_key randomx_blake2b_init_key
|
||||
#define blake2b_init_param randomx_blake2b_init_param
|
||||
#define blake2b_update randomx_blake2b_update
|
||||
#define blake2b_final randomx_blake2b_final
|
||||
#define blake2b randomx_blake2b
|
||||
#define blake2b_long randomx_blake2b_long
|
||||
|
||||
/* Streaming API */
|
||||
int blake2b_init(blake2b_state *S, size_t outlen);
|
||||
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
|
||||
size_t keylen);
|
||||
int blake2b_init_param(blake2b_state *S, const blake2b_param *P);
|
||||
int blake2b_update(blake2b_state *S, const void *in, size_t inlen);
|
||||
int blake2b_final(blake2b_state *S, void *out, size_t outlen);
|
||||
|
||||
/* Simple API */
|
||||
int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
||||
const void *key, size_t keylen);
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
|
||||
/* Argon2 Team - End Code */
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
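A minimal usage sketch for the API declared above (note that the macros rename the exported symbols to randomx_blake2b_*; the 32-byte digest size and the message are just example choices):

```
#include <stdio.h>
#include <string.h>
#include "blake2.h"

int main(void) {
	const char msg[] = "pool test vector";   /* example input */
	uint8_t digest[32];                      /* example output length, unkeyed */

	/* blake2b() expands to randomx_blake2b() via the #define above */
	if (blake2b(digest, sizeof(digest), msg, strlen(msg), NULL, 0) != 0)
		return 1;

	for (size_t i = 0; i < sizeof(digest); ++i)
		printf("%02x", digest[i]);
	printf("\n");
	return 0;
}
```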
409
randomx/blake2/blake2b.c
Normal file
409
randomx/blake2/blake2b.c
Normal file
@@ -0,0 +1,409 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
static const uint64_t blake2b_IV[8] = {
|
||||
UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
|
||||
UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
|
||||
UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
|
||||
UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) };
|
||||
|
||||
static const unsigned int blake2b_sigma[12][16] = {
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
|
||||
{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
|
||||
{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
|
||||
{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
|
||||
{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
|
||||
{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
|
||||
{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
|
||||
{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
|
||||
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
|
||||
{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
|
||||
};
|
||||
|
||||
static FORCE_INLINE void blake2b_set_lastnode(blake2b_state *S) {
|
||||
S->f[1] = (uint64_t)-1;
|
||||
}
|
||||
|
||||
static FORCE_INLINE void blake2b_set_lastblock(blake2b_state *S) {
|
||||
if (S->last_node) {
|
||||
blake2b_set_lastnode(S);
|
||||
}
|
||||
S->f[0] = (uint64_t)-1;
|
||||
}
|
||||
|
||||
static FORCE_INLINE void blake2b_increment_counter(blake2b_state *S,
|
||||
uint64_t inc) {
|
||||
S->t[0] += inc;
|
||||
S->t[1] += (S->t[0] < inc);
|
||||
}
|
||||
|
||||
static FORCE_INLINE void blake2b_invalidate_state(blake2b_state *S) {
|
||||
//clear_internal_memory(S, sizeof(*S)); /* wipe */
|
||||
blake2b_set_lastblock(S); /* invalidate for further use */
|
||||
}
|
||||
|
||||
static FORCE_INLINE void blake2b_init0(blake2b_state *S) {
|
||||
memset(S, 0, sizeof(*S));
|
||||
memcpy(S->h, blake2b_IV, sizeof(S->h));
|
||||
}
|
||||
|
||||
int blake2b_init_param(blake2b_state *S, const blake2b_param *P) {
|
||||
const unsigned char *p = (const unsigned char *)P;
|
||||
unsigned int i;
|
||||
|
||||
if (NULL == P || NULL == S) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
blake2b_init0(S);
|
||||
/* IV XOR Parameter Block */
|
||||
for (i = 0; i < 8; ++i) {
|
||||
S->h[i] ^= load64(&p[i * sizeof(S->h[i])]);
|
||||
}
|
||||
S->outlen = P->digest_length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Sequential blake2b initialization */
|
||||
int blake2b_init(blake2b_state *S, size_t outlen) {
|
||||
blake2b_param P;
|
||||
|
||||
if (S == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) {
|
||||
blake2b_invalidate_state(S);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Setup Parameter Block for unkeyed BLAKE2 */
|
||||
P.digest_length = (uint8_t)outlen;
|
||||
P.key_length = 0;
|
||||
P.fanout = 1;
|
||||
P.depth = 1;
|
||||
P.leaf_length = 0;
|
||||
P.node_offset = 0;
|
||||
P.node_depth = 0;
|
||||
P.inner_length = 0;
|
||||
memset(P.reserved, 0, sizeof(P.reserved));
|
||||
memset(P.salt, 0, sizeof(P.salt));
|
||||
memset(P.personal, 0, sizeof(P.personal));
|
||||
|
||||
return blake2b_init_param(S, &P);
|
||||
}
|
||||
|
||||
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t keylen) {
|
||||
blake2b_param P;
|
||||
|
||||
if (S == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) {
|
||||
blake2b_invalidate_state(S);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((key == 0) || (keylen == 0) || (keylen > BLAKE2B_KEYBYTES)) {
|
||||
blake2b_invalidate_state(S);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Setup Parameter Block for keyed BLAKE2 */
|
||||
P.digest_length = (uint8_t)outlen;
|
||||
P.key_length = (uint8_t)keylen;
|
||||
P.fanout = 1;
|
||||
P.depth = 1;
|
||||
P.leaf_length = 0;
|
||||
P.node_offset = 0;
|
||||
P.node_depth = 0;
|
||||
P.inner_length = 0;
|
||||
memset(P.reserved, 0, sizeof(P.reserved));
|
||||
memset(P.salt, 0, sizeof(P.salt));
|
||||
memset(P.personal, 0, sizeof(P.personal));
|
||||
|
||||
if (blake2b_init_param(S, &P) < 0) {
|
||||
blake2b_invalidate_state(S);
|
||||
return -1;
|
||||
}
|
||||
|
||||
{
|
||||
uint8_t block[BLAKE2B_BLOCKBYTES];
|
||||
memset(block, 0, BLAKE2B_BLOCKBYTES);
|
||||
memcpy(block, key, keylen);
|
||||
blake2b_update(S, block, BLAKE2B_BLOCKBYTES);
|
||||
/* Burn the key from stack */
|
||||
//clear_internal_memory(block, BLAKE2B_BLOCKBYTES);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void blake2b_compress(blake2b_state *S, const uint8_t *block) {
|
||||
uint64_t m[16];
|
||||
uint64_t v[16];
|
||||
unsigned int i, r;
|
||||
|
||||
for (i = 0; i < 16; ++i) {
|
||||
m[i] = load64(block + i * sizeof(m[i]));
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
v[i] = S->h[i];
|
||||
}
|
||||
|
||||
v[8] = blake2b_IV[0];
|
||||
v[9] = blake2b_IV[1];
|
||||
v[10] = blake2b_IV[2];
|
||||
v[11] = blake2b_IV[3];
|
||||
v[12] = blake2b_IV[4] ^ S->t[0];
|
||||
v[13] = blake2b_IV[5] ^ S->t[1];
|
||||
v[14] = blake2b_IV[6] ^ S->f[0];
|
||||
v[15] = blake2b_IV[7] ^ S->f[1];
|
||||
|
||||
#define G(r, i, a, b, c, d) \
|
||||
do { \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
|
||||
d = rotr64(d ^ a, 32); \
|
||||
c = c + d; \
|
||||
b = rotr64(b ^ c, 24); \
|
||||
a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
|
||||
d = rotr64(d ^ a, 16); \
|
||||
c = c + d; \
|
||||
b = rotr64(b ^ c, 63); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define ROUND(r) \
|
||||
do { \
|
||||
G(r, 0, v[0], v[4], v[8], v[12]); \
|
||||
G(r, 1, v[1], v[5], v[9], v[13]); \
|
||||
G(r, 2, v[2], v[6], v[10], v[14]); \
|
||||
G(r, 3, v[3], v[7], v[11], v[15]); \
|
||||
G(r, 4, v[0], v[5], v[10], v[15]); \
|
||||
G(r, 5, v[1], v[6], v[11], v[12]); \
|
||||
G(r, 6, v[2], v[7], v[8], v[13]); \
|
||||
G(r, 7, v[3], v[4], v[9], v[14]); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
for (r = 0; r < 12; ++r) {
|
||||
ROUND(r);
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
|
||||
}
|
||||
|
||||
#undef G
|
||||
#undef ROUND
|
||||
}
|
||||
|
||||
int blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
|
||||
const uint8_t *pin = (const uint8_t *)in;
|
||||
|
||||
if (inlen == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Sanity check */
|
||||
if (S == NULL || in == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Is this a reused state? */
|
||||
if (S->f[0] != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) { //BLAKE2B_BLOCKBYTES =128
|
||||
/* Complete current block */
|
||||
size_t left = S->buflen;
|
||||
size_t fill = BLAKE2B_BLOCKBYTES - left;
|
||||
memcpy(&S->buf[left], pin, fill);
|
||||
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
|
||||
blake2b_compress(S, S->buf);
|
||||
S->buflen = 0;
|
||||
inlen -= fill;
|
||||
pin += fill;
|
||||
/* Avoid buffer copies when possible */
|
||||
while (inlen > BLAKE2B_BLOCKBYTES) {
|
||||
blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
|
||||
blake2b_compress(S, pin);
|
||||
inlen -= BLAKE2B_BLOCKBYTES;
|
||||
pin += BLAKE2B_BLOCKBYTES;
|
||||
}
|
||||
}
|
||||
memcpy(&S->buf[S->buflen], pin, inlen);
|
||||
S->buflen += (unsigned int)inlen;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2b_final(blake2b_state *S, void *out, size_t outlen) {
|
||||
uint8_t buffer[BLAKE2B_OUTBYTES] = { 0 };
|
||||
unsigned int i;
|
||||
|
||||
/* Sanity checks */
|
||||
if (S == NULL || out == NULL || outlen < S->outlen) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Is this a reused state? */
|
||||
if (S->f[0] != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
blake2b_increment_counter(S, S->buflen);
|
||||
blake2b_set_lastblock(S);
|
||||
memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
|
||||
blake2b_compress(S, S->buf);
|
||||
|
||||
for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
|
||||
store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
|
||||
}
|
||||
|
||||
memcpy(out, buffer, S->outlen);
|
||||
//clear_internal_memory(buffer, sizeof(buffer));
|
||||
//clear_internal_memory(S->buf, sizeof(S->buf));
|
||||
//clear_internal_memory(S->h, sizeof(S->h));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
|
||||
const void *key, size_t keylen) {
|
||||
blake2b_state S;
|
||||
int ret = -1;
|
||||
|
||||
/* Verify parameters */
|
||||
if (NULL == in && inlen > 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (NULL == out || outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if ((NULL == key && keylen > 0) || keylen > BLAKE2B_KEYBYTES) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (keylen > 0) {
|
||||
if (blake2b_init_key(&S, outlen, key, keylen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (blake2b_init(&S, outlen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
if (blake2b_update(&S, in, inlen) < 0) {
|
||||
goto fail;
|
||||
}
|
||||
ret = blake2b_final(&S, out, outlen);
|
||||
|
||||
fail:
|
||||
//clear_internal_memory(&S, sizeof(S));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
|
||||
uint8_t *out = (uint8_t *)pout;
|
||||
blake2b_state blake_state;
|
||||
uint8_t outlen_bytes[sizeof(uint32_t)] = { 0 };
|
||||
int ret = -1;
|
||||
|
||||
if (outlen > UINT32_MAX) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* Ensure little-endian byte order! */
|
||||
store32(outlen_bytes, (uint32_t)outlen);
|
||||
|
||||
#define TRY(statement) \
|
||||
do { \
|
||||
ret = statement; \
|
||||
if (ret < 0) { \
|
||||
goto fail; \
|
||||
} \
|
||||
} while ((void)0, 0)
|
||||
|
||||
if (outlen <= BLAKE2B_OUTBYTES) {
|
||||
TRY(blake2b_init(&blake_state, outlen));
|
||||
TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||
TRY(blake2b_update(&blake_state, in, inlen));
|
||||
TRY(blake2b_final(&blake_state, out, outlen));
|
||||
}
|
||||
else {
|
||||
uint32_t toproduce;
|
||||
uint8_t out_buffer[BLAKE2B_OUTBYTES];
|
||||
uint8_t in_buffer[BLAKE2B_OUTBYTES];
|
||||
TRY(blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
|
||||
TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
|
||||
TRY(blake2b_update(&blake_state, in, inlen));
|
||||
TRY(blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
|
||||
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
|
||||
out += BLAKE2B_OUTBYTES / 2;
|
||||
toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
|
||||
|
||||
while (toproduce > BLAKE2B_OUTBYTES) {
|
||||
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
|
||||
TRY(blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
|
||||
BLAKE2B_OUTBYTES, NULL, 0));
|
||||
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
|
||||
out += BLAKE2B_OUTBYTES / 2;
|
||||
toproduce -= BLAKE2B_OUTBYTES / 2;
|
||||
}
|
||||
|
||||
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
|
||||
TRY(blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
|
||||
0));
|
||||
memcpy(out, out_buffer, toproduce);
|
||||
}
|
||||
fail:
|
||||
//clear_internal_memory(&blake_state, sizeof(blake_state));
|
||||
return ret;
|
||||
#undef TRY
|
||||
}
|
||||
/* Argon2 Team - End Code */
|
||||
|
||||
189
randomx/blake2/blamka-round-avx2.h
Normal file
189
randomx/blake2/blamka-round-avx2.h
Normal file
@@ -0,0 +1,189 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#ifndef BLAKE_ROUND_MKA_OPT_H
|
||||
#define BLAKE_ROUND_MKA_OPT_H
|
||||
|
||||
#include "blake2-impl.h"
|
||||
|
||||
#ifdef __GNUC__
|
||||
#include <x86intrin.h>
|
||||
#else
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
|
||||
#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
|
||||
#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
|
||||
#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
|
||||
|
||||
#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i ml = _mm256_mul_epu32(A0, B0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
|
||||
D0 = _mm256_xor_si256(D0, A0); \
|
||||
D0 = rotr32(D0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C0, D0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
|
||||
\
|
||||
B0 = _mm256_xor_si256(B0, C0); \
|
||||
B0 = rotr24(B0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(A1, B1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
|
||||
D1 = _mm256_xor_si256(D1, A1); \
|
||||
D1 = rotr32(D1); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C1, D1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
|
||||
\
|
||||
B1 = _mm256_xor_si256(B1, C1); \
|
||||
B1 = rotr24(B1); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i ml = _mm256_mul_epu32(A0, B0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
|
||||
D0 = _mm256_xor_si256(D0, A0); \
|
||||
D0 = rotr16(D0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C0, D0); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
|
||||
B0 = _mm256_xor_si256(B0, C0); \
|
||||
B0 = rotr63(B0); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(A1, B1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
|
||||
D1 = _mm256_xor_si256(D1, A1); \
|
||||
D1 = rotr16(D1); \
|
||||
\
|
||||
ml = _mm256_mul_epu32(C1, D1); \
|
||||
ml = _mm256_add_epi64(ml, ml); \
|
||||
C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
|
||||
B1 = _mm256_xor_si256(B1, C1); \
|
||||
B1 = rotr63(B1); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
\
|
||||
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
|
||||
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
|
||||
B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
\
|
||||
tmp1 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = tmp1; \
|
||||
\
|
||||
tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
|
||||
tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
|
||||
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
} while(0);
|
||||
|
||||
#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
\
|
||||
B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
|
||||
C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
|
||||
D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
|
||||
__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
|
||||
B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
\
|
||||
tmp1 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = tmp1; \
|
||||
\
|
||||
tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
|
||||
tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
|
||||
D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
|
||||
D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do{ \
|
||||
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
\
|
||||
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
} while((void)0, 0);
|
||||
|
||||
#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do{ \
|
||||
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
\
|
||||
UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
} while((void)0, 0);
|
||||
|
||||
#endif /* BLAKE_ROUND_MKA_OPT_H */
|
||||
73
randomx/blake2/blamka-round-ref.h
Normal file
73
randomx/blake2/blamka-round-ref.h
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#ifndef BLAKE_ROUND_MKA_H
|
||||
#define BLAKE_ROUND_MKA_H
|
||||
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
/* designed by the Lyra PHC team */
|
||||
static FORCE_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
|
||||
const uint64_t m = UINT64_C(0xFFFFFFFF);
|
||||
const uint64_t xy = (x & m) * (y & m);
|
||||
return x + y + 2 * xy;
|
||||
}
|
||||
|
||||
#define G(a, b, c, d) \
|
||||
do { \
|
||||
a = fBlaMka(a, b); \
|
||||
d = rotr64(d ^ a, 32); \
|
||||
c = fBlaMka(c, d); \
|
||||
b = rotr64(b ^ c, 24); \
|
||||
a = fBlaMka(a, b); \
|
||||
d = rotr64(d ^ a, 16); \
|
||||
c = fBlaMka(c, d); \
|
||||
b = rotr64(b ^ c, 63); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \
|
||||
v12, v13, v14, v15) \
|
||||
do { \
|
||||
G(v0, v4, v8, v12); \
|
||||
G(v1, v5, v9, v13); \
|
||||
G(v2, v6, v10, v14); \
|
||||
G(v3, v7, v11, v15); \
|
||||
G(v0, v5, v10, v15); \
|
||||
G(v1, v6, v11, v12); \
|
||||
G(v2, v7, v8, v13); \
|
||||
G(v3, v4, v9, v14); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#endif
|
||||
162
randomx/blake2/blamka-round-ssse3.h
Normal file
162
randomx/blake2/blamka-round-ssse3.h
Normal file
@@ -0,0 +1,162 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#ifndef BLAKE_ROUND_MKA_OPT_H
|
||||
#define BLAKE_ROUND_MKA_OPT_H
|
||||
|
||||
#include "blake2-impl.h"
|
||||
|
||||
#ifdef __GNUC__
|
||||
#include <x86intrin.h>
|
||||
#else
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef _mm_roti_epi64 //clang defines it using the XOP instruction set
|
||||
#undef _mm_roti_epi64
|
||||
#endif
|
||||
|
||||
#define r16 \
|
||||
(_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
|
||||
#define r24 \
|
||||
(_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
|
||||
#define _mm_roti_epi64(x, c) \
|
||||
(-(c) == 32) \
|
||||
? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
|
||||
: (-(c) == 24) \
|
||||
? _mm_shuffle_epi8((x), r24) \
|
||||
: (-(c) == 16) \
|
||||
? _mm_shuffle_epi8((x), r16) \
|
||||
: (-(c) == 63) \
|
||||
? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
|
||||
_mm_add_epi64((x), (x))) \
|
||||
: _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
|
||||
_mm_slli_epi64((x), 64 - (-(c))))
|
||||
|
||||
static FORCE_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
|
||||
const __m128i z = _mm_mul_epu32(x, y);
|
||||
return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
|
||||
}
|
||||
|
||||
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
A0 = fBlaMka(A0, B0); \
|
||||
A1 = fBlaMka(A1, B1); \
|
||||
\
|
||||
D0 = _mm_xor_si128(D0, A0); \
|
||||
D1 = _mm_xor_si128(D1, A1); \
|
||||
\
|
||||
D0 = _mm_roti_epi64(D0, -32); \
|
||||
D1 = _mm_roti_epi64(D1, -32); \
|
||||
\
|
||||
C0 = fBlaMka(C0, D0); \
|
||||
C1 = fBlaMka(C1, D1); \
|
||||
\
|
||||
B0 = _mm_xor_si128(B0, C0); \
|
||||
B1 = _mm_xor_si128(B1, C1); \
|
||||
\
|
||||
B0 = _mm_roti_epi64(B0, -24); \
|
||||
B1 = _mm_roti_epi64(B1, -24); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
A0 = fBlaMka(A0, B0); \
|
||||
A1 = fBlaMka(A1, B1); \
|
||||
\
|
||||
D0 = _mm_xor_si128(D0, A0); \
|
||||
D1 = _mm_xor_si128(D1, A1); \
|
||||
\
|
||||
D0 = _mm_roti_epi64(D0, -16); \
|
||||
D1 = _mm_roti_epi64(D1, -16); \
|
||||
\
|
||||
C0 = fBlaMka(C0, D0); \
|
||||
C1 = fBlaMka(C1, D1); \
|
||||
\
|
||||
B0 = _mm_xor_si128(B0, C0); \
|
||||
B1 = _mm_xor_si128(B1, C1); \
|
||||
\
|
||||
B0 = _mm_roti_epi64(B0, -63); \
|
||||
B1 = _mm_roti_epi64(B1, -63); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
__m128i t0 = _mm_alignr_epi8(B1, B0, 8); \
|
||||
__m128i t1 = _mm_alignr_epi8(B0, B1, 8); \
|
||||
B0 = t0; \
|
||||
B1 = t1; \
|
||||
\
|
||||
t0 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = t0; \
|
||||
\
|
||||
t0 = _mm_alignr_epi8(D1, D0, 8); \
|
||||
t1 = _mm_alignr_epi8(D0, D1, 8); \
|
||||
D0 = t1; \
|
||||
D1 = t0; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
|
||||
do { \
|
||||
__m128i t0 = _mm_alignr_epi8(B0, B1, 8); \
|
||||
__m128i t1 = _mm_alignr_epi8(B1, B0, 8); \
|
||||
B0 = t0; \
|
||||
B1 = t1; \
|
||||
\
|
||||
t0 = C0; \
|
||||
C0 = C1; \
|
||||
C1 = t0; \
|
||||
\
|
||||
t0 = _mm_alignr_epi8(D0, D1, 8); \
|
||||
t1 = _mm_alignr_epi8(D1, D0, 8); \
|
||||
D0 = t1; \
|
||||
D1 = t0; \
|
||||
} while ((void)0, 0)
|
||||
|
||||
#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
|
||||
do { \
|
||||
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
G1(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
G2(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
\
|
||||
UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
|
||||
} while ((void)0, 0)
|
||||
|
||||
|
||||
#endif /* BLAKE_ROUND_MKA_OPT_H */
|
||||
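Note (illustrative, not part of the original header): the `_mm_roti_epi64` replacement above performs right-rotations with shuffles where possible (32 via `_mm_shuffle_epi32`, 24/16 via byte shuffles) and with shift+xor otherwise; for a rotation by 63 it uses `x + x` as a cheap `x << 1`. A scalar sketch of the identity it relies on:

```
#include <cstdint>

// rotr64(x, 63) == (x >> 63) | (x << 1); the SSSE3 macro computes x << 1 as x + x.
// x + x has bit 0 clear and x >> 63 only sets bit 0, so XOR equals OR here.
static inline uint64_t rotr64_by_63(uint64_t x) {
    return (x >> 63) ^ (x + x);
}
```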
107
randomx/blake2/endian.h
Normal file
@@ -0,0 +1,107 @@
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define FORCE_INLINE __inline
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
#define FORCE_INLINE __inline__
|
||||
#else
|
||||
#define FORCE_INLINE
|
||||
#endif
|
||||
|
||||
/* Argon2 Team - Begin Code */
|
||||
/*
|
||||
Not an exhaustive list, but should cover the majority of modern platforms
|
||||
Additionally, the code will always be correct---this is only a performance
|
||||
tweak.
|
||||
*/
|
||||
#if (defined(__BYTE_ORDER__) && \
|
||||
(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \
|
||||
defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \
|
||||
defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \
|
||||
defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \
|
||||
defined(_M_ARM)
|
||||
#define NATIVE_LITTLE_ENDIAN
|
||||
#endif
|
||||
/* Argon2 Team - End Code */
|
||||
|
||||
static FORCE_INLINE uint32_t load32(const void *src) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
uint32_t w;
|
||||
memcpy(&w, src, sizeof w);
|
||||
return w;
|
||||
#else
|
||||
const uint8_t *p = (const uint8_t *)src;
|
||||
uint32_t w = *p++;
|
||||
w |= (uint32_t)(*p++) << 8;
|
||||
w |= (uint32_t)(*p++) << 16;
|
||||
w |= (uint32_t)(*p++) << 24;
|
||||
return w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static FORCE_INLINE uint64_t load64_native(const void *src) {
|
||||
uint64_t w;
|
||||
memcpy(&w, src, sizeof w);
|
||||
return w;
|
||||
}
|
||||
|
||||
static FORCE_INLINE uint64_t load64(const void *src) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
return load64_native(src);
|
||||
#else
|
||||
const uint8_t *p = (const uint8_t *)src;
|
||||
uint64_t w = *p++;
|
||||
w |= (uint64_t)(*p++) << 8;
|
||||
w |= (uint64_t)(*p++) << 16;
|
||||
w |= (uint64_t)(*p++) << 24;
|
||||
w |= (uint64_t)(*p++) << 32;
|
||||
w |= (uint64_t)(*p++) << 40;
|
||||
w |= (uint64_t)(*p++) << 48;
|
||||
w |= (uint64_t)(*p++) << 56;
|
||||
return w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static FORCE_INLINE void store32(void *dst, uint32_t w) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
memcpy(dst, &w, sizeof w);
|
||||
#else
|
||||
uint8_t *p = (uint8_t *)dst;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
#endif
|
||||
}
|
||||
|
||||
static FORCE_INLINE void store64_native(void *dst, uint64_t w) {
|
||||
memcpy(dst, &w, sizeof w);
|
||||
}
|
||||
|
||||
static FORCE_INLINE void store64(void *dst, uint64_t w) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
store64_native(dst, w);
|
||||
#else
|
||||
uint8_t *p = (uint8_t *)dst;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
w >>= 8;
|
||||
*p++ = (uint8_t)w;
|
||||
#endif
|
||||
}
|
||||
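Note (illustrative usage, assuming the header above is on the include path): `load64`/`store64` give byte-order-independent access to the little-endian buffers used throughout the code; on little-endian targets they collapse to `memcpy`.

```
#include <cassert>
#include <cstdint>
#include "blake2/endian.h"

// Round-trip: the serialized bytes are little-endian regardless of host order.
void endian_demo() {
    uint8_t buf[8];
    store64(buf, 0x0123456789ABCDEFULL);
    assert(buf[0] == 0xEF && buf[7] == 0x01);
    assert(load64(buf) == 0x0123456789ABCDEFULL);
}
```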
62
randomx/blake2_generator.cpp
Normal file
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include "blake2/blake2.h"
|
||||
#include "blake2/endian.h"
|
||||
#include "blake2_generator.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
constexpr int maxSeedSize = 60;
|
||||
|
||||
Blake2Generator::Blake2Generator(const void* seed, size_t seedSize, int nonce) : dataIndex(sizeof(data)) {
|
||||
memset(data, 0, sizeof(data));
|
||||
memcpy(data, seed, seedSize > maxSeedSize ? maxSeedSize : seedSize);
|
||||
store32(&data[maxSeedSize], nonce);
|
||||
}
|
||||
|
||||
uint8_t Blake2Generator::getByte() {
|
||||
checkData(1);
|
||||
return data[dataIndex++];
|
||||
}
|
||||
|
||||
uint32_t Blake2Generator::getUInt32() {
|
||||
checkData(4);
|
||||
auto ret = load32(&data[dataIndex]);
|
||||
dataIndex += 4;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void Blake2Generator::checkData(const size_t bytesNeeded) {
|
||||
if (dataIndex + bytesNeeded > sizeof(data)) {
|
||||
blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
|
||||
dataIndex = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
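Note (hedged usage sketch, the seed value is made up): `Blake2Generator` is a small deterministic byte stream. Its 64-byte buffer is seeded with (seed, nonce) and re-hashed with BLAKE2b whenever fewer bytes remain than requested.

```
#include <cstdint>
#include "blake2_generator.hpp"

void generator_demo() {
    const char seed[] = "example seed";          // arbitrary, for illustration only
    randomx::Blake2Generator gen(seed, sizeof(seed), /*nonce=*/0);
    uint8_t  b = gen.getByte();                  // consumes 1 byte of the stream
    uint32_t w = gen.getUInt32();                // consumes 4 more bytes
    (void)b; (void)w;
}
```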
46
randomx/blake2_generator.hpp
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace randomx {
|
||||
|
||||
class Blake2Generator {
|
||||
public:
|
||||
Blake2Generator(const void* seed, size_t seedSize, int nonce = 0);
|
||||
uint8_t getByte();
|
||||
uint32_t getUInt32();
|
||||
private:
|
||||
void checkData(const size_t);
|
||||
|
||||
uint8_t data[64];
|
||||
size_t dataIndex;
|
||||
};
|
||||
}
|
||||
494
randomx/bytecode_machine.cpp
Normal file
@@ -0,0 +1,494 @@
|
||||
/*
|
||||
Copyright (c) 2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "bytecode_machine.hpp"
|
||||
#include "reciprocal.h"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
const int_reg_t BytecodeMachine::zero = 0;
|
||||
|
||||
#define INSTR_CASE(x) case InstructionType::x: \
|
||||
exe_ ## x(ibc, pc, scratchpad, config); \
|
||||
break;
|
||||
|
||||
void BytecodeMachine::executeInstruction(RANDOMX_EXE_ARGS) {
|
||||
switch (ibc.type)
|
||||
{
|
||||
INSTR_CASE(IADD_RS)
|
||||
INSTR_CASE(IADD_M)
|
||||
INSTR_CASE(ISUB_R)
|
||||
INSTR_CASE(ISUB_M)
|
||||
INSTR_CASE(IMUL_R)
|
||||
INSTR_CASE(IMUL_M)
|
||||
INSTR_CASE(IMULH_R)
|
||||
INSTR_CASE(IMULH_M)
|
||||
INSTR_CASE(ISMULH_R)
|
||||
INSTR_CASE(ISMULH_M)
|
||||
INSTR_CASE(INEG_R)
|
||||
INSTR_CASE(IXOR_R)
|
||||
INSTR_CASE(IXOR_M)
|
||||
INSTR_CASE(IROR_R)
|
||||
INSTR_CASE(IROL_R)
|
||||
INSTR_CASE(ISWAP_R)
|
||||
INSTR_CASE(FSWAP_R)
|
||||
INSTR_CASE(FADD_R)
|
||||
INSTR_CASE(FADD_M)
|
||||
INSTR_CASE(FSUB_R)
|
||||
INSTR_CASE(FSUB_M)
|
||||
INSTR_CASE(FSCAL_R)
|
||||
INSTR_CASE(FMUL_R)
|
||||
INSTR_CASE(FDIV_M)
|
||||
INSTR_CASE(FSQRT_R)
|
||||
INSTR_CASE(CBRANCH)
|
||||
INSTR_CASE(CFROUND)
|
||||
INSTR_CASE(ISTORE)
|
||||
|
||||
case InstructionType::NOP:
|
||||
break;
|
||||
|
||||
case InstructionType::IMUL_RCP: //executed as IMUL_R
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
|
||||
void BytecodeMachine::compileInstruction(RANDOMX_GEN_ARGS) {
|
||||
int opcode = instr.opcode;
|
||||
|
||||
// printf("nreg.r[0]=%016llx\n",nreg->r[0]);
|
||||
// printf("nreg.r[1]=%016llx\n",nreg->r[1]);
|
||||
// printf("nreg.r[2]=%016llx\n",nreg->r[2]);
|
||||
// printf("nreg.r[3]=%016llx\n",nreg->r[3]);
|
||||
// printf("nreg.r[4]=%016llx\n",nreg->r[4]);
|
||||
// printf("nreg.r[5]=%016llx\n",nreg->r[5]);
|
||||
// printf("nreg.r[6]=%016llx\n",nreg->r[6]);
|
||||
// printf("nreg.r[7]=%016llx\n",nreg->r[7]);
|
||||
|
||||
//nreg.r: all 8 integer registers start out as 0
|
||||
|
||||
//printf("ceil_IADD_RS= %0d,ceil_IADD_M=%0d,ceil_ISUB_R=%0d\n",ceil_IADD_RS,ceil_IADD_M,ceil_ISUB_R);
|
||||
if (opcode < ceil_IADD_RS) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IADD_RS;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
if (dst != RegisterNeedsDisplacement) { //RegisterNeedsDisplacement=5
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.shift = instr.getModShift();
|
||||
ibc.imm = 0;
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.shift = instr.getModShift();
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_IADD_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IADD_M;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_ISUB_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::ISUB_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
}
|
||||
else {
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_ISUB_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::ISUB_M;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_IMUL_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IMUL_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
}
|
||||
else {
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_IMUL_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IMUL_M;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_IMULH_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IMULH_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.isrc = &nreg->r[src];
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_IMULH_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IMULH_M;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_ISMULH_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::ISMULH_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.isrc = &nreg->r[src];
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_ISMULH_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::ISMULH_M;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_IMUL_RCP) {
|
||||
uint64_t divisor = instr.getImm32();
|
||||
if (!isZeroOrPowerOf2(divisor)) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
ibc.type = InstructionType::IMUL_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.imm = randomx_reciprocal(divisor);
|
||||
ibc.isrc = &ibc.imm;
|
||||
registerUsage[dst] = i;
|
||||
}
|
||||
else {
|
||||
ibc.type = InstructionType::NOP;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_INEG_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
ibc.type = InstructionType::INEG_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_IXOR_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IXOR_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
}
|
||||
else {
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_IXOR_M) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IXOR_M;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
else {
|
||||
ibc.isrc = &zero;
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_IROR_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IROR_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
}
|
||||
else {
|
||||
ibc.imm = instr.getImm32();
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_IROL_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::IROL_R;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
if (src != dst) {
|
||||
ibc.isrc = &nreg->r[src];
|
||||
}
|
||||
else {
|
||||
ibc.imm = instr.getImm32();
|
||||
ibc.isrc = &ibc.imm;
|
||||
}
|
||||
registerUsage[dst] = i;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_ISWAP_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
if (src != dst) {
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.type = InstructionType::ISWAP_R;
|
||||
registerUsage[dst] = i;
|
||||
registerUsage[src] = i;
|
||||
}
|
||||
else {
|
||||
ibc.type = InstructionType::NOP;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_FSWAP_R) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
ibc.type = InstructionType::FSWAP_R;
|
||||
if (dst < RegisterCountFlt)
|
||||
ibc.fdst = &nreg->f[dst];
|
||||
else
|
||||
ibc.fdst = &nreg->e[dst - RegisterCountFlt];
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_FADD_R) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegisterCountFlt;
|
||||
ibc.type = InstructionType::FADD_R;
|
||||
ibc.fdst = &nreg->f[dst];
|
||||
ibc.fsrc = &nreg->a[src];
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_FADD_M) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::FADD_M;
|
||||
ibc.fdst = &nreg->f[dst];
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_FSUB_R) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegisterCountFlt;
|
||||
ibc.type = InstructionType::FSUB_R;
|
||||
ibc.fdst = &nreg->f[dst];
|
||||
ibc.fsrc = &nreg->a[src];
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_FSUB_M) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::FSUB_M;
|
||||
ibc.fdst = &nreg->f[dst];
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_FSCAL_R) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
ibc.fdst = &nreg->f[dst];
|
||||
ibc.type = InstructionType::FSCAL_R;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_FMUL_R) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegisterCountFlt;
|
||||
ibc.type = InstructionType::FMUL_R;
|
||||
ibc.fdst = &nreg->e[dst];
|
||||
ibc.fsrc = &nreg->a[src];
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_FDIV_M) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::FDIV_M;
|
||||
ibc.fdst = &nreg->e[dst];
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_FSQRT_R) {
|
||||
auto dst = instr.dst % RegisterCountFlt;
|
||||
ibc.type = InstructionType::FSQRT_R;
|
||||
ibc.fdst = &nreg->e[dst];
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_CBRANCH) {
|
||||
ibc.type = InstructionType::CBRANCH;
|
||||
//jump condition
|
||||
int creg = instr.dst % RegistersCount;
|
||||
ibc.idst = &nreg->r[creg];
|
||||
ibc.target = registerUsage[creg];
|
||||
int shift = instr.getModCond() + ConditionOffset;
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift);
|
||||
if (ConditionOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2
|
||||
ibc.imm &= ~(1ULL << (shift - 1));
|
||||
ibc.memMask = ConditionMask << shift;
|
||||
//mark all registers as used
|
||||
for (unsigned j = 0; j < RegistersCount; ++j) {
|
||||
registerUsage[j] = i;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_CFROUND) {
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.type = InstructionType::CFROUND;
|
||||
ibc.imm = instr.getImm32() & 63;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_ISTORE) {
|
||||
auto dst = instr.dst % RegistersCount;
|
||||
auto src = instr.src % RegistersCount;
|
||||
ibc.type = InstructionType::ISTORE;
|
||||
ibc.idst = &nreg->r[dst];
|
||||
ibc.isrc = &nreg->r[src];
|
||||
ibc.imm = signExtend2sCompl(instr.getImm32());
|
||||
if (instr.getModCond() < StoreL3Condition) //StoreL3Condition= 14
|
||||
ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
else
|
||||
ibc.memMask = ScratchpadL3Mask;
|
||||
return;
|
||||
}
|
||||
|
||||
if (opcode < ceil_NOP) {
|
||||
ibc.type = InstructionType::NOP;
|
||||
return;
|
||||
}
|
||||
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
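Note (standalone sketch, names are illustrative and not the real tables): `compileInstruction` above decodes the 8-bit opcode by comparing it against cumulative frequency ceilings (`ceil_IADD_RS`, `ceil_IADD_M`, ...), so each instruction occupies a contiguous band of opcode values whose width equals its `RANDOMX_FREQ_*` setting.

```
#include <cstdint>

// Frequency-weighted opcode decoding: cumulative ceilings partition 0..255.
enum class Op { IADD_RS, IADD_M, ISUB_R, OTHER };

Op decode(uint8_t opcode) {
    constexpr int ceilIADD_RS = 16;            // RANDOMX_FREQ_IADD_RS
    constexpr int ceilIADD_M  = 16 + 7;        // + RANDOMX_FREQ_IADD_M
    constexpr int ceilISUB_R  = 16 + 7 + 16;   // + RANDOMX_FREQ_ISUB_R
    if (opcode < ceilIADD_RS) return Op::IADD_RS;
    if (opcode < ceilIADD_M)  return Op::IADD_M;
    if (opcode < ceilISUB_R)  return Op::ISUB_R;
    return Op::OTHER;                          // remaining bands omitted in this sketch
}
```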
322
randomx/bytecode_machine.hpp
Normal file
@@ -0,0 +1,322 @@
|
||||
/*
|
||||
Copyright (c) 2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common.hpp"
|
||||
#include "intrin_portable.h"
|
||||
#include "instruction.hpp"
|
||||
#include "program.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
//register file in machine byte order
|
||||
struct NativeRegisterFile {
|
||||
int_reg_t r[RegistersCount] = { 0 };
|
||||
rx_vec_f128 f[RegisterCountFlt];
|
||||
rx_vec_f128 e[RegisterCountFlt];
|
||||
rx_vec_f128 a[RegisterCountFlt];
|
||||
};
|
||||
|
||||
struct InstructionByteCode {
|
||||
union {
|
||||
int_reg_t* idst;
|
||||
rx_vec_f128* fdst;
|
||||
};
|
||||
union {
|
||||
const int_reg_t* isrc;
|
||||
const rx_vec_f128* fsrc;
|
||||
};
|
||||
union {
|
||||
uint64_t imm;
|
||||
int64_t simm;
|
||||
};
|
||||
InstructionType type;
|
||||
union {
|
||||
int16_t target;
|
||||
uint16_t shift;
|
||||
};
|
||||
uint32_t memMask;
|
||||
};
|
||||
|
||||
#define OPCODE_CEIL_DECLARE(curr, prev) constexpr int ceil_ ## curr = ceil_ ## prev + RANDOMX_FREQ_ ## curr;
|
||||
constexpr int ceil_NULL = 0;
|
||||
OPCODE_CEIL_DECLARE(IADD_RS, NULL);
|
||||
OPCODE_CEIL_DECLARE(IADD_M, IADD_RS);
|
||||
OPCODE_CEIL_DECLARE(ISUB_R, IADD_M);
|
||||
OPCODE_CEIL_DECLARE(ISUB_M, ISUB_R);
|
||||
OPCODE_CEIL_DECLARE(IMUL_R, ISUB_M);
|
||||
OPCODE_CEIL_DECLARE(IMUL_M, IMUL_R);
|
||||
OPCODE_CEIL_DECLARE(IMULH_R, IMUL_M);
|
||||
OPCODE_CEIL_DECLARE(IMULH_M, IMULH_R);
|
||||
OPCODE_CEIL_DECLARE(ISMULH_R, IMULH_M);
|
||||
OPCODE_CEIL_DECLARE(ISMULH_M, ISMULH_R);
|
||||
OPCODE_CEIL_DECLARE(IMUL_RCP, ISMULH_M);
|
||||
OPCODE_CEIL_DECLARE(INEG_R, IMUL_RCP);
|
||||
OPCODE_CEIL_DECLARE(IXOR_R, INEG_R);
|
||||
OPCODE_CEIL_DECLARE(IXOR_M, IXOR_R);
|
||||
OPCODE_CEIL_DECLARE(IROR_R, IXOR_M);
|
||||
OPCODE_CEIL_DECLARE(IROL_R, IROR_R);
|
||||
OPCODE_CEIL_DECLARE(ISWAP_R, IROL_R);
|
||||
OPCODE_CEIL_DECLARE(FSWAP_R, ISWAP_R);
|
||||
OPCODE_CEIL_DECLARE(FADD_R, FSWAP_R);
|
||||
OPCODE_CEIL_DECLARE(FADD_M, FADD_R);
|
||||
OPCODE_CEIL_DECLARE(FSUB_R, FADD_M);
|
||||
OPCODE_CEIL_DECLARE(FSUB_M, FSUB_R);
|
||||
OPCODE_CEIL_DECLARE(FSCAL_R, FSUB_M);
|
||||
OPCODE_CEIL_DECLARE(FMUL_R, FSCAL_R);
|
||||
OPCODE_CEIL_DECLARE(FDIV_M, FMUL_R);
|
||||
OPCODE_CEIL_DECLARE(FSQRT_R, FDIV_M);
|
||||
OPCODE_CEIL_DECLARE(CBRANCH, FSQRT_R);
|
||||
OPCODE_CEIL_DECLARE(CFROUND, CBRANCH);
|
||||
OPCODE_CEIL_DECLARE(ISTORE, CFROUND);
|
||||
OPCODE_CEIL_DECLARE(NOP, ISTORE);
|
||||
#undef OPCODE_CEIL_DECLARE
|
||||
|
||||
#define RANDOMX_EXE_ARGS InstructionByteCode& ibc, int& pc, uint8_t* scratchpad, ProgramConfiguration& config
|
||||
#define RANDOMX_GEN_ARGS Instruction& instr, int i, InstructionByteCode& ibc
|
||||
|
||||
class BytecodeMachine;
|
||||
|
||||
typedef void(BytecodeMachine::*InstructionGenBytecode)(RANDOMX_GEN_ARGS);
|
||||
|
||||
class BytecodeMachine {
|
||||
public:
|
||||
void beginCompilation(NativeRegisterFile& regFile) {
|
||||
for (unsigned i = 0; i < RegistersCount; ++i) {
|
||||
registerUsage[i] = -1;
|
||||
}
|
||||
nreg = ®File;
|
||||
}
|
||||
|
||||
void compileProgram(Program& program, InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE], NativeRegisterFile& regFile) {
|
||||
beginCompilation(regFile);
|
||||
for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { //256
|
||||
auto& instr = program(i);
|
||||
auto& ibc = bytecode[i];
|
||||
compileInstruction(instr, i, ibc);
|
||||
}
|
||||
}
|
||||
|
||||
static void executeBytecode(InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE], uint8_t* scratchpad, ProgramConfiguration& config) {
|
||||
for (int pc = 0; pc < RANDOMX_PROGRAM_SIZE; ++pc) {
|
||||
auto& ibc = bytecode[pc];
|
||||
executeInstruction(ibc, pc, scratchpad, config);
|
||||
}
|
||||
}
|
||||
|
||||
void compileInstruction(RANDOMX_GEN_ARGS)
|
||||
#ifdef RANDOMX_GEN_TABLE
|
||||
{
|
||||
auto generator = genTable[instr.opcode];
|
||||
(this->*generator)(instr, i, ibc);
|
||||
}
|
||||
#else
|
||||
;
|
||||
#endif
|
||||
|
||||
static void executeInstruction(RANDOMX_EXE_ARGS);
|
||||
|
||||
static void exe_IADD_RS(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm;
|
||||
}
|
||||
|
||||
static void exe_IADD_M(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst += load64(getScratchpadAddress(ibc, scratchpad));
|
||||
}
|
||||
|
||||
static void exe_ISUB_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst -= *ibc.isrc;
|
||||
}
|
||||
|
||||
static void exe_ISUB_M(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst -= load64(getScratchpadAddress(ibc, scratchpad));
|
||||
}
|
||||
|
||||
static void exe_IMUL_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst *= *ibc.isrc;
|
||||
}
|
||||
|
||||
static void exe_IMUL_M(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst *= load64(getScratchpadAddress(ibc, scratchpad));
|
||||
}
|
||||
|
||||
static void exe_IMULH_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst = mulh(*ibc.idst, *ibc.isrc);
|
||||
}
|
||||
|
||||
static void exe_IMULH_M(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst = mulh(*ibc.idst, load64(getScratchpadAddress(ibc, scratchpad)));
|
||||
}
|
||||
|
||||
static void exe_ISMULH_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(*ibc.isrc));
|
||||
}
|
||||
|
||||
static void exe_ISMULH_M(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(load64(getScratchpadAddress(ibc, scratchpad))));
|
||||
}
|
||||
|
||||
static void exe_INEG_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst = ~(*ibc.idst) + 1; //two's complement negative
|
||||
}
|
||||
|
||||
static void exe_IXOR_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst ^= *ibc.isrc;
|
||||
}
|
||||
|
||||
static void exe_IXOR_M(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst ^= load64(getScratchpadAddress(ibc, scratchpad));
|
||||
}
|
||||
|
||||
static void exe_IROR_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst = rotr(*ibc.idst, *ibc.isrc & 63);
|
||||
}
|
||||
|
||||
static void exe_IROL_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst = rotl(*ibc.idst, *ibc.isrc & 63);
|
||||
}
|
||||
|
||||
static void exe_ISWAP_R(RANDOMX_EXE_ARGS) {
|
||||
int_reg_t temp = *ibc.isrc;
|
||||
*(int_reg_t*)ibc.isrc = *ibc.idst;
|
||||
*ibc.idst = temp;
|
||||
}
|
||||
|
||||
static void exe_FSWAP_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.fdst = rx_swap_vec_f128(*ibc.fdst);
|
||||
}
|
||||
|
||||
static void exe_FADD_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.fdst = rx_add_vec_f128(*ibc.fdst, *ibc.fsrc);
|
||||
}
|
||||
|
||||
static void exe_FADD_M(RANDOMX_EXE_ARGS) {
|
||||
rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad));
|
||||
*ibc.fdst = rx_add_vec_f128(*ibc.fdst, fsrc);
|
||||
}
|
||||
|
||||
static void exe_FSUB_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.fdst = rx_sub_vec_f128(*ibc.fdst, *ibc.fsrc);
|
||||
}
|
||||
|
||||
static void exe_FSUB_M(RANDOMX_EXE_ARGS) {
|
||||
rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad));
|
||||
*ibc.fdst = rx_sub_vec_f128(*ibc.fdst, fsrc);
|
||||
}
|
||||
|
||||
static void exe_FSCAL_R(RANDOMX_EXE_ARGS) {
|
||||
const rx_vec_f128 mask = rx_set1_vec_f128(0x80F0000000000000);
|
||||
*ibc.fdst = rx_xor_vec_f128(*ibc.fdst, mask);
|
||||
}
|
||||
|
||||
static void exe_FMUL_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.fdst = rx_mul_vec_f128(*ibc.fdst, *ibc.fsrc);
|
||||
}
|
||||
|
||||
static void exe_FDIV_M(RANDOMX_EXE_ARGS) {
|
||||
rx_vec_f128 fsrc = maskRegisterExponentMantissa(
|
||||
config,
|
||||
rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad))
|
||||
);
|
||||
*ibc.fdst = rx_div_vec_f128(*ibc.fdst, fsrc);
|
||||
}
|
||||
|
||||
static void exe_FSQRT_R(RANDOMX_EXE_ARGS) {
|
||||
*ibc.fdst = rx_sqrt_vec_f128(*ibc.fdst);
|
||||
}
|
||||
|
||||
static void exe_CBRANCH(RANDOMX_EXE_ARGS) {
|
||||
*ibc.idst += ibc.imm;
|
||||
if ((*ibc.idst & ibc.memMask) == 0) {
|
||||
pc = ibc.target;
|
||||
}
|
||||
}
|
||||
|
||||
static void exe_CFROUND(RANDOMX_EXE_ARGS) {
|
||||
rx_set_rounding_mode(rotr(*ibc.isrc, ibc.imm) % 4);
|
||||
}
|
||||
|
||||
static void exe_ISTORE(RANDOMX_EXE_ARGS) {
|
||||
store64(scratchpad + ((*ibc.idst + ibc.imm) & ibc.memMask), *ibc.isrc);
|
||||
}
|
||||
protected:
|
||||
static rx_vec_f128 maskRegisterExponentMantissa(ProgramConfiguration& config, rx_vec_f128 x) {
|
||||
const rx_vec_f128 xmantissaMask = rx_set_vec_f128(dynamicMantissaMask, dynamicMantissaMask);
|
||||
const rx_vec_f128 xexponentMask = rx_load_vec_f128((const double*)&config.eMask);
|
||||
x = rx_and_vec_f128(x, xmantissaMask);
|
||||
x = rx_or_vec_f128(x, xexponentMask);
|
||||
return x;
|
||||
}
|
||||
|
||||
private:
|
||||
static const int_reg_t zero;
|
||||
int registerUsage[RegistersCount];
|
||||
NativeRegisterFile* nreg;
|
||||
|
||||
static void* getScratchpadAddress(InstructionByteCode& ibc, uint8_t* scratchpad) {
|
||||
uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask;
|
||||
return scratchpad + addr;
|
||||
}
|
||||
|
||||
#ifdef RANDOMX_GEN_TABLE
|
||||
static InstructionGenBytecode genTable[256];
|
||||
|
||||
void gen_IADD_RS(RANDOMX_GEN_ARGS);
|
||||
void gen_IADD_M(RANDOMX_GEN_ARGS);
|
||||
void gen_ISUB_R(RANDOMX_GEN_ARGS);
|
||||
void gen_ISUB_M(RANDOMX_GEN_ARGS);
|
||||
void gen_IMUL_R(RANDOMX_GEN_ARGS);
|
||||
void gen_IMUL_M(RANDOMX_GEN_ARGS);
|
||||
void gen_IMULH_R(RANDOMX_GEN_ARGS);
|
||||
void gen_IMULH_M(RANDOMX_GEN_ARGS);
|
||||
void gen_ISMULH_R(RANDOMX_GEN_ARGS);
|
||||
void gen_ISMULH_M(RANDOMX_GEN_ARGS);
|
||||
void gen_IMUL_RCP(RANDOMX_GEN_ARGS);
|
||||
void gen_INEG_R(RANDOMX_GEN_ARGS);
|
||||
void gen_IXOR_R(RANDOMX_GEN_ARGS);
|
||||
void gen_IXOR_M(RANDOMX_GEN_ARGS);
|
||||
void gen_IROR_R(RANDOMX_GEN_ARGS);
|
||||
void gen_IROL_R(RANDOMX_GEN_ARGS);
|
||||
void gen_ISWAP_R(RANDOMX_GEN_ARGS);
|
||||
void gen_FSWAP_R(RANDOMX_GEN_ARGS);
|
||||
void gen_FADD_R(RANDOMX_GEN_ARGS);
|
||||
void gen_FADD_M(RANDOMX_GEN_ARGS);
|
||||
void gen_FSUB_R(RANDOMX_GEN_ARGS);
|
||||
void gen_FSUB_M(RANDOMX_GEN_ARGS);
|
||||
void gen_FSCAL_R(RANDOMX_GEN_ARGS);
|
||||
void gen_FMUL_R(RANDOMX_GEN_ARGS);
|
||||
void gen_FDIV_M(RANDOMX_GEN_ARGS);
|
||||
void gen_FSQRT_R(RANDOMX_GEN_ARGS);
|
||||
void gen_CBRANCH(RANDOMX_GEN_ARGS);
|
||||
void gen_CFROUND(RANDOMX_GEN_ARGS);
|
||||
void gen_ISTORE(RANDOMX_GEN_ARGS);
|
||||
void gen_NOP(RANDOMX_GEN_ARGS);
|
||||
#endif
|
||||
};
|
||||
}
|
||||
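Note (scalar sketch of just the condition test; values are illustrative): in `exe_CBRANCH`, the jump is taken when the `RANDOMX_JUMP_BITS`-wide field selected by `ibc.memMask` becomes zero after adding the immediate, and `compileInstruction` prepares `ibc.imm` so that this happens roughly once per 2^RANDOMX_JUMP_BITS executions.

```
#include <cstdint>

// Branch test: take the jump iff an 8-bit window of the updated register is zero.
bool cbranch_taken(uint64_t& reg, uint64_t imm, int shift /*ConditionOffset + modCond*/) {
    const uint64_t conditionMask = 0xFFULL << shift;   // RANDOMX_JUMP_BITS == 8
    reg += imm;
    return (reg & conditionMask) == 0;                 // ~1/256 chance per iteration
}
```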
187
randomx/common.hpp
Normal file
@@ -0,0 +1,187 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <climits>
|
||||
#include "blake2/endian.h"
|
||||
#include "configuration.h"
|
||||
#include "randomx.h"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
static_assert(RANDOMX_ARGON_MEMORY >= 8, "RANDOMX_ARGON_MEMORY must be at least 8.");
|
||||
static_assert(RANDOMX_ARGON_MEMORY <= 2097152, "RANDOMX_ARGON_MEMORY must not exceed 2097152.");
|
||||
static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2.");
|
||||
static_assert(RANDOMX_ARGON_ITERATIONS > 0 && RANDOMX_ARGON_ITERATIONS < UINT32_MAX, "RANDOMX_ARGON_ITERATIONS must be a positive 32-bit integer.");
|
||||
static_assert(RANDOMX_ARGON_LANES > 0 && RANDOMX_ARGON_LANES <= 16777215, "RANDOMX_ARGON_LANES out of range");
|
||||
static_assert(RANDOMX_DATASET_BASE_SIZE >= 64, "RANDOMX_DATASET_BASE_SIZE must be at least 64.");
|
||||
static_assert((RANDOMX_DATASET_BASE_SIZE & (RANDOMX_DATASET_BASE_SIZE - 1)) == 0, "RANDOMX_DATASET_BASE_SIZE must be a power of 2.");
|
||||
static_assert(RANDOMX_DATASET_BASE_SIZE <= 4294967296ULL, "RANDOMX_DATASET_BASE_SIZE must not exceed 4294967296.");
|
||||
static_assert(RANDOMX_DATASET_EXTRA_SIZE % 64 == 0, "RANDOMX_DATASET_EXTRA_SIZE must be divisible by 64.");
|
||||
static_assert((uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE <= 17179869184, "Dataset size must not exceed 16 GiB.");
|
||||
static_assert(RANDOMX_PROGRAM_SIZE > 0, "RANDOMX_PROGRAM_SIZE must be greater than 0");
|
||||
static_assert(RANDOMX_PROGRAM_SIZE <= 32768, "RANDOMX_PROGRAM_SIZE must not exceed 32768");
|
||||
static_assert(RANDOMX_PROGRAM_ITERATIONS > 0, "RANDOMX_PROGRAM_ITERATIONS must be greater than 0");
|
||||
static_assert(RANDOMX_PROGRAM_COUNT > 0, "RANDOMX_PROGRAM_COUNT must be greater than 0");
|
||||
static_assert((RANDOMX_SCRATCHPAD_L3 & (RANDOMX_SCRATCHPAD_L3 - 1)) == 0, "RANDOMX_SCRATCHPAD_L3 must be a power of 2.");
|
||||
static_assert(RANDOMX_SCRATCHPAD_L3 >= RANDOMX_SCRATCHPAD_L2, "RANDOMX_SCRATCHPAD_L3 must be greater than or equal to RANDOMX_SCRATCHPAD_L2.");
|
||||
static_assert((RANDOMX_SCRATCHPAD_L2 & (RANDOMX_SCRATCHPAD_L2 - 1)) == 0, "RANDOMX_SCRATCHPAD_L2 must be a power of 2.");
|
||||
static_assert(RANDOMX_SCRATCHPAD_L2 >= RANDOMX_SCRATCHPAD_L1, "RANDOMX_SCRATCHPAD_L2 must be greater than or equal to RANDOMX_SCRATCHPAD_L1.");
|
||||
static_assert(RANDOMX_SCRATCHPAD_L1 >= 64, "RANDOMX_SCRATCHPAD_L1 must be at least 64.");
|
||||
static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2.");
|
||||
static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1");
|
||||
static_assert(RANDOMX_SUPERSCALAR_LATENCY > 0, "RANDOMX_SUPERSCALAR_LATENCY must be greater than 0");
|
||||
static_assert(RANDOMX_SUPERSCALAR_LATENCY <= 10000, "RANDOMX_SUPERSCALAR_LATENCY must not exceed 10000");
|
||||
static_assert(RANDOMX_JUMP_BITS > 0, "RANDOMX_JUMP_BITS must be greater than 0.");
|
||||
static_assert(RANDOMX_JUMP_OFFSET >= 0, "RANDOMX_JUMP_OFFSET must be greater than or equal to 0.");
|
||||
static_assert(RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET <= 16, "RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET must not exceed 16.");
|
||||
|
||||
constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \
|
||||
RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \
|
||||
RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \
|
||||
RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_IROL_R + RANDOMX_FREQ_ISWAP_R + \
|
||||
RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R + RANDOMX_FREQ_FADD_M + RANDOMX_FREQ_FSUB_R + RANDOMX_FREQ_FSUB_M + \
|
||||
RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_CBRANCH + \
|
||||
RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP;
|
||||
|
||||
static_assert(wtSum == 256, "Sum of instruction frequencies must be 256.");
|
||||
|
||||
|
||||
constexpr uint32_t ArgonBlockSize = 1024;
|
||||
constexpr int ArgonSaltSize = sizeof("" RANDOMX_ARGON_SALT) - 1;
|
||||
static_assert(ArgonSaltSize >= 8, "RANDOMX_ARGON_SALT must be at least 8 characters long");
|
||||
constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_LATENCY + 2;
|
||||
constexpr size_t CacheLineSize = RANDOMX_DATASET_ITEM_SIZE;
|
||||
constexpr int ScratchpadSize = RANDOMX_SCRATCHPAD_L3;
|
||||
constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) & ~(CacheLineSize - 1);
|
||||
constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * ArgonBlockSize;
|
||||
constexpr uint64_t DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE;
|
||||
constexpr uint32_t DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE;
|
||||
constexpr uint32_t ConditionMask = ((1 << RANDOMX_JUMP_BITS) - 1);
|
||||
constexpr int ConditionOffset = RANDOMX_JUMP_OFFSET;
|
||||
constexpr int StoreL3Condition = 14;
|
||||
|
||||
//Prevent some unsafe configurations.
|
||||
#ifndef RANDOMX_UNSAFE
|
||||
static_assert((uint64_t)ArgonBlockSize * RANDOMX_CACHE_ACCESSES * RANDOMX_ARGON_MEMORY + 33554432 >= (uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE, "Unsafe configuration: Memory-time tradeoffs");
|
||||
static_assert((128 + RANDOMX_PROGRAM_SIZE * RANDOMX_FREQ_ISTORE / 256) * (RANDOMX_PROGRAM_COUNT * RANDOMX_PROGRAM_ITERATIONS) >= RANDOMX_SCRATCHPAD_L3, "Unsafe configuration: Insufficient Scratchpad writes");
|
||||
static_assert(RANDOMX_PROGRAM_COUNT > 1, "Unsafe configuration: Program filtering strategies");
|
||||
static_assert(RANDOMX_PROGRAM_SIZE >= 64, "Unsafe configuration: Low program entropy");
|
||||
static_assert(RANDOMX_PROGRAM_ITERATIONS >= 400, "Unsafe configuration: High compilation overhead");
|
||||
#endif
|
||||
|
||||
#ifdef TRACE
|
||||
constexpr bool trace = true;
|
||||
#else
|
||||
constexpr bool trace = false;
|
||||
#endif
|
||||
|
||||
#ifndef UNREACHABLE
|
||||
#ifdef __GNUC__
|
||||
#define UNREACHABLE __builtin_unreachable()
|
||||
#elif _MSC_VER
|
||||
#define UNREACHABLE __assume(false)
|
||||
#else
|
||||
#define UNREACHABLE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
#define RANDOMX_HAVE_COMPILER 1
|
||||
class JitCompilerX86;
|
||||
using JitCompiler = JitCompilerX86;
|
||||
#elif defined(__aarch64__)
|
||||
#define RANDOMX_HAVE_COMPILER 1
|
||||
class JitCompilerA64;
|
||||
using JitCompiler = JitCompilerA64;
|
||||
#else
|
||||
#define RANDOMX_HAVE_COMPILER 0
|
||||
class JitCompilerFallback;
|
||||
using JitCompiler = JitCompilerFallback;
|
||||
#endif
|
||||
|
||||
using addr_t = uint32_t;
|
||||
|
||||
using int_reg_t = uint64_t;
|
||||
|
||||
struct fpu_reg_t {
|
||||
double lo;
|
||||
double hi;
|
||||
};
|
||||
|
||||
constexpr uint32_t ScratchpadL1 = RANDOMX_SCRATCHPAD_L1 / sizeof(int_reg_t);
|
||||
constexpr uint32_t ScratchpadL2 = RANDOMX_SCRATCHPAD_L2 / sizeof(int_reg_t);
|
||||
constexpr uint32_t ScratchpadL3 = RANDOMX_SCRATCHPAD_L3 / sizeof(int_reg_t);
|
||||
constexpr int ScratchpadL1Mask = (ScratchpadL1 - 1) * 8;
|
||||
constexpr int ScratchpadL2Mask = (ScratchpadL2 - 1) * 8;
|
||||
constexpr int ScratchpadL1Mask16 = (ScratchpadL1 / 2 - 1) * 16;
|
||||
constexpr int ScratchpadL2Mask16 = (ScratchpadL2 / 2 - 1) * 16;
|
||||
constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8;
|
||||
constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64;
|
||||
constexpr int RegistersCount = 8;
|
||||
constexpr int RegisterCountFlt = RegistersCount / 2;
|
||||
constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register
|
||||
constexpr int RegisterNeedsSib = 4; //x86 r12 register
|
||||
|
||||
inline bool isZeroOrPowerOf2(uint64_t x) {
|
||||
return (x & (x - 1)) == 0;
|
||||
}
|
||||
|
||||
constexpr int mantissaSize = 52;
|
||||
constexpr int exponentSize = 11;
|
||||
constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1;
|
||||
constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1;
|
||||
constexpr int exponentBias = 1023;
|
||||
constexpr int dynamicExponentBits = 4;
|
||||
constexpr int staticExponentBits = 4;
|
||||
constexpr uint64_t constExponentBits = 0x300;
|
||||
constexpr uint64_t dynamicMantissaMask = (1ULL << (mantissaSize + dynamicExponentBits)) - 1;
|
||||
|
||||
struct MemoryRegisters {
|
||||
addr_t mx, ma;
|
||||
uint8_t* memory = nullptr;
|
||||
};
|
||||
|
||||
//register file in little-endian byte order
|
||||
struct RegisterFile {
|
||||
int_reg_t r[RegistersCount];
|
||||
fpu_reg_t f[RegisterCountFlt];
|
||||
fpu_reg_t e[RegisterCountFlt];
|
||||
fpu_reg_t a[RegisterCountFlt];
|
||||
};
|
||||
|
||||
typedef void(ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t);
|
||||
typedef void(DatasetInitFunc)(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
|
||||
|
||||
typedef void(DatasetDeallocFunc)(randomx_dataset*);
|
||||
typedef void(CacheDeallocFunc)(randomx_cache*);
|
||||
typedef void(CacheInitializeFunc)(randomx_cache*, const void*, size_t);
|
||||
}
|
||||
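Note (quick arithmetic check, illustrative only): the `ScratchpadL*Mask` constants turn a register value into a bounded, aligned scratchpad offset. Dividing the level size by the access width, subtracting one, and multiplying back yields a mask that both limits the range and forces 8-byte alignment, which is exactly how `getScratchpadAddress` uses it.

```
#include <cassert>
#include <cstdint>

// For RANDOMX_SCRATCHPAD_L1 = 16384 and 8-byte accesses:
// mask = (16384 / 8 - 1) * 8 = 16376 = 0x3FF8 -> offsets 0..16376, multiples of 8.
void mask_demo(uint64_t reg) {
    constexpr uint32_t l1Mask = (16384 / 8 - 1) * 8;
    uint32_t offset = static_cast<uint32_t>(reg) & l1Mask;
    assert(offset % 8 == 0 && offset < 16384);
}
```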
125
randomx/configuration.h
Normal file
@@ -0,0 +1,125 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
//Cache size in KiB. Must be a power of 2.
|
||||
#define RANDOMX_ARGON_MEMORY 262144
|
||||
|
||||
//Number of Argon2d iterations for Cache initialization.
|
||||
#define RANDOMX_ARGON_ITERATIONS 3
|
||||
|
||||
//Number of parallel lanes for Cache initialization.
|
||||
#define RANDOMX_ARGON_LANES 1
|
||||
|
||||
//Argon2d salt
|
||||
#define RANDOMX_ARGON_SALT "RandomX\x03"
|
||||
|
||||
//Number of random Cache accesses per Dataset item. Minimum is 2.
|
||||
#define RANDOMX_CACHE_ACCESSES 8
|
||||
|
||||
//Target latency for SuperscalarHash (in cycles of the reference CPU).
|
||||
#define RANDOMX_SUPERSCALAR_LATENCY 170
|
||||
|
||||
//Dataset base size in bytes. Must be a power of 2.
|
||||
#define RANDOMX_DATASET_BASE_SIZE 2147483648
|
||||
|
||||
//Dataset extra size. Must be divisible by 64.
|
||||
#define RANDOMX_DATASET_EXTRA_SIZE 33554368
|
||||
|
||||
//Number of instructions in a RandomX program. Must be divisible by 8.
|
||||
#define RANDOMX_PROGRAM_SIZE 256
|
||||
|
||||
//Number of iterations during VM execution.
|
||||
#define RANDOMX_PROGRAM_ITERATIONS 2048
|
||||
|
||||
//Number of chained VM executions per hash.
|
||||
#define RANDOMX_PROGRAM_COUNT 8
|
||||
|
||||
//Scratchpad L3 size in bytes. Must be a power of 2.
|
||||
#define RANDOMX_SCRATCHPAD_L3 2097152
|
||||
|
||||
//Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3.
|
||||
#define RANDOMX_SCRATCHPAD_L2 262144
|
||||
|
||||
//Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2.
|
||||
#define RANDOMX_SCRATCHPAD_L1 16384
|
||||
|
||||
//Jump condition mask size in bits.
|
||||
#define RANDOMX_JUMP_BITS 8
|
||||
|
||||
//Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16.
|
||||
#define RANDOMX_JUMP_OFFSET 8
|
||||
|
||||
/*
|
||||
Instruction frequencies (per 256 opcodes)
|
||||
Total sum of frequencies must be 256
|
||||
*/
|
||||
|
||||
//Integer instructions
|
||||
#define RANDOMX_FREQ_IADD_RS 16
|
||||
#define RANDOMX_FREQ_IADD_M 7
|
||||
#define RANDOMX_FREQ_ISUB_R 16
|
||||
#define RANDOMX_FREQ_ISUB_M 7
|
||||
#define RANDOMX_FREQ_IMUL_R 16
|
||||
#define RANDOMX_FREQ_IMUL_M 4
|
||||
#define RANDOMX_FREQ_IMULH_R 4
|
||||
#define RANDOMX_FREQ_IMULH_M 1
|
||||
#define RANDOMX_FREQ_ISMULH_R 4
|
||||
#define RANDOMX_FREQ_ISMULH_M 1
|
||||
#define RANDOMX_FREQ_IMUL_RCP 8
|
||||
#define RANDOMX_FREQ_INEG_R 2
|
||||
#define RANDOMX_FREQ_IXOR_R 15
|
||||
#define RANDOMX_FREQ_IXOR_M 5
|
||||
#define RANDOMX_FREQ_IROR_R 8
|
||||
#define RANDOMX_FREQ_IROL_R 2
|
||||
#define RANDOMX_FREQ_ISWAP_R 4
|
||||
|
||||
//Floating point instructions
|
||||
#define RANDOMX_FREQ_FSWAP_R 4
|
||||
#define RANDOMX_FREQ_FADD_R 16
|
||||
#define RANDOMX_FREQ_FADD_M 5
|
||||
#define RANDOMX_FREQ_FSUB_R 16
|
||||
#define RANDOMX_FREQ_FSUB_M 5
|
||||
#define RANDOMX_FREQ_FSCAL_R 6
|
||||
#define RANDOMX_FREQ_FMUL_R 32
|
||||
#define RANDOMX_FREQ_FDIV_M 4
|
||||
#define RANDOMX_FREQ_FSQRT_R 6
|
||||
|
||||
//Control instructions
|
||||
#define RANDOMX_FREQ_CBRANCH 25
|
||||
#define RANDOMX_FREQ_CFROUND 1
|
||||
|
||||
//Store instruction
|
||||
#define RANDOMX_FREQ_ISTORE 16
|
||||
|
||||
//No-op instruction
|
||||
#define RANDOMX_FREQ_NOP 0
|
||||
/* ------
|
||||
256
|
||||
*/
|
||||
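Note (plain arithmetic shown only as a sanity check of the values above): with the default configuration, the Cache and Dataset sizes used in common.hpp work out as follows.

```
#include <cstdint>

// CacheSize   = RANDOMX_ARGON_MEMORY (KiB) * 1024      = 268435456 bytes (256 MiB)
// DatasetSize = BASE + EXTRA = 2147483648 + 33554368   = 2181038016 bytes (~2 GiB + 32 MiB)
constexpr uint64_t kCacheSize   = 262144ULL * 1024;
constexpr uint64_t kDatasetSize = 2147483648ULL + 33554368ULL;
static_assert(kCacheSize == 268435456ULL, "256 MiB cache");
static_assert(kDatasetSize == 2181038016ULL, "dataset size");
```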
72
randomx/cpu.cpp
Normal file
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
Copyright (c) 2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "cpu.hpp"
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
#define HAVE_CPUID
|
||||
#ifdef _WIN32
|
||||
#include <intrin.h>
|
||||
#define cpuid(info, x) __cpuidex(info, x, 0)
|
||||
#else //GCC
|
||||
#include <cpuid.h>
|
||||
void cpuid(int info[4], int InfoType) {
|
||||
__cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_HWCAP)
|
||||
#include <sys/auxv.h>
|
||||
#include <asm/hwcap.h>
|
||||
#endif
|
||||
|
||||
namespace randomx {
|
||||
|
||||
Cpu::Cpu() : aes_(false), ssse3_(false), avx2_(false) {
|
||||
#ifdef HAVE_CPUID
|
||||
int info[4];
|
||||
cpuid(info, 0);
|
||||
int nIds = info[0];
|
||||
if (nIds >= 0x00000001) {
|
||||
cpuid(info, 0x00000001);
|
||||
ssse3_ = (info[2] & (1 << 9)) != 0;
|
||||
aes_ = (info[2] & (1 << 25)) != 0;
|
||||
}
|
||||
if (nIds >= 0x00000007) {
|
||||
cpuid(info, 0x00000007);
|
||||
avx2_ = (info[1] & (1 << 5)) != 0;
|
||||
}
|
||||
#elif defined(__aarch64__) && defined(HWCAP_AES)
|
||||
long hwcaps = getauxval(AT_HWCAP);
|
||||
aes_ = (hwcaps & HWCAP_AES) != 0;
|
||||
#endif
|
||||
//TODO POWER8 AES
|
||||
}
|
||||
|
||||
}
|
||||
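A quick sketch of how the detection above can be queried (not part of the commit; assumes cpu.hpp is on the include path):

```cpp
#include <cstdio>
#include "cpu.hpp"

// Prints which hardware features the Cpu constructor detected via CPUID / HWCAP.
int main() {
    randomx::Cpu cpu;
    std::printf("AES: %d  SSSE3: %d  AVX2: %d\n",
                cpu.hasAes(), cpu.hasSsse3(), cpu.hasAvx2());
    return 0;
}
```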
49
randomx/cpu.hpp
Normal file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
Copyright (c) 2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace randomx {
|
||||
|
||||
class Cpu {
|
||||
public:
|
||||
Cpu();
|
||||
bool hasAes() const {
|
||||
return aes_;
|
||||
}
|
||||
bool hasSsse3() const {
|
||||
return ssse3_;
|
||||
}
|
||||
bool hasAvx2() const {
|
||||
return avx2_;
|
||||
}
|
||||
private:
|
||||
bool aes_, ssse3_, avx2_;
|
||||
};
|
||||
|
||||
}
|
||||
212
randomx/dataset.cpp
Normal file
@@ -0,0 +1,212 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Original code from Argon2 reference source code package used under CC0 Licence
|
||||
* https://github.com/P-H-C/phc-winner-argon2
|
||||
* Copyright 2015
|
||||
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
|
||||
*/
|
||||
|
||||
#include <new>
|
||||
#include <algorithm>
|
||||
#include <stdexcept>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <cstring>
|
||||
#include <cassert>
|
||||
|
||||
#include "common.hpp"
|
||||
#include "dataset.hpp"
|
||||
#include "virtual_memory.hpp"
|
||||
#include "superscalar.hpp"
|
||||
#include "blake2_generator.hpp"
|
||||
#include "reciprocal.h"
|
||||
#include "blake2/endian.h"
|
||||
#include "argon2.h"
|
||||
#include "argon2_core.h"
|
||||
#include "jit_compiler.hpp"
|
||||
#include "intrin_portable.h"
|
||||
|
||||
static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value");
|
||||
static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unexpected value of ARGON2_BLOCK_SIZE");
|
||||
|
||||
namespace randomx {
|
||||
|
||||
template<class Allocator>
|
||||
void deallocCache(randomx_cache* cache) {
|
||||
if (cache->memory != nullptr)
|
||||
Allocator::freeMemory(cache->memory, CacheSize);
|
||||
if (cache->jit != nullptr)
|
||||
delete cache->jit;
|
||||
}
|
||||
|
||||
template void deallocCache<DefaultAllocator>(randomx_cache* cache);
|
||||
template void deallocCache<LargePageAllocator>(randomx_cache* cache);
|
||||
|
||||
void initCache(randomx_cache* cache, const void* key, size_t keySize) {
|
||||
uint32_t memory_blocks, segment_length;
|
||||
argon2_instance_t instance;
|
||||
argon2_context context;
|
||||
|
||||
context.out = nullptr;
|
||||
context.outlen = 0;
|
||||
context.pwd = CONST_CAST(uint8_t *)key;
|
||||
context.pwdlen = (uint32_t)keySize;
|
||||
context.salt = CONST_CAST(uint8_t *)RANDOMX_ARGON_SALT;
|
||||
context.saltlen = (uint32_t)randomx::ArgonSaltSize;
|
||||
context.secret = NULL;
|
||||
context.secretlen = 0;
|
||||
context.ad = NULL;
|
||||
context.adlen = 0;
|
||||
context.t_cost = RANDOMX_ARGON_ITERATIONS;
|
||||
context.m_cost = RANDOMX_ARGON_MEMORY;
|
||||
context.lanes = RANDOMX_ARGON_LANES; //1
|
||||
context.threads = 1;
|
||||
context.allocate_cbk = NULL;
|
||||
context.free_cbk = NULL;
|
||||
context.flags = ARGON2_DEFAULT_FLAGS;
|
||||
context.version = ARGON2_VERSION_NUMBER;
|
||||
|
||||
int inputsValid = randomx_argon2_validate_inputs(&context);
|
||||
assert(inputsValid == ARGON2_OK);
|
||||
|
||||
/* 2. Align memory size */
|
||||
/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
|
||||
memory_blocks = context.m_cost; //262144
|
||||
|
||||
segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS); //ARGON2_SYNC_POINTS=4
|
||||
|
||||
instance.version = context.version;
|
||||
instance.memory = NULL;
|
||||
instance.passes = context.t_cost;
|
||||
instance.memory_blocks = memory_blocks;
|
||||
instance.segment_length = segment_length;
|
||||
instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
|
||||
instance.lanes = context.lanes;
|
||||
instance.threads = context.threads;
|
||||
instance.type = Argon2_d;
|
||||
instance.memory = (block*)cache->memory;
|
||||
instance.impl = cache->argonImpl;
|
||||
|
||||
if (instance.threads > instance.lanes) {
|
||||
instance.threads = instance.lanes;
|
||||
}
|
||||
|
||||
/* 3. Initialization: Hashing inputs, allocating memory, filling first
|
||||
* blocks
|
||||
*/
|
||||
randomx_argon2_initialize(&instance, &context);
|
||||
|
||||
randomx_argon2_fill_memory_blocks(&instance);
|
||||
|
||||
cache->reciprocalCache.clear();
|
||||
randomx::Blake2Generator gen(key, keySize);
|
||||
for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { //RANDOMX_CACHE_ACCESSES =8
|
||||
randomx::generateSuperscalar(cache->programs[i], gen);
|
||||
for (unsigned j = 0; j < cache->programs[i].getSize(); ++j) {
|
||||
auto& instr = cache->programs[i](j);
|
||||
if ((SuperscalarInstructionType)instr.opcode == SuperscalarInstructionType::IMUL_RCP) {
|
||||
auto rcp = randomx_reciprocal(instr.getImm32());
|
||||
instr.setImm32(cache->reciprocalCache.size());
|
||||
cache->reciprocalCache.push_back(rcp);
|
||||
}
|
||||
}
|
||||
}
|
||||
printf("initial the cache finished\n");
|
||||
}
|
||||
|
||||
void initCacheCompile(randomx_cache* cache, const void* key, size_t keySize) {
|
||||
initCache(cache, key, keySize);
|
||||
cache->jit->enableWriting();
|
||||
cache->jit->generateSuperscalarHash(cache->programs, cache->reciprocalCache);
|
||||
cache->jit->generateDatasetInitCode();
|
||||
cache->jit->enableExecution();
|
||||
}
|
||||
|
||||
constexpr uint64_t superscalarMul0 = 6364136223846793005ULL;
|
||||
constexpr uint64_t superscalarAdd1 = 9298411001130361340ULL;
|
||||
constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL;
|
||||
constexpr uint64_t superscalarAdd3 = 9306329213124626780ULL;
|
||||
constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL;
|
||||
constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL;
|
||||
constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL;
|
||||
constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL;
|
||||
|
||||
static inline uint8_t* getMixBlock(uint64_t registerValue, uint8_t *memory) {
|
||||
constexpr uint32_t mask = CacheSize / CacheLineSize - 1;
|
||||
return memory + (registerValue & mask) * CacheLineSize;
|
||||
}
|
||||
|
||||
void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t itemNumber) {
|
||||
//printf("xxxxxxxx\n");
|
||||
int_reg_t rl[8];
|
||||
uint8_t* mixBlock;
|
||||
uint64_t registerValue = itemNumber;
|
||||
rl[0] = (itemNumber + 1) * superscalarMul0;
|
||||
rl[1] = rl[0] ^ superscalarAdd1;
|
||||
rl[2] = rl[0] ^ superscalarAdd2;
|
||||
rl[3] = rl[0] ^ superscalarAdd3;
|
||||
rl[4] = rl[0] ^ superscalarAdd4;
|
||||
rl[5] = rl[0] ^ superscalarAdd5;
|
||||
rl[6] = rl[0] ^ superscalarAdd6;
|
||||
rl[7] = rl[0] ^ superscalarAdd7;
|
||||
for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { //RANDOMX_CACHE_ACCESSES=8
|
||||
mixBlock = getMixBlock(registerValue, cache->memory);
|
||||
rx_prefetch_nta(mixBlock);
|
||||
SuperscalarProgram& prog = cache->programs[i];
|
||||
|
||||
executeSuperscalar(rl, prog, &cache->reciprocalCache);
|
||||
|
||||
for (unsigned q = 0; q < 8; ++q)
|
||||
rl[q] ^= load64_native(mixBlock + 8 * q);
|
||||
|
||||
registerValue = rl[prog.getAddressRegister()];
|
||||
}
|
||||
|
||||
memcpy(out, &rl, CacheLineSize);
|
||||
}
|
||||
|
||||
void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startItem, uint32_t endItem) {
|
||||
printf("initial the dataset\n");
|
||||
for (uint32_t itemNumber = startItem; itemNumber < endItem; ++itemNumber, dataset += CacheLineSize){
|
||||
initDatasetItem(cache, dataset, itemNumber);
|
||||
|
||||
//if (itemNumber==(endItem-1))
|
||||
//{
|
||||
// printf("endItem= %0d\n",endItem);
|
||||
// for (int i = 0; i < CacheLineSize; ++i)
|
||||
// {
|
||||
// printf("%02x ",dataset[i]);
|
||||
// }
|
||||
// printf("\n");
|
||||
//}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//b3 1f 7e c5 cd 28 eb 4b b6 72 7e 15 7d b0 6a 63 0b d4 dc 32 fb 18 eb 25 b4 f2 09 9b 9b 5d 39 ab 0d 2d d0 e9 ed 5f b7 a5 ae 31 bc d1 8f 01 d4 04 91 aa 62 01 db 47 a7 0d 2b 42 b9 b3 43 cd 78 c9
|
||||
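initDataset above fills a contiguous range of 64-byte dataset items, which is why callers typically split the full item range across worker threads. A rough sketch of that pattern (hypothetical helper; the item count is assumed to be DatasetSize / CacheLineSize from common.hpp, and threadCount must be at least 1):

```cpp
#include <cstdint>
#include <thread>
#include <vector>
#include "dataset.hpp"

// Hypothetical helper: each worker fills a contiguous slice of dataset items.
void initDatasetParallel(randomx_cache* cache, uint8_t* dataset,
                         uint32_t itemCount, unsigned threadCount) {
    std::vector<std::thread> workers;
    uint32_t perThread = itemCount / threadCount;
    for (unsigned t = 0; t < threadCount; ++t) {
        uint32_t start = t * perThread;
        // the last worker also takes the remainder
        uint32_t end = (t == threadCount - 1) ? itemCount : start + perThread;
        workers.emplace_back(randomx::initDataset, cache,
                             dataset + uint64_t(start) * randomx::CacheLineSize,
                             start, end);
    }
    for (auto& w : workers)
        w.join();
}
```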
94
randomx/dataset.hpp
Normal file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <type_traits>
|
||||
#include "common.hpp"
|
||||
#include "superscalar_program.hpp"
|
||||
#include "allocator.hpp"
|
||||
#include "argon2.h"
|
||||
|
||||
/* Global scope for C binding */
|
||||
struct randomx_dataset {
|
||||
uint8_t* memory = nullptr;
|
||||
randomx::DatasetDeallocFunc* dealloc;
|
||||
};
|
||||
|
||||
/* Global scope for C binding */
|
||||
struct randomx_cache {
|
||||
uint8_t* memory = nullptr;
|
||||
randomx::CacheDeallocFunc* dealloc;
|
||||
randomx::JitCompiler* jit;
|
||||
randomx::CacheInitializeFunc* initialize;
|
||||
randomx::DatasetInitFunc* datasetInit;
|
||||
randomx::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES]; //RANDOMX_CACHE_ACCESSES =8
|
||||
std::vector<uint64_t> reciprocalCache;
|
||||
std::string cacheKey;
|
||||
randomx_argon2_impl* argonImpl;
|
||||
|
||||
bool isInitialized() {
|
||||
return programs[0].getSize() != 0;
|
||||
}
|
||||
};
|
||||
|
||||
//A pointer to a standard-layout struct object points to its initial member
|
||||
static_assert(std::is_standard_layout<randomx_dataset>(), "randomx_dataset must be a standard-layout struct");
|
||||
//the following assert fails when compiling Debug in Visual Studio (JIT mode will crash in Debug)
|
||||
static_assert(std::is_standard_layout<randomx_cache>(), "randomx_cache must be a standard-layout struct");
|
||||
|
||||
namespace randomx {
|
||||
|
||||
using DefaultAllocator = AlignedAllocator<CacheLineSize>;
|
||||
|
||||
template<class Allocator>
|
||||
void deallocDataset(randomx_dataset* dataset) {
|
||||
if (dataset->memory != nullptr)
|
||||
Allocator::freeMemory(dataset->memory, DatasetSize);
|
||||
}
|
||||
|
||||
template<class Allocator>
|
||||
void deallocCache(randomx_cache* cache);
|
||||
|
||||
void initCache(randomx_cache*, const void*, size_t);
|
||||
void initCacheCompile(randomx_cache*, const void*, size_t);
|
||||
void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t blockNumber);
|
||||
void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
|
||||
|
||||
inline randomx_argon2_impl* selectArgonImpl(randomx_flags flags) {
|
||||
if (flags & RANDOMX_FLAG_ARGON2_AVX2) {
|
||||
return randomx_argon2_impl_avx2();
|
||||
}
|
||||
if (flags & RANDOMX_FLAG_ARGON2_SSSE3) {
|
||||
return randomx_argon2_impl_ssse3();
|
||||
}
|
||||
return &randomx_argon2_fill_segment_ref;
|
||||
}
|
||||
}
|
||||
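For illustration, the flag-to-implementation selection above is typically driven by the CPU detection from cpu.hpp. A sketch (the RANDOMX_FLAG_ARGON2_* values come from the public randomx.h header, which is not shown in this hunk, so the include is an assumption):

```cpp
#include "cpu.hpp"
#include "dataset.hpp"
#include "randomx.h"   // assumed: the public header defining randomx_flags

// Sketch: pick the fastest Argon2 fill routine the current CPU supports.
randomx_argon2_impl* pickArgonImpl() {
    randomx::Cpu cpu;
    int flags = 0;
    if (cpu.hasAvx2())
        flags |= RANDOMX_FLAG_ARGON2_AVX2;
    else if (cpu.hasSsse3())
        flags |= RANDOMX_FLAG_ARGON2_SSSE3;
    return randomx::selectArgonImpl((randomx_flags)flags);
}
```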
390
randomx/instruction.cpp
Normal file
@@ -0,0 +1,390 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "instruction.hpp"
|
||||
#include "common.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
void Instruction::print(std::ostream& os) const {
|
||||
os << names[opcode] << " ";
|
||||
auto handler = engine[opcode];
|
||||
(this->*handler)(os);
|
||||
}
|
||||
|
||||
void Instruction::genAddressReg(std::ostream& os, int srcIndex) const {
|
||||
os << (getModMem() ? "L1" : "L2") << "[r" << srcIndex << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
|
||||
}
|
||||
|
||||
void Instruction::genAddressRegDst(std::ostream& os, int dstIndex) const {
|
||||
if (getModCond() < StoreL3Condition)
|
||||
os << (getModMem() ? "L1" : "L2");
|
||||
else
|
||||
os << "L3";
|
||||
os << "[r" << dstIndex << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
|
||||
}
|
||||
|
||||
void Instruction::genAddressImm(std::ostream& os) const {
|
||||
os << "L3" << "[" << (getImm32() & ScratchpadL3Mask) << "]";
|
||||
}
|
||||
|
||||
void Instruction::h_IADD_RS(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
os << "r" << dstIndex << ", r" << srcIndex;
|
||||
if(dstIndex == RegisterNeedsDisplacement) {
|
||||
os << ", " << (int32_t)getImm32();
|
||||
}
|
||||
os << ", SHFT " << getModShift() << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_IADD_M(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
if (dstIndex != srcIndex) {
|
||||
os << "r" << dstIndex << ", ";
|
||||
genAddressReg(os, srcIndex);
|
||||
os << std::endl;
|
||||
}
|
||||
else {
|
||||
os << "r" << dstIndex << ", ";
|
||||
genAddressImm(os);
|
||||
os << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void Instruction::h_ISUB_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
if (dstIndex != srcIndex) {
|
||||
os << "r" << dstIndex << ", r" << srcIndex << std::endl;
|
||||
}
|
||||
else {
|
||||
os << "r" << dstIndex << ", " << (int32_t)getImm32() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void Instruction::h_ISUB_M(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
if (dstIndex != srcIndex) {
|
||||
os << "r" << dstIndex << ", ";
|
||||
genAddressReg(os, srcIndex);
|
||||
os << std::endl;
|
||||
}
|
||||
else {
|
||||
os << "r" << dstIndex << ", ";
|
||||
genAddressImm(os);
|
||||
os << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void Instruction::h_IMUL_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
if (dstIndex != srcIndex) {
|
||||
os << "r" << dstIndex << ", r" << srcIndex << std::endl;
|
||||
}
|
||||
else {
|
||||
os << "r" << dstIndex << ", " << (int32_t)getImm32() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void Instruction::h_IMUL_M(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
if (dstIndex != srcIndex) {
|
||||
os << "r" << dstIndex << ", ";
|
||||
genAddressReg(os, srcIndex);
|
||||
os << std::endl;
|
||||
}
|
||||
else {
|
||||
os << "r" << dstIndex << ", ";
|
||||
genAddressImm(os);
|
||||
os << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void Instruction::h_IMULH_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
os << "r" << dstIndex << ", r" << srcIndex << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_IMULH_M(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
if (dstIndex != srcIndex) {
|
||||
os << "r" << dstIndex << ", ";
|
||||
genAddressReg(os, srcIndex);
|
||||
os << std::endl;
|
||||
}
|
||||
else {
|
||||
os << "r" << dstIndex << ", ";
|
||||
genAddressImm(os);
|
||||
os << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void Instruction::h_ISMULH_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
os << "r" << dstIndex << ", r" << srcIndex << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_ISMULH_M(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
if (dstIndex != srcIndex) {
|
||||
os << "r" << dstIndex << ", ";
|
||||
genAddressReg(os, srcIndex);
|
||||
os << std::endl;
|
||||
}
|
||||
else {
|
||||
os << "r" << dstIndex << ", ";
|
||||
genAddressImm(os);
|
||||
os << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void Instruction::h_INEG_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
os << "r" << dstIndex << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_IXOR_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
if (dstIndex != srcIndex) {
|
||||
os << "r" << dstIndex << ", r" << srcIndex << std::endl;
|
||||
}
|
||||
else {
|
||||
os << "r" << dstIndex << ", " << (int32_t)getImm32() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void Instruction::h_IXOR_M(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
if (dstIndex != srcIndex) {
|
||||
os << "r" << dstIndex << ", ";
|
||||
genAddressReg(os, srcIndex);
|
||||
os << std::endl;
|
||||
}
|
||||
else {
|
||||
os << "r" << dstIndex << ", ";
|
||||
genAddressImm(os);
|
||||
os << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void Instruction::h_IROR_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
if (dstIndex != srcIndex) {
|
||||
os << "r" << dstIndex << ", r" << srcIndex << std::endl;
|
||||
}
|
||||
else {
|
||||
os << "r" << dstIndex << ", " << (getImm32() & 63) << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void Instruction::h_IROL_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
if (dstIndex != srcIndex) {
|
||||
os << "r" << dstIndex << ", r" << srcIndex << std::endl;
|
||||
}
|
||||
else {
|
||||
os << "r" << dstIndex << ", " << (getImm32() & 63) << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void Instruction::h_IMUL_RCP(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
os << "r" << dstIndex << ", " << getImm32() << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_ISWAP_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
os << "r" << dstIndex << ", r" << srcIndex << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_FSWAP_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
const char reg = (dstIndex >= RegisterCountFlt) ? 'e' : 'f';
|
||||
dstIndex %= RegisterCountFlt;
|
||||
os << reg << dstIndex << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_FADD_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegisterCountFlt;
|
||||
auto srcIndex = src % RegisterCountFlt;
|
||||
os << "f" << dstIndex << ", a" << srcIndex << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_FADD_M(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegisterCountFlt;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
os << "f" << dstIndex << ", ";
|
||||
genAddressReg(os, srcIndex);
|
||||
os << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_FSUB_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegisterCountFlt;
|
||||
auto srcIndex = src % RegisterCountFlt;
|
||||
os << "f" << dstIndex << ", a" << srcIndex << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_FSUB_M(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegisterCountFlt;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
os << "f" << dstIndex << ", ";
|
||||
genAddressReg(os, srcIndex);
|
||||
os << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_FSCAL_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegisterCountFlt;
|
||||
os << "f" << dstIndex << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_FMUL_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegisterCountFlt;
|
||||
auto srcIndex = src % RegisterCountFlt;
|
||||
os << "e" << dstIndex << ", a" << srcIndex << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_FDIV_M(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegisterCountFlt;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
os << "e" << dstIndex << ", ";
|
||||
genAddressReg(os, srcIndex);
|
||||
os << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_FSQRT_R(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegisterCountFlt;
|
||||
os << "e" << dstIndex << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_CFROUND(std::ostream& os) const {
|
||||
auto srcIndex = src % RegistersCount;
|
||||
os << "r" << srcIndex << ", " << (getImm32() & 63) << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_CBRANCH(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
os << "r" << dstIndex << ", " << (int32_t)getImm32() << ", COND " << (int)(getModCond()) << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_ISTORE(std::ostream& os) const {
|
||||
auto dstIndex = dst % RegistersCount;
|
||||
auto srcIndex = src % RegistersCount;
|
||||
genAddressRegDst(os, dstIndex);
|
||||
os << ", r" << srcIndex << std::endl;
|
||||
}
|
||||
|
||||
void Instruction::h_NOP(std::ostream& os) const {
|
||||
os << std::endl;
|
||||
}
|
||||
|
||||
#include "instruction_weights.hpp"
|
||||
#define INST_NAME(x) REPN(#x, WT(x))
|
||||
#define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x))
|
||||
|
||||
const char* Instruction::names[256] = {
|
||||
INST_NAME(IADD_RS)
|
||||
INST_NAME(IADD_M)
|
||||
INST_NAME(ISUB_R)
|
||||
INST_NAME(ISUB_M)
|
||||
INST_NAME(IMUL_R)
|
||||
INST_NAME(IMUL_M)
|
||||
INST_NAME(IMULH_R)
|
||||
INST_NAME(IMULH_M)
|
||||
INST_NAME(ISMULH_R)
|
||||
INST_NAME(ISMULH_M)
|
||||
INST_NAME(IMUL_RCP)
|
||||
INST_NAME(INEG_R)
|
||||
INST_NAME(IXOR_R)
|
||||
INST_NAME(IXOR_M)
|
||||
INST_NAME(IROR_R)
|
||||
INST_NAME(IROL_R)
|
||||
INST_NAME(ISWAP_R)
|
||||
INST_NAME(FSWAP_R)
|
||||
INST_NAME(FADD_R)
|
||||
INST_NAME(FADD_M)
|
||||
INST_NAME(FSUB_R)
|
||||
INST_NAME(FSUB_M)
|
||||
INST_NAME(FSCAL_R)
|
||||
INST_NAME(FMUL_R)
|
||||
INST_NAME(FDIV_M)
|
||||
INST_NAME(FSQRT_R)
|
||||
INST_NAME(CBRANCH)
|
||||
INST_NAME(CFROUND)
|
||||
INST_NAME(ISTORE)
|
||||
INST_NAME(NOP)
|
||||
};
|
||||
|
||||
InstructionFormatter Instruction::engine[256] = {
|
||||
INST_HANDLE(IADD_RS)
|
||||
INST_HANDLE(IADD_M)
|
||||
INST_HANDLE(ISUB_R)
|
||||
INST_HANDLE(ISUB_M)
|
||||
INST_HANDLE(IMUL_R)
|
||||
INST_HANDLE(IMUL_M)
|
||||
INST_HANDLE(IMULH_R)
|
||||
INST_HANDLE(IMULH_M)
|
||||
INST_HANDLE(ISMULH_R)
|
||||
INST_HANDLE(ISMULH_M)
|
||||
INST_HANDLE(IMUL_RCP)
|
||||
INST_HANDLE(INEG_R)
|
||||
INST_HANDLE(IXOR_R)
|
||||
INST_HANDLE(IXOR_M)
|
||||
INST_HANDLE(IROR_R)
|
||||
INST_HANDLE(IROL_R)
|
||||
INST_HANDLE(ISWAP_R)
|
||||
INST_HANDLE(FSWAP_R)
|
||||
INST_HANDLE(FADD_R)
|
||||
INST_HANDLE(FADD_M)
|
||||
INST_HANDLE(FSUB_R)
|
||||
INST_HANDLE(FSUB_M)
|
||||
INST_HANDLE(FSCAL_R)
|
||||
INST_HANDLE(FMUL_R)
|
||||
INST_HANDLE(FDIV_M)
|
||||
INST_HANDLE(FSQRT_R)
|
||||
INST_HANDLE(CBRANCH)
|
||||
INST_HANDLE(CFROUND)
|
||||
INST_HANDLE(ISTORE)
|
||||
INST_HANDLE(NOP)
|
||||
};
|
||||
|
||||
}
|
||||
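Because names[] and engine[] have one entry per opcode value, printing an instruction is a direct table lookup; with the default weights, opcodes 0-15 all decode as IADD_RS, 16-22 as IADD_M, and so on. A small usage sketch (assumes this file is compiled and linked in):

```cpp
#include <iostream>
#include "instruction.hpp"

int main() {
    randomx::Instruction ins{};   // zero-initialized
    ins.opcode = 0;               // falls into the IADD_RS range (opcodes 0-15)
    ins.dst = 1;
    ins.src = 2;
    ins.setMod(0);
    ins.setImm32(0);
    std::cout << ins;             // prints via Instruction::print -> h_IADD_RS
    return 0;
}
```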
149
randomx/instruction.hpp
Normal file
@@ -0,0 +1,149 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <iostream>
|
||||
#include <type_traits>
|
||||
#include "blake2/endian.h"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
class Instruction;
|
||||
|
||||
typedef void(Instruction::*InstructionFormatter)(std::ostream&) const;
|
||||
|
||||
enum class InstructionType : uint16_t {
|
||||
IADD_RS = 0,
|
||||
IADD_M = 1,
|
||||
ISUB_R = 2,
|
||||
ISUB_M = 3,
|
||||
IMUL_R = 4,
|
||||
IMUL_M = 5,
|
||||
IMULH_R = 6,
|
||||
IMULH_M = 7,
|
||||
ISMULH_R = 8,
|
||||
ISMULH_M = 9,
|
||||
IMUL_RCP = 10,
|
||||
INEG_R = 11,
|
||||
IXOR_R = 12,
|
||||
IXOR_M = 13,
|
||||
IROR_R = 14,
|
||||
IROL_R = 15,
|
||||
ISWAP_R = 16,
|
||||
FSWAP_R = 17,
|
||||
FADD_R = 18,
|
||||
FADD_M = 19,
|
||||
FSUB_R = 20,
|
||||
FSUB_M = 21,
|
||||
FSCAL_R = 22,
|
||||
FMUL_R = 23,
|
||||
FDIV_M = 24,
|
||||
FSQRT_R = 25,
|
||||
CBRANCH = 26,
|
||||
CFROUND = 27,
|
||||
ISTORE = 28,
|
||||
NOP = 29,
|
||||
};
|
||||
|
||||
class Instruction {
|
||||
public:
|
||||
uint32_t getImm32() const {
|
||||
return load32(&imm32);
|
||||
}
|
||||
void setImm32(uint32_t val) {
|
||||
return store32(&imm32, val);
|
||||
}
|
||||
const char* getName() const {
|
||||
return names[opcode];
|
||||
}
|
||||
friend std::ostream& operator<<(std::ostream& os, const Instruction& i) {
|
||||
i.print(os);
|
||||
return os;
|
||||
}
|
||||
int getModMem() const {
|
||||
return mod % 4; //bits 0-1
|
||||
}
|
||||
int getModShift() const {
|
||||
return (mod >> 2) % 4; //bits 2-3
|
||||
}
|
||||
int getModCond() const {
|
||||
return mod >> 4; //bits 4-7
|
||||
}
|
||||
void setMod(uint8_t val) {
|
||||
mod = val;
|
||||
}
|
||||
|
||||
uint8_t opcode;
|
||||
uint8_t dst;
|
||||
uint8_t src;
|
||||
uint8_t mod;
|
||||
uint32_t imm32;
|
||||
private:
|
||||
void print(std::ostream&) const;
|
||||
static const char* names[256];
|
||||
static InstructionFormatter engine[256];
|
||||
void genAddressReg(std::ostream& os, int) const;
|
||||
void genAddressImm(std::ostream& os) const;
|
||||
void genAddressRegDst(std::ostream&, int) const;
|
||||
void h_IADD_RS(std::ostream&) const;
|
||||
void h_IADD_M(std::ostream&) const;
|
||||
void h_ISUB_R(std::ostream&) const;
|
||||
void h_ISUB_M(std::ostream&) const;
|
||||
void h_IMUL_R(std::ostream&) const;
|
||||
void h_IMUL_M(std::ostream&) const;
|
||||
void h_IMULH_R(std::ostream&) const;
|
||||
void h_IMULH_M(std::ostream&) const;
|
||||
void h_ISMULH_R(std::ostream&) const;
|
||||
void h_ISMULH_M(std::ostream&) const;
|
||||
void h_IMUL_RCP(std::ostream&) const;
|
||||
void h_INEG_R(std::ostream&) const;
|
||||
void h_IXOR_R(std::ostream&) const;
|
||||
void h_IXOR_M(std::ostream&) const;
|
||||
void h_IROR_R(std::ostream&) const;
|
||||
void h_IROL_R(std::ostream&) const;
|
||||
void h_ISWAP_R(std::ostream&) const;
|
||||
void h_FSWAP_R(std::ostream&) const;
|
||||
void h_FADD_R(std::ostream&) const;
|
||||
void h_FADD_M(std::ostream&) const;
|
||||
void h_FSUB_R(std::ostream&) const;
|
||||
void h_FSUB_M(std::ostream&) const;
|
||||
void h_FSCAL_R(std::ostream&) const;
|
||||
void h_FMUL_R(std::ostream&) const;
|
||||
void h_FDIV_M(std::ostream&) const;
|
||||
void h_FSQRT_R(std::ostream&) const;
|
||||
void h_CBRANCH(std::ostream&) const;
|
||||
void h_CFROUND(std::ostream&) const;
|
||||
void h_ISTORE(std::ostream&) const;
|
||||
void h_NOP(std::ostream&) const;
|
||||
};
|
||||
|
||||
static_assert(sizeof(Instruction) == 8, "Invalid size of struct randomx::Instruction");
|
||||
static_assert(std::is_standard_layout<Instruction>(), "randomx::Instruction must be a standard-layout struct");
|
||||
}
|
||||
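The three getMod* accessors carve one byte into the memory-level, shift and condition fields; for example mod = 0xB6 (binary 1011'0110) splits into mem = 2, shift = 1, cond = 11. A tiny check of that decoding (a sketch, not part of the commit):

```cpp
#include <cassert>
#include "instruction.hpp"

int main() {
    randomx::Instruction ins{};
    ins.setMod(0xB6);                 // 1011'0110
    assert(ins.getModMem() == 2);     // bits 0-1 -> 10b
    assert(ins.getModShift() == 1);   // bits 2-3 -> 01b
    assert(ins.getModCond() == 11);   // bits 4-7 -> 1011b
    return 0;
}
```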
73
randomx/instruction_weights.hpp
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#define REP0(x)
|
||||
#define REP1(x) x,
|
||||
#define REP2(x) REP1(x) x,
|
||||
#define REP3(x) REP2(x) x,
|
||||
#define REP4(x) REP3(x) x,
|
||||
#define REP5(x) REP4(x) x,
|
||||
#define REP6(x) REP5(x) x,
|
||||
#define REP7(x) REP6(x) x,
|
||||
#define REP8(x) REP7(x) x,
|
||||
#define REP9(x) REP8(x) x,
|
||||
#define REP10(x) REP9(x) x,
|
||||
#define REP11(x) REP10(x) x,
|
||||
#define REP12(x) REP11(x) x,
|
||||
#define REP13(x) REP12(x) x,
|
||||
#define REP14(x) REP13(x) x,
|
||||
#define REP15(x) REP14(x) x,
|
||||
#define REP16(x) REP15(x) x,
|
||||
#define REP17(x) REP16(x) x,
|
||||
#define REP18(x) REP17(x) x,
|
||||
#define REP19(x) REP18(x) x,
|
||||
#define REP20(x) REP19(x) x,
|
||||
#define REP21(x) REP20(x) x,
|
||||
#define REP22(x) REP21(x) x,
|
||||
#define REP23(x) REP22(x) x,
|
||||
#define REP24(x) REP23(x) x,
|
||||
#define REP25(x) REP24(x) x,
|
||||
#define REP26(x) REP25(x) x,
|
||||
#define REP27(x) REP26(x) x,
|
||||
#define REP28(x) REP27(x) x,
|
||||
#define REP29(x) REP28(x) x,
|
||||
#define REP30(x) REP29(x) x,
|
||||
#define REP31(x) REP30(x) x,
|
||||
#define REP32(x) REP31(x) x,
|
||||
#define REP33(x) REP32(x) x,
|
||||
#define REP40(x) REP32(x) REP8(x)
|
||||
#define REP64(x) REP32(x) REP32(x)
|
||||
#define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x)
|
||||
#define REP232(x) REP128(x) REP40(x) REP40(x) REP24(x)
|
||||
#define REP256(x) REP128(x) REP128(x)
|
||||
#define REPNX(x,N) REP##N(x)
|
||||
#define REPN(x,N) REPNX(x,N)
|
||||
#define NUM(x) x
|
||||
#define WT(x) NUM(RANDOMX_FREQ_##x)
|
||||
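These REP/WT macros are what expand each instruction into RANDOMX_FREQ_x repeated table entries in instruction.cpp. A hedged illustration of the expansion count (DEMO_NAME is a hypothetical stand-in for the INST_NAME macro used there, and the configuration header name is assumed to be configuration.h as in upstream RandomX):

```cpp
#include "configuration.h"          // assumed name of the header defining RANDOMX_FREQ_*
#include "instruction_weights.hpp"

// REPN("IADD_M", WT(IADD_M)) -> REP7("IADD_M") -> seven repeated entries.
#define DEMO_NAME(x) REPN(#x, WT(x))
static const char* demo[] = { DEMO_NAME(IADD_M) };
static_assert(sizeof(demo) / sizeof(demo[0]) == RANDOMX_FREQ_IADD_M,
              "REP expansion yields exactly the instruction's weight");
```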
193
randomx/instructions_portable.cpp
Normal file
@@ -0,0 +1,193 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cfenv>
|
||||
#include <cmath>
|
||||
#include "common.hpp"
|
||||
#include "intrin_portable.h"
|
||||
#include "blake2/endian.h"
|
||||
|
||||
#if defined(__SIZEOF_INT128__)
|
||||
typedef unsigned __int128 uint128_t;
|
||||
typedef __int128 int128_t;
|
||||
uint64_t mulh(uint64_t a, uint64_t b) {
|
||||
return ((uint128_t)a * b) >> 64;
|
||||
}
|
||||
int64_t smulh(int64_t a, int64_t b) {
|
||||
return ((int128_t)a * b) >> 64;
|
||||
}
|
||||
#define HAVE_MULH
|
||||
#define HAVE_SMULH
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define HAS_VALUE(X) X ## 0
|
||||
#define EVAL_DEFINE(X) HAS_VALUE(X)
|
||||
#include <intrin.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
uint64_t rotl(uint64_t x, unsigned int c) {
|
||||
return _rotl64(x, c);
|
||||
}
|
||||
uint64_t rotr(uint64_t x, unsigned int c) {
|
||||
return _rotr64(x, c);
|
||||
}
|
||||
#define HAVE_ROTL
|
||||
#define HAVE_ROTR
|
||||
|
||||
#if EVAL_DEFINE(__MACHINEARM64_X64(1))
|
||||
uint64_t mulh(uint64_t a, uint64_t b) {
|
||||
return __umulh(a, b);
|
||||
}
|
||||
#define HAVE_MULH
|
||||
#endif
|
||||
|
||||
#if EVAL_DEFINE(__MACHINEX64(1))
|
||||
int64_t smulh(int64_t a, int64_t b) {
|
||||
int64_t hi;
|
||||
_mul128(a, b, &hi);
|
||||
return hi;
|
||||
}
|
||||
#define HAVE_SMULH
|
||||
#endif
|
||||
|
||||
static void setRoundMode_(uint32_t mode) {
|
||||
_controlfp(mode, _MCW_RC);
|
||||
}
|
||||
#define HAVE_SETROUNDMODE_IMPL
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_SETROUNDMODE_IMPL
|
||||
static void setRoundMode_(uint32_t mode) {
|
||||
fesetround(mode);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_ROTR
|
||||
uint64_t rotr(uint64_t a, unsigned int b) {
|
||||
return (a >> b) | (a << (-b & 63));
|
||||
}
|
||||
#define HAVE_ROTR
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_ROTL
|
||||
uint64_t rotl(uint64_t a, unsigned int b) {
|
||||
return (a << b) | (a >> (-b & 63));
|
||||
}
|
||||
#define HAVE_ROTL
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_MULH
|
||||
#define LO(x) ((x)&0xffffffff)
|
||||
#define HI(x) ((x)>>32)
|
||||
uint64_t mulh(uint64_t a, uint64_t b) {
|
||||
uint64_t ah = HI(a), al = LO(a);
|
||||
uint64_t bh = HI(b), bl = LO(b);
|
||||
uint64_t x00 = al * bl;
|
||||
uint64_t x01 = al * bh;
|
||||
uint64_t x10 = ah * bl;
|
||||
uint64_t x11 = ah * bh;
|
||||
uint64_t m1 = LO(x10) + LO(x01) + HI(x00);
|
||||
uint64_t m2 = HI(x10) + HI(x01) + LO(x11) + HI(m1);
|
||||
uint64_t m3 = HI(x11) + HI(m2);
|
||||
|
||||
return (m3 << 32) + LO(m2);
|
||||
}
|
||||
#define HAVE_MULH
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_SMULH
|
||||
int64_t smulh(int64_t a, int64_t b) {
|
||||
int64_t hi = mulh(a, b);
|
||||
if (a < 0LL) hi -= b;
|
||||
if (b < 0LL) hi -= a;
|
||||
return hi;
|
||||
}
|
||||
#define HAVE_SMULH
|
||||
#endif
|
||||
|
||||
#ifdef RANDOMX_DEFAULT_FENV
|
||||
|
||||
void rx_reset_float_state() {
|
||||
setRoundMode_(FE_TONEAREST);
|
||||
rx_set_double_precision(); //set precision to 53 bits if needed by the platform
|
||||
}
|
||||
|
||||
void rx_set_rounding_mode(uint32_t mode) {
|
||||
switch (mode & 3) {
|
||||
case RoundDown:
|
||||
setRoundMode_(FE_DOWNWARD);
|
||||
break;
|
||||
case RoundUp:
|
||||
setRoundMode_(FE_UPWARD);
|
||||
break;
|
||||
case RoundToZero:
|
||||
setRoundMode_(FE_TOWARDZERO);
|
||||
break;
|
||||
case RoundToNearest:
|
||||
setRoundMode_(FE_TONEAREST);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef RANDOMX_USE_X87
|
||||
|
||||
#if defined(_MSC_VER) && defined(_M_IX86)
|
||||
|
||||
void rx_set_double_precision() {
|
||||
_control87(_PC_53, _MCW_PC);
|
||||
}
|
||||
|
||||
#elif defined(__i386)
|
||||
|
||||
void rx_set_double_precision() {
|
||||
uint16_t volatile x87cw;
|
||||
asm volatile("fstcw %0" : "=m" (x87cw));
|
||||
x87cw &= ~0x300;
|
||||
x87cw |= 0x200;
|
||||
asm volatile("fldcw %0" : : "m" (x87cw));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif //RANDOMX_USE_X87
|
||||
|
||||
union double_ser_t {
|
||||
double f;
|
||||
uint64_t i;
|
||||
};
|
||||
|
||||
double loadDoublePortable(const void* addr) {
|
||||
double_ser_t ds;
|
||||
ds.i = load64(addr);
|
||||
return ds.f;
|
||||
}
|
||||
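The portable mulh above builds the high 64 bits of a 64x64-bit product from four 32x32-bit partial products. A quick cross-check of that limb arithmetic against a 128-bit reference, as a standalone test sketch (assumes GCC/Clang on a 64-bit target, where unsigned __int128 is available):

```cpp
#include <cassert>
#include <cstdint>

// Reference: full 128-bit product, upper half taken directly.
static uint64_t mulh_ref(uint64_t a, uint64_t b) {
    return (uint64_t)(((unsigned __int128)a * b) >> 64);
}

// Same limb decomposition as the portable fallback above.
static uint64_t mulh_portable(uint64_t a, uint64_t b) {
    uint64_t ah = a >> 32, al = a & 0xffffffff;
    uint64_t bh = b >> 32, bl = b & 0xffffffff;
    uint64_t x00 = al * bl, x01 = al * bh, x10 = ah * bl, x11 = ah * bh;
    uint64_t m1 = (x10 & 0xffffffff) + (x01 & 0xffffffff) + (x00 >> 32);
    uint64_t m2 = (x10 >> 32) + (x01 >> 32) + (x11 & 0xffffffff) + (m1 >> 32);
    uint64_t m3 = (x11 >> 32) + (m2 >> 32);
    return (m3 << 32) + (m2 & 0xffffffff);
}

int main() {
    uint64_t x = 0x0123456789abcdefULL, y = 0xfedcba9876543210ULL;
    assert(mulh_portable(x, y) == mulh_ref(x, y));
    assert(mulh_portable(~0ULL, ~0ULL) == mulh_ref(~0ULL, ~0ULL));
    return 0;
}
```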
738
randomx/intrin_portable.h
Normal file
@@ -0,0 +1,738 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include "blake2/endian.h"
|
||||
|
||||
constexpr int32_t unsigned32ToSigned2sCompl(uint32_t x) {
|
||||
return (-1 == ~0) ? (int32_t)x : (x > INT32_MAX ? (-(int32_t)(UINT32_MAX - x) - 1) : (int32_t)x);
|
||||
}
|
||||
|
||||
constexpr int64_t unsigned64ToSigned2sCompl(uint64_t x) {
|
||||
return (-1 == ~0) ? (int64_t)x : (x > INT64_MAX ? (-(int64_t)(UINT64_MAX - x) - 1) : (int64_t)x);
|
||||
}
|
||||
|
||||
constexpr uint64_t signExtend2sCompl(uint32_t x) {
|
||||
return (-1 == ~0) ? (int64_t)(int32_t)(x) : (x > INT32_MAX ? (x | 0xffffffff00000000ULL) : (uint64_t)x);
|
||||
}
|
||||
|
||||
constexpr int RoundToNearest = 0;
|
||||
constexpr int RoundDown = 1;
|
||||
constexpr int RoundUp = 2;
|
||||
constexpr int RoundToZero = 3;
|
||||
|
||||
//MSVC doesn't define __SSE2__, so we have to define it manually if SSE2 is available
|
||||
#if !defined(__SSE2__) && (defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2))
|
||||
#define __SSE2__ 1
|
||||
#endif
|
||||
|
||||
//MSVC doesn't define __AES__
|
||||
#if defined(_MSC_VER) && defined(__SSE2__)
|
||||
#define __AES__
|
||||
#endif
|
||||
|
||||
//the library "sqrt" function provided by MSVC for x86 targets doesn't give
|
||||
//the correct results, so we have to use inline assembly to call x87 fsqrt directly
|
||||
#if !defined(__SSE2__)
|
||||
#if defined(_MSC_VER) && defined(_M_IX86)
|
||||
inline double __cdecl rx_sqrt(double x) {
|
||||
__asm {
|
||||
fld x
|
||||
fsqrt
|
||||
}
|
||||
}
|
||||
#define rx_sqrt rx_sqrt
|
||||
|
||||
void rx_set_double_precision();
|
||||
#define RANDOMX_USE_X87
|
||||
|
||||
#elif defined(__i386)
|
||||
|
||||
void rx_set_double_precision();
|
||||
#define RANDOMX_USE_X87
|
||||
|
||||
#endif
|
||||
#endif //__SSE2__
|
||||
|
||||
#if !defined(rx_sqrt)
|
||||
#define rx_sqrt sqrt
|
||||
#endif
|
||||
|
||||
#if !defined(RANDOMX_USE_X87)
|
||||
#define rx_set_double_precision(x)
|
||||
#endif
|
||||
|
||||
#ifdef __SSE2__
|
||||
#ifdef __GNUC__
|
||||
#include <x86intrin.h>
|
||||
#else
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
typedef __m128i rx_vec_i128;
|
||||
typedef __m128d rx_vec_f128;
|
||||
|
||||
#define rx_aligned_alloc(a, b) _mm_malloc(a,b)
|
||||
#define rx_aligned_free(a) _mm_free(a)
|
||||
#define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
|
||||
|
||||
#define rx_load_vec_f128 _mm_load_pd
|
||||
#define rx_store_vec_f128 _mm_store_pd
|
||||
#define rx_add_vec_f128 _mm_add_pd
|
||||
#define rx_sub_vec_f128 _mm_sub_pd
|
||||
#define rx_mul_vec_f128 _mm_mul_pd
|
||||
#define rx_div_vec_f128 _mm_div_pd
|
||||
#define rx_sqrt_vec_f128 _mm_sqrt_pd
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
|
||||
return _mm_shuffle_pd(a, a, 1);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
|
||||
return _mm_castsi128_pd(_mm_set_epi64x(x1, x0));
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
|
||||
return _mm_castsi128_pd(_mm_set1_epi64x(x));
|
||||
}
|
||||
|
||||
#define rx_xor_vec_f128 _mm_xor_pd
|
||||
#define rx_and_vec_f128 _mm_and_pd
|
||||
#define rx_or_vec_f128 _mm_or_pd
|
||||
|
||||
#ifdef __AES__
|
||||
|
||||
#define rx_aesenc_vec_i128 _mm_aesenc_si128
|
||||
#define rx_aesdec_vec_i128 _mm_aesdec_si128
|
||||
|
||||
#define HAVE_AES 1
|
||||
|
||||
#endif //__AES__
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
|
||||
return _mm_cvtsi128_si32(a);
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
|
||||
return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0x55));
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
|
||||
return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0xaa));
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
|
||||
return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0xff));
|
||||
}
|
||||
|
||||
#define rx_set_int_vec_i128 _mm_set_epi32
|
||||
#define rx_xor_vec_i128 _mm_xor_si128
|
||||
#define rx_load_vec_i128 _mm_load_si128
|
||||
#define rx_store_vec_i128 _mm_store_si128
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
|
||||
__m128i ix = _mm_loadl_epi64((const __m128i*)addr);
|
||||
return _mm_cvtepi32_pd(ix);
|
||||
}
|
||||
|
||||
constexpr uint32_t rx_mxcsr_default = 0x9FC0; //Flush to zero, denormals are zero, default rounding mode, all exceptions disabled
|
||||
|
||||
FORCE_INLINE void rx_reset_float_state() {
|
||||
_mm_setcsr(rx_mxcsr_default);
|
||||
}
|
||||
|
||||
FORCE_INLINE void rx_set_rounding_mode(uint32_t mode) {
|
||||
_mm_setcsr(rx_mxcsr_default | (mode << 13));
|
||||
}
|
||||
|
||||
#elif defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) //sadly, only POWER7 and newer can use SIMD acceleration; earlier processors can't use doubles or 64-bit integers with SIMD
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include <cstdlib>
|
||||
#include <altivec.h>
|
||||
#undef vector
|
||||
#undef pixel
|
||||
#undef bool
|
||||
|
||||
typedef __vector uint8_t __m128i;
|
||||
typedef __vector uint32_t __m128l;
|
||||
typedef __vector int __m128li;
|
||||
typedef __vector uint64_t __m128ll;
|
||||
typedef __vector double __m128d;
|
||||
|
||||
typedef __m128i rx_vec_i128;
|
||||
typedef __m128d rx_vec_f128;
|
||||
typedef union{
|
||||
rx_vec_i128 i;
|
||||
rx_vec_f128 d;
|
||||
uint64_t u64[2];
|
||||
double d64[2];
|
||||
uint32_t u32[4];
|
||||
int i32[4];
|
||||
} vec_u;
|
||||
|
||||
#define rx_aligned_alloc(a, b) malloc(a)
|
||||
#define rx_aligned_free(a) free(a)
|
||||
#define rx_prefetch_nta(x)
|
||||
|
||||
/* Splat 64-bit long long to 2 64-bit long longs */
|
||||
FORCE_INLINE __m128i vec_splat2sd (int64_t scalar)
|
||||
{ return (__m128i) vec_splats (scalar); }
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
return (rx_vec_f128)vec_vsx_ld(0,pd);
|
||||
#else
|
||||
vec_u t;
|
||||
t.u64[0] = load64(pd + 0);
|
||||
t.u64[1] = load64(pd + 1);
|
||||
return (rx_vec_f128)t.d;
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 a) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
vec_vsx_st(a,0,(rx_vec_f128*)mem_addr);
|
||||
#else
|
||||
vec_u _a;
|
||||
_a.d = a;
|
||||
store64(mem_addr + 0, _a.u64[0]);
|
||||
store64(mem_addr + 1, _a.u64[1]);
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
|
||||
return (rx_vec_f128)vec_perm((__m128i)a,(__m128i)a,(__m128i){8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7});
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_add_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
return (rx_vec_f128)vec_add(a,b);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_sub_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
return (rx_vec_f128)vec_sub(a,b);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_mul_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
return (rx_vec_f128)vec_mul(a,b);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_div_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
return (rx_vec_f128)vec_div(a,b);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_sqrt_vec_f128(rx_vec_f128 a) {
|
||||
return (rx_vec_f128)vec_sqrt(a);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_set1_long_vec_i128(uint64_t a) {
|
||||
return (rx_vec_i128)vec_splat2sd(a);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_vec_i128_vec_f128(rx_vec_i128 a) {
|
||||
return (rx_vec_f128)a;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
|
||||
return (rx_vec_f128)(__m128ll){x0,x1};
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
|
||||
return (rx_vec_f128)vec_splat2sd(x);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
return (rx_vec_f128)vec_xor(a,b);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
return (rx_vec_f128)vec_and(a,b);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
return (rx_vec_f128)vec_or(a,b);
|
||||
}
|
||||
|
||||
#if defined(__CRYPTO__)
|
||||
|
||||
FORCE_INLINE __m128ll vrev(__m128i v){
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
return (__m128ll)vec_perm((__m128i)v,(__m128i){0},(__m128i){15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0});
|
||||
#else
|
||||
return (__m128ll)vec_perm((__m128i)v,(__m128i){0},(__m128i){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12});
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
|
||||
__m128ll _v = vrev(v);
|
||||
__m128ll _rkey = vrev(rkey);
|
||||
__m128ll result = vrev((__m128i)__builtin_crypto_vcipher(_v,_rkey));
|
||||
return (rx_vec_i128)result;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
|
||||
__m128ll _v = vrev(v);
|
||||
__m128ll zero = (__m128ll){0};
|
||||
__m128ll out = vrev((__m128i)__builtin_crypto_vncipher(_v,zero));
|
||||
return (rx_vec_i128)vec_xor((__m128i)out,rkey);
|
||||
}
|
||||
#define HAVE_AES 1
|
||||
|
||||
#endif //__CRYPTO__
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
|
||||
vec_u _a;
|
||||
_a.i = a;
|
||||
return _a.i32[0];
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
|
||||
vec_u _a;
|
||||
_a.i = a;
|
||||
return _a.i32[1];
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
|
||||
vec_u _a;
|
||||
_a.i = a;
|
||||
return _a.i32[2];
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
|
||||
vec_u _a;
|
||||
_a.i = a;
|
||||
return _a.i32[3];
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) {
|
||||
return (rx_vec_i128)((__m128li){_I0,_I1,_I2,_I3});
|
||||
};
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_xor_vec_i128(rx_vec_i128 _A, rx_vec_i128 _B) {
|
||||
return (rx_vec_i128)vec_xor(_A,_B);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const *_P) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
return *_P;
|
||||
#else
|
||||
uint32_t* ptr = (uint32_t*)_P;
|
||||
vec_u c;
|
||||
c.u32[0] = load32(ptr + 0);
|
||||
c.u32[1] = load32(ptr + 1);
|
||||
c.u32[2] = load32(ptr + 2);
|
||||
c.u32[3] = load32(ptr + 3);
|
||||
return (rx_vec_i128)c.i;
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *_P, rx_vec_i128 _B) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
*_P = _B;
|
||||
#else
|
||||
uint32_t* ptr = (uint32_t*)_P;
|
||||
vec_u B;
|
||||
B.i = _B;
|
||||
store32(ptr + 0, B.u32[0]);
|
||||
store32(ptr + 1, B.u32[1]);
|
||||
store32(ptr + 2, B.u32[2]);
|
||||
store32(ptr + 3, B.u32[3]);
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
|
||||
vec_u x;
|
||||
x.d64[0] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
|
||||
x.d64[1] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
|
||||
return (rx_vec_f128)x.d;
|
||||
}
|
||||
|
||||
#define RANDOMX_DEFAULT_FENV
|
||||
|
||||
#elif defined(__aarch64__)
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <arm_neon.h>
|
||||
#include <arm_acle.h>
|
||||
|
||||
typedef uint8x16_t rx_vec_i128;
|
||||
typedef float64x2_t rx_vec_f128;
|
||||
|
||||
inline void* rx_aligned_alloc(size_t size, size_t align) {
|
||||
void* p;
|
||||
if (posix_memalign(&p, align, size) == 0)
|
||||
return p;
|
||||
|
||||
return 0;
|
||||
};
|
||||
|
||||
#define rx_aligned_free(a) free(a)
|
||||
|
||||
inline void rx_prefetch_nta(void* ptr) {
|
||||
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
|
||||
return vld1q_f64((const float64_t*)pd);
|
||||
}
|
||||
|
||||
FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 val) {
|
||||
vst1q_f64((float64_t*)mem_addr, val);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
|
||||
float64x2_t temp;
|
||||
temp = vcopyq_laneq_f64(temp, 1, a, 1);
|
||||
a = vcopyq_laneq_f64(a, 1, a, 0);
|
||||
return vcopyq_laneq_f64(a, 0, temp, 1);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
|
||||
uint64x2_t temp0 = vdupq_n_u64(x0);
|
||||
uint64x2_t temp1 = vdupq_n_u64(x1);
|
||||
return vreinterpretq_f64_u64(vcopyq_laneq_u64(temp0, 1, temp1, 0));
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
|
||||
return vreinterpretq_f64_u64(vdupq_n_u64(x));
|
||||
}
|
||||
|
||||
#define rx_add_vec_f128 vaddq_f64
|
||||
#define rx_sub_vec_f128 vsubq_f64
|
||||
#define rx_mul_vec_f128 vmulq_f64
|
||||
#define rx_div_vec_f128 vdivq_f64
|
||||
#define rx_sqrt_vec_f128 vsqrtq_f64
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
return vreinterpretq_f64_u8(veorq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
|
||||
}
|
||||
|
||||
#ifdef __ARM_FEATURE_CRYPTO
|
||||
|
||||
|
||||
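// NEON AESE xors the round key before SubBytes/ShiftRows, while x86 AESENC applies it after
// MixColumns; encrypting with an all-zero key and xoring "key" afterwards reproduces the x86
// AESENC semantics that the rest of the code expects.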
FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 a, rx_vec_i128 key) {
|
||||
const uint8x16_t zero = { 0 };
|
||||
return vaesmcq_u8(vaeseq_u8(a, zero)) ^ key;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 a, rx_vec_i128 key) {
|
||||
const uint8x16_t zero = { 0 };
|
||||
return vaesimcq_u8(vaesdq_u8(a, zero)) ^ key;
|
||||
}
|
||||
|
||||
#define HAVE_AES 1
|
||||
|
||||
#endif
|
||||
|
||||
#define rx_xor_vec_i128 veorq_u8
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
|
||||
return vgetq_lane_s32(vreinterpretq_s32_u8(a), 0);
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
|
||||
return vgetq_lane_s32(vreinterpretq_s32_u8(a), 1);
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
|
||||
return vgetq_lane_s32(vreinterpretq_s32_u8(a), 2);
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
|
||||
return vgetq_lane_s32(vreinterpretq_s32_u8(a), 3);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) {
|
||||
int32_t data[4];
|
||||
data[0] = _I0;
|
||||
data[1] = _I1;
|
||||
data[2] = _I2;
|
||||
data[3] = _I3;
|
||||
return vreinterpretq_u8_s32(vld1q_s32(data));
|
||||
};
|
||||
|
||||
#define rx_xor_vec_i128 veorq_u8
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_load_vec_i128(const rx_vec_i128* mem_addr) {
|
||||
return vld1q_u8((const uint8_t*)mem_addr);
|
||||
}
|
||||
|
||||
FORCE_INLINE void rx_store_vec_i128(rx_vec_i128* mem_addr, rx_vec_i128 val) {
|
||||
vst1q_u8((uint8_t*)mem_addr, val);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
|
||||
double lo = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
|
||||
double hi = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
|
||||
rx_vec_f128 x;
|
||||
x = vsetq_lane_f64(lo, x, 0);
|
||||
x = vsetq_lane_f64(hi, x, 1);
|
||||
return x;
|
||||
}
|
||||
|
||||
#define RANDOMX_DEFAULT_FENV
|
||||
|
||||
#else //portable fallback
|
||||
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
|
||||
typedef union {
|
||||
uint64_t u64[2];
|
||||
uint32_t u32[4];
|
||||
uint16_t u16[8];
|
||||
uint8_t u8[16];
|
||||
} rx_vec_i128;
|
||||
|
||||
typedef union {
|
||||
struct {
|
||||
double lo;
|
||||
double hi;
|
||||
};
|
||||
rx_vec_i128 i;
|
||||
} rx_vec_f128;
|
||||
|
||||
#define rx_aligned_alloc(a, b) malloc(a)
|
||||
#define rx_aligned_free(a) free(a)
|
||||
#define rx_prefetch_nta(x)
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
|
||||
rx_vec_f128 x;
|
||||
x.i.u64[0] = load64(pd + 0);
|
||||
x.i.u64[1] = load64(pd + 1);
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 a) {
|
||||
store64(mem_addr + 0, a.i.u64[0]);
|
||||
store64(mem_addr + 1, a.i.u64[1]);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
|
||||
double temp = a.hi;
|
||||
a.hi = a.lo;
|
||||
a.lo = temp;
|
||||
return a;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_add_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
rx_vec_f128 x;
|
||||
x.lo = a.lo + b.lo;
|
||||
x.hi = a.hi + b.hi;
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_sub_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
rx_vec_f128 x;
|
||||
x.lo = a.lo - b.lo;
|
||||
x.hi = a.hi - b.hi;
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_mul_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
rx_vec_f128 x;
|
||||
x.lo = a.lo * b.lo;
|
||||
x.hi = a.hi * b.hi;
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_div_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
rx_vec_f128 x;
|
||||
x.lo = a.lo / b.lo;
|
||||
x.hi = a.hi / b.hi;
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_sqrt_vec_f128(rx_vec_f128 a) {
|
||||
rx_vec_f128 x;
|
||||
x.lo = rx_sqrt(a.lo);
|
||||
x.hi = rx_sqrt(a.hi);
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_set1_long_vec_i128(uint64_t a) {
|
||||
rx_vec_i128 x;
|
||||
x.u64[0] = a;
|
||||
x.u64[1] = a;
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_vec_i128_vec_f128(rx_vec_i128 a) {
|
||||
rx_vec_f128 x;
|
||||
x.i = a;
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
|
||||
rx_vec_f128 v;
|
||||
v.i.u64[0] = x0;
|
||||
v.i.u64[1] = x1;
|
||||
return v;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
|
||||
rx_vec_f128 v;
|
||||
v.i.u64[0] = x;
|
||||
v.i.u64[1] = x;
|
||||
return v;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
rx_vec_f128 x;
|
||||
x.i.u64[0] = a.i.u64[0] ^ b.i.u64[0];
|
||||
x.i.u64[1] = a.i.u64[1] ^ b.i.u64[1];
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
rx_vec_f128 x;
|
||||
x.i.u64[0] = a.i.u64[0] & b.i.u64[0];
|
||||
x.i.u64[1] = a.i.u64[1] & b.i.u64[1];
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
|
||||
rx_vec_f128 x;
|
||||
x.i.u64[0] = a.i.u64[0] | b.i.u64[0];
|
||||
x.i.u64[1] = a.i.u64[1] | b.i.u64[1];
|
||||
return x;
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
|
||||
return a.u32[0];
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
|
||||
return a.u32[1];
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
|
||||
return a.u32[2];
|
||||
}
|
||||
|
||||
FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
|
||||
return a.u32[3];
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) {
|
||||
rx_vec_i128 v;
|
||||
v.u32[0] = _I0;
|
||||
v.u32[1] = _I1;
|
||||
v.u32[2] = _I2;
|
||||
v.u32[3] = _I3;
|
||||
return v;
|
||||
};
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_xor_vec_i128(rx_vec_i128 _A, rx_vec_i128 _B) {
|
||||
rx_vec_i128 c;
|
||||
c.u32[0] = _A.u32[0] ^ _B.u32[0];
|
||||
c.u32[1] = _A.u32[1] ^ _B.u32[1];
|
||||
c.u32[2] = _A.u32[2] ^ _B.u32[2];
|
||||
c.u32[3] = _A.u32[3] ^ _B.u32[3];
|
||||
return c;
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const*_P) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
return *_P;
|
||||
#else
|
||||
uint32_t* ptr = (uint32_t*)_P;
|
||||
rx_vec_i128 c;
|
||||
c.u32[0] = load32(ptr + 0);
|
||||
c.u32[1] = load32(ptr + 1);
|
||||
c.u32[2] = load32(ptr + 2);
|
||||
c.u32[3] = load32(ptr + 3);
|
||||
return c;
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *_P, rx_vec_i128 _B) {
|
||||
#if defined(NATIVE_LITTLE_ENDIAN)
|
||||
*_P = _B;
|
||||
#else
|
||||
uint32_t* ptr = (uint32_t*)_P;
|
||||
store32(ptr + 0, _B.u32[0]);
|
||||
store32(ptr + 1, _B.u32[1]);
|
||||
store32(ptr + 2, _B.u32[2]);
|
||||
store32(ptr + 3, _B.u32[3]);
|
||||
#endif
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
|
||||
rx_vec_f128 x;
|
||||
x.lo = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
|
||||
x.hi = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
|
||||
return x;
|
||||
}
|
||||
|
||||
#define RANDOMX_DEFAULT_FENV
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_AES
|
||||
static const char* platformError = "Platform doesn't support hardware AES";
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
|
||||
throw std::runtime_error(platformError);
|
||||
}
|
||||
|
||||
FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
|
||||
throw std::runtime_error(platformError);
|
||||
}
|
||||
|
||||
#define HAVE_AES 0
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef RANDOMX_DEFAULT_FENV
|
||||
|
||||
void rx_reset_float_state();
|
||||
|
||||
void rx_set_rounding_mode(uint32_t mode);
|
||||
|
||||
#endif
|
||||
|
||||
double loadDoublePortable(const void* addr);
|
||||
uint64_t mulh(uint64_t, uint64_t);
|
||||
int64_t smulh(int64_t, int64_t);
|
||||
uint64_t rotl(uint64_t, unsigned int);
|
||||
uint64_t rotr(uint64_t, unsigned int);
|
||||
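The functions declared just above (loadDoublePortable, mulh, smulh, rotl, rotr) are only prototypes here; their definitions live elsewhere in the source tree. As a rough illustration only (a minimal sketch, not this commit's actual implementation), the high half of a 64x64-bit unsigned multiplication can be computed portably like this:
```
#include <stdint.h>

/* Sketch: upper 64 bits of a 64x64-bit unsigned product, built from
   32-bit halves so no 128-bit integer type is required. */
static uint64_t mulh_sketch(uint64_t a, uint64_t b) {
    uint64_t aLo = (uint32_t)a, aHi = a >> 32;
    uint64_t bLo = (uint32_t)b, bHi = b >> 32;
    uint64_t t    = aLo * bLo;
    uint64_t mid1 = aHi * bLo + (t >> 32);        /* cannot overflow */
    uint64_t mid2 = aLo * bHi + (uint32_t)mid1;   /* cannot overflow */
    return aHi * bHi + (mid1 >> 32) + (mid2 >> 32);
}
```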
37
randomx/jit_compiler.hpp
Normal file
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
#include "jit_compiler_x86.hpp"
|
||||
#elif defined(__aarch64__)
|
||||
#include "jit_compiler_a64.hpp"
|
||||
#else
|
||||
#include "jit_compiler_fallback.hpp"
|
||||
#endif
|
||||
76
randomx/jit_compiler_fallback.hpp
Normal file
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <stdexcept>
|
||||
#include "common.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
class Program;
|
||||
class ProgramConfiguration;
|
||||
class SuperscalarProgram;
|
||||
|
||||
class JitCompilerFallback {
|
||||
public:
|
||||
JitCompilerFallback() {
|
||||
throw std::runtime_error("JIT compilation is not supported on this platform");
|
||||
}
|
||||
void generateProgram(Program&, ProgramConfiguration&) {
|
||||
|
||||
}
|
||||
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) {
|
||||
|
||||
}
|
||||
template<size_t N>
|
||||
void generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &) {
|
||||
|
||||
}
|
||||
void generateDatasetInitCode() {
|
||||
|
||||
}
|
||||
ProgramFunc* getProgramFunc() {
|
||||
return nullptr;
|
||||
}
|
||||
DatasetInitFunc* getDatasetInitFunc() {
|
||||
return nullptr;
|
||||
}
|
||||
uint8_t* getCode() {
|
||||
return nullptr;
|
||||
}
|
||||
size_t getCodeSize() {
|
||||
return 0;
|
||||
}
|
||||
void enableWriting() {}
|
||||
void enableExecution() {}
|
||||
void enableAll() {}
|
||||
};
|
||||
}
|
||||
845
randomx/jit_compiler_x86.cpp
Normal file
@@ -0,0 +1,845 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdexcept>
|
||||
#include <cstring>
|
||||
#include <climits>
|
||||
#include "jit_compiler_x86.hpp"
|
||||
#include "jit_compiler_x86_static.hpp"
|
||||
#include "superscalar.hpp"
|
||||
#include "program.hpp"
|
||||
#include "reciprocal.h"
|
||||
#include "virtual_memory.hpp"
|
||||
|
||||
namespace randomx {
|
||||
/*
|
||||
|
||||
REGISTER ALLOCATION:
|
||||
|
||||
; rax -> temporary
|
||||
; rbx -> iteration counter "ic"
|
||||
; rcx -> temporary
|
||||
; rdx -> temporary
|
||||
; rsi -> scratchpad pointer
|
||||
; rdi -> dataset pointer
|
||||
; rbp -> memory registers "ma" (high 32 bits), "mx" (low 32 bits)
|
||||
; rsp -> stack pointer
|
||||
; r8 -> "r0"
|
||||
; r9 -> "r1"
|
||||
; r10 -> "r2"
|
||||
; r11 -> "r3"
|
||||
; r12 -> "r4"
|
||||
; r13 -> "r5"
|
||||
; r14 -> "r6"
|
||||
; r15 -> "r7"
|
||||
; xmm0 -> "f0"
|
||||
; xmm1 -> "f1"
|
||||
; xmm2 -> "f2"
|
||||
; xmm3 -> "f3"
|
||||
; xmm4 -> "e0"
|
||||
; xmm5 -> "e1"
|
||||
; xmm6 -> "e2"
|
||||
; xmm7 -> "e3"
|
||||
; xmm8 -> "a0"
|
||||
; xmm9 -> "a1"
|
||||
; xmm10 -> "a2"
|
||||
; xmm11 -> "a3"
|
||||
; xmm12 -> temporary
|
||||
; xmm13 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff
|
||||
; xmm14 -> E 'or' mask = 0x3*00000000******3*00000000******
|
||||
; xmm15 -> scale mask = 0x81f000000000000081f0000000000000
|
||||
|
||||
*/
|
||||
|
||||
//Calculate the required code buffer size that is sufficient for the largest possible program:
|
||||
|
||||
constexpr size_t MaxRandomXInstrCodeSize = 32; //FDIV_M requires up to 32 bytes of x86 code
|
||||
constexpr size_t MaxSuperscalarInstrSize = 14; //IMUL_RCP requires 14 bytes of x86 code
|
||||
constexpr size_t SuperscalarProgramHeader = 128; //overhead per superscalar program
|
||||
constexpr size_t CodeAlign = 4096; //align code size to a multiple of 4 KiB
|
||||
constexpr size_t ReserveCodeSize = CodeAlign; //function prologue/epilogue + reserve
|
||||
|
||||
constexpr size_t RandomXCodeSize = alignSize(ReserveCodeSize + MaxRandomXInstrCodeSize * RANDOMX_PROGRAM_SIZE, CodeAlign);
|
||||
constexpr size_t SuperscalarSize = alignSize(ReserveCodeSize + (SuperscalarProgramHeader + MaxSuperscalarInstrSize * SuperscalarMaxSize) * RANDOMX_CACHE_ACCESSES, CodeAlign);
|
||||
|
||||
static_assert(RandomXCodeSize < INT32_MAX / 2, "RandomXCodeSize is too large");
|
||||
static_assert(SuperscalarSize < INT32_MAX / 2, "SuperscalarSize is too large");
|
||||
|
||||
constexpr uint32_t CodeSize = RandomXCodeSize + SuperscalarSize;
|
||||
|
||||
constexpr int32_t superScalarHashOffset = RandomXCodeSize;
|
||||
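//Worked example, assuming the default RANDOMX_PROGRAM_SIZE of 256 (other configurations scale accordingly):
//  RandomXCodeSize = alignSize(4096 + 32 * 256, 4096) = alignSize(12288, 4096) = 12288 bytes.
//The superscalar hash programs are emitted after this region, hence superScalarHashOffset == RandomXCodeSize.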
|
||||
const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue;
|
||||
const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin;
|
||||
const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
|
||||
const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
|
||||
const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset;
|
||||
const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init;
|
||||
const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin;
|
||||
const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init;
|
||||
const uint8_t* codeLoopStore = (uint8_t*)&randomx_program_loop_store;
|
||||
const uint8_t* codeLoopEnd = (uint8_t*)&randomx_program_loop_end;
|
||||
const uint8_t* codeEpilogue = (uint8_t*)&randomx_program_epilogue;
|
||||
const uint8_t* codeProgramEnd = (uint8_t*)&randomx_program_end;
|
||||
const uint8_t* codeShhLoad = (uint8_t*)&randomx_sshash_load;
|
||||
const uint8_t* codeShhPrefetch = (uint8_t*)&randomx_sshash_prefetch;
|
||||
const uint8_t* codeShhEnd = (uint8_t*)&randomx_sshash_end;
|
||||
const uint8_t* codeShhInit = (uint8_t*)&randomx_sshash_init;
|
||||
|
||||
const int32_t prologueSize = codeLoopBegin - codePrologue;
|
||||
const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
|
||||
const int32_t readDatasetSize = codeReadDatasetLightSshInit - codeReadDataset;
|
||||
const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit;
|
||||
const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin;
|
||||
const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
|
||||
const int32_t datasetInitSize = codeEpilogue - codeDatasetInit;
|
||||
const int32_t epilogueSize = codeShhLoad - codeEpilogue;
|
||||
const int32_t codeSshLoadSize = codeShhPrefetch - codeShhLoad;
|
||||
const int32_t codeSshPrefetchSize = codeShhEnd - codeShhPrefetch;
|
||||
const int32_t codeSshInitSize = codeProgramEnd - codeShhInit;
|
||||
|
||||
const int32_t epilogueOffset = CodeSize - epilogueSize;
|
||||
|
||||
static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 };
|
||||
static const uint8_t REX_ADD_RM[] = { 0x4c, 0x03 };
|
||||
static const uint8_t REX_SUB_RR[] = { 0x4d, 0x2b };
|
||||
static const uint8_t REX_SUB_RM[] = { 0x4c, 0x2b };
|
||||
static const uint8_t REX_MOV_RR[] = { 0x41, 0x8b };
|
||||
static const uint8_t REX_MOV_RR64[] = { 0x49, 0x8b };
|
||||
static const uint8_t REX_MOV_R64R[] = { 0x4c, 0x8b };
|
||||
static const uint8_t REX_IMUL_RR[] = { 0x4d, 0x0f, 0xaf };
|
||||
static const uint8_t REX_IMUL_RRI[] = { 0x4d, 0x69 };
|
||||
static const uint8_t REX_IMUL_RM[] = { 0x4c, 0x0f, 0xaf };
|
||||
static const uint8_t REX_MUL_R[] = { 0x49, 0xf7 };
|
||||
static const uint8_t REX_MUL_M[] = { 0x48, 0xf7 };
|
||||
static const uint8_t REX_81[] = { 0x49, 0x81 };
|
||||
static const uint8_t AND_EAX_I = 0x25;
|
||||
static const uint8_t MOV_EAX_I = 0xb8;
|
||||
static const uint8_t MOV_RAX_I[] = { 0x48, 0xb8 };
|
||||
static const uint8_t MOV_RCX_I[] = { 0x48, 0xb9 };
|
||||
static const uint8_t REX_LEA[] = { 0x4f, 0x8d };
|
||||
static const uint8_t REX_MUL_MEM[] = { 0x48, 0xf7, 0x24, 0x0e };
|
||||
static const uint8_t REX_IMUL_MEM[] = { 0x48, 0xf7, 0x2c, 0x0e };
|
||||
static const uint8_t REX_SHR_RAX[] = { 0x48, 0xc1, 0xe8 };
|
||||
static const uint8_t RAX_ADD_SBB_1[] = { 0x48, 0x83, 0xC0, 0x01, 0x48, 0x83, 0xD8, 0x00 };
|
||||
static const uint8_t MUL_RCX[] = { 0x48, 0xf7, 0xe1 };
|
||||
static const uint8_t REX_SHR_RDX[] = { 0x48, 0xc1, 0xea };
|
||||
static const uint8_t REX_SH[] = { 0x49, 0xc1 };
|
||||
static const uint8_t MOV_RCX_RAX_SAR_RCX_63[] = { 0x48, 0x89, 0xc1, 0x48, 0xc1, 0xf9, 0x3f };
|
||||
static const uint8_t AND_ECX_I[] = { 0x81, 0xe1 };
|
||||
static const uint8_t ADD_RAX_RCX[] = { 0x48, 0x01, 0xC8 };
|
||||
static const uint8_t SAR_RAX_I8[] = { 0x48, 0xC1, 0xF8 };
|
||||
static const uint8_t NEG_RAX[] = { 0x48, 0xF7, 0xD8 };
|
||||
static const uint8_t ADD_R_RAX[] = { 0x4C, 0x03 };
|
||||
static const uint8_t XOR_EAX_EAX[] = { 0x33, 0xC0 };
|
||||
static const uint8_t ADD_RDX_R[] = { 0x4c, 0x01 };
|
||||
static const uint8_t SUB_RDX_R[] = { 0x4c, 0x29 };
|
||||
static const uint8_t SAR_RDX_I8[] = { 0x48, 0xC1, 0xFA };
|
||||
static const uint8_t TEST_RDX_RDX[] = { 0x48, 0x85, 0xD2 };
|
||||
static const uint8_t SETS_AL_ADD_RDX_RAX[] = { 0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0 };
|
||||
static const uint8_t REX_NEG[] = { 0x49, 0xF7 };
|
||||
static const uint8_t REX_XOR_RR[] = { 0x4D, 0x33 };
|
||||
static const uint8_t REX_XOR_RI[] = { 0x49, 0x81 };
|
||||
static const uint8_t REX_XOR_RM[] = { 0x4c, 0x33 };
|
||||
static const uint8_t REX_ROT_CL[] = { 0x49, 0xd3 };
|
||||
static const uint8_t REX_ROT_I8[] = { 0x49, 0xc1 };
|
||||
static const uint8_t SHUFPD[] = { 0x66, 0x0f, 0xc6 };
|
||||
static const uint8_t REX_ADDPD[] = { 0x66, 0x41, 0x0f, 0x58 };
|
||||
static const uint8_t REX_CVTDQ2PD_XMM12[] = { 0xf3, 0x44, 0x0f, 0xe6, 0x24, 0x06 };
|
||||
static const uint8_t REX_SUBPD[] = { 0x66, 0x41, 0x0f, 0x5c };
|
||||
static const uint8_t REX_XORPS[] = { 0x41, 0x0f, 0x57 };
|
||||
static const uint8_t REX_MULPD[] = { 0x66, 0x41, 0x0f, 0x59 };
|
||||
static const uint8_t REX_MAXPD[] = { 0x66, 0x41, 0x0f, 0x5f };
|
||||
static const uint8_t REX_DIVPD[] = { 0x66, 0x41, 0x0f, 0x5e };
|
||||
static const uint8_t SQRTPD[] = { 0x66, 0x0f, 0x51 };
|
||||
static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x50, 0x0F, 0xAE, 0x14, 0x24, 0x58 };
|
||||
static const uint8_t ROL_RAX[] = { 0x48, 0xc1, 0xc0 };
|
||||
static const uint8_t XOR_ECX_ECX[] = { 0x33, 0xC9 };
|
||||
static const uint8_t REX_CMP_R32I[] = { 0x41, 0x81 };
|
||||
static const uint8_t REX_CMP_M32I[] = { 0x81, 0x3c, 0x06 };
|
||||
static const uint8_t MOVAPD[] = { 0x66, 0x0f, 0x29 };
|
||||
static const uint8_t REX_MOV_MR[] = { 0x4c, 0x89 };
|
||||
static const uint8_t REX_XOR_EAX[] = { 0x41, 0x33 };
|
||||
static const uint8_t SUB_EBX[] = { 0x83, 0xEB, 0x01 };
|
||||
static const uint8_t JNZ[] = { 0x0f, 0x85 };
|
||||
static const uint8_t JMP = 0xe9;
|
||||
static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
|
||||
static const uint8_t REX_XCHG[] = { 0x4d, 0x87 };
|
||||
static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6 };
|
||||
static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f };
|
||||
static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 };
|
||||
static const uint8_t CALL = 0xe8;
|
||||
static const uint8_t REX_ADD_I[] = { 0x49, 0x81 };
|
||||
static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
|
||||
static const uint8_t JZ[] = { 0x0f, 0x84 };
|
||||
static const uint8_t RET = 0xc3;
|
||||
static const uint8_t LEA_32[] = { 0x41, 0x8d };
|
||||
static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 };
|
||||
static const uint8_t ADD_EBX_I[] = { 0x81, 0xc3 };
|
||||
|
||||
static const uint8_t NOP1[] = { 0x90 };
|
||||
static const uint8_t NOP2[] = { 0x66, 0x90 };
|
||||
static const uint8_t NOP3[] = { 0x66, 0x66, 0x90 };
|
||||
static const uint8_t NOP4[] = { 0x0F, 0x1F, 0x40, 0x00 };
|
||||
static const uint8_t NOP5[] = { 0x0F, 0x1F, 0x44, 0x00, 0x00 };
|
||||
static const uint8_t NOP6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 };
|
||||
static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 };
|
||||
static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
|
||||
|
||||
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
|
||||
|
||||
size_t JitCompilerX86::getCodeSize() {
|
||||
return CodeSize;
|
||||
}
|
||||
|
||||
JitCompilerX86::JitCompilerX86() {
|
||||
code = (uint8_t*)allocMemoryPages(CodeSize);
|
||||
memcpy(code, codePrologue, prologueSize);
|
||||
memcpy(code + epilogueOffset, codeEpilogue, epilogueSize);
|
||||
}
|
||||
|
||||
JitCompilerX86::~JitCompilerX86() {
|
||||
freePagedMemory(code, CodeSize);
|
||||
}
|
||||
|
||||
void JitCompilerX86::enableAll() {
|
||||
setPagesRWX(code, CodeSize);
|
||||
}
|
||||
|
||||
void JitCompilerX86::enableWriting() {
|
||||
setPagesRW(code, CodeSize);
|
||||
}
|
||||
|
||||
void JitCompilerX86::enableExecution() {
|
||||
setPagesRX(code, CodeSize);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) {
|
||||
//printf("---\n");
|
||||
generateProgramPrologue(prog, pcfg);
|
||||
memcpy(code + codePos, codeReadDataset, readDatasetSize);
|
||||
codePos += readDatasetSize;
|
||||
generateProgramEpilogue(prog, pcfg);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) {
|
||||
generateProgramPrologue(prog, pcfg);
|
||||
emit(codeReadDatasetLightSshInit, readDatasetLightInitSize);
|
||||
emit(ADD_EBX_I);
|
||||
emit32(datasetOffset / CacheLineSize);
|
||||
emitByte(CALL);
|
||||
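//The CALL's rel32 is measured from the end of its 4-byte displacement field (codePos + 4),
//i.e. from the address of the instruction that follows the CALL.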
emit32(superScalarHashOffset - (codePos + 4));
|
||||
emit(codeReadDatasetLightSshFin, readDatasetLightFinSize);
|
||||
generateProgramEpilogue(prog, pcfg);
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &reciprocalCache) {
|
||||
printf("xcccc\n");
|
||||
memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize);
|
||||
codePos = superScalarHashOffset + codeSshInitSize;
|
||||
for (unsigned j = 0; j < N; ++j) {
|
||||
SuperscalarProgram& prog = programs[j];
|
||||
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
||||
Instruction& instr = prog(i);
|
||||
generateSuperscalarCode(instr, reciprocalCache);
|
||||
}
|
||||
emit(codeShhLoad, codeSshLoadSize);
|
||||
if (j < N - 1) {
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xd8 + prog.getAddressRegister());
|
||||
emit(codeShhPrefetch, codeSshPrefetchSize);
|
||||
#ifdef RANDOMX_ALIGN
|
||||
int align = (codePos % 16);
|
||||
while (align != 0) {
|
||||
int nopSize = 16 - align;
|
||||
if (nopSize > 8) nopSize = 8;
|
||||
emit(NOPX[nopSize - 1], nopSize);
|
||||
align = (codePos % 16);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
emitByte(RET);
|
||||
}
|
||||
|
||||
template
|
||||
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES], std::vector<uint64_t> &reciprocalCache);
|
||||
|
||||
void JitCompilerX86::generateDatasetInitCode() {
|
||||
memcpy(code, codeDatasetInit, datasetInitSize);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
|
||||
instructionOffsets.clear();
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
registerUsage[i] = -1;
|
||||
}
|
||||
|
||||
codePos = ((uint8_t*)randomx_program_prologue_first_load) - ((uint8_t*)randomx_program_prologue);
|
||||
code[codePos + sizeof(REX_XOR_RAX_R64)] = 0xc0 + pcfg.readReg0;
|
||||
code[codePos + sizeof(REX_XOR_RAX_R64) * 2 + 1] = 0xc0 + pcfg.readReg1;
|
||||
|
||||
codePos = prologueSize;
|
||||
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
|
||||
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
|
||||
codePos += loopLoadSize;
|
||||
for (unsigned i = 0; i < prog.getSize(); ++i) {
|
||||
Instruction& instr = prog(i);
|
||||
instr.src %= RegistersCount;
|
||||
instr.dst %= RegistersCount;
|
||||
generateCode(instr, i);
|
||||
}
|
||||
emit(REX_MOV_RR);
|
||||
emitByte(0xc0 + pcfg.readReg2);
|
||||
emit(REX_XOR_EAX);
|
||||
emitByte(0xc0 + pcfg.readReg3);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateProgramEpilogue(Program& prog, ProgramConfiguration& pcfg) {
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + pcfg.readReg0);
|
||||
emit(REX_XOR_RAX_R64);
|
||||
emitByte(0xc0 + pcfg.readReg1);
|
||||
emit((const uint8_t*)&randomx_prefetch_scratchpad, ((uint8_t*)&randomx_prefetch_scratchpad_end) - ((uint8_t*)&randomx_prefetch_scratchpad));
|
||||
memcpy(code + codePos, codeLoopStore, loopStoreSize);
|
||||
codePos += loopStoreSize;
|
||||
emit(SUB_EBX);
|
||||
emit(JNZ);
|
||||
emit32(prologueSize - codePos - 4);
|
||||
emitByte(JMP);
|
||||
emit32(epilogueOffset - codePos - 4);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateCode(Instruction& instr, int i) {
|
||||
instructionOffsets.push_back(codePos);
|
||||
auto generator = engine[instr.opcode];
|
||||
(this->*generator)(instr, i);
|
||||
}
|
||||
|
||||
void JitCompilerX86::generateSuperscalarCode(Instruction& instr, std::vector<uint64_t> &reciprocalCache) {
|
||||
switch ((SuperscalarInstructionType)instr.opcode)
|
||||
{
|
||||
case randomx::SuperscalarInstructionType::ISUB_R:
|
||||
emit(REX_SUB_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IXOR_R:
|
||||
emit(REX_XOR_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IADD_RS:
|
||||
emit(REX_LEA);
|
||||
emitByte(0x04 + 8 * instr.dst);
|
||||
genSIB(instr.getModShift(), instr.src, instr.dst);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IMUL_R:
|
||||
emit(REX_IMUL_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IROR_C:
|
||||
emit(REX_ROT_I8);
|
||||
emitByte(0xc8 + instr.dst);
|
||||
emitByte(instr.getImm32() & 63);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IADD_C7:
|
||||
emit(REX_81);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IXOR_C7:
|
||||
emit(REX_XOR_RI);
|
||||
emitByte(0xf0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IADD_C8:
|
||||
emit(REX_81);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
#ifdef RANDOMX_ALIGN
|
||||
emit(NOP1);
|
||||
#endif
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IXOR_C8:
|
||||
emit(REX_XOR_RI);
|
||||
emitByte(0xf0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
#ifdef RANDOMX_ALIGN
|
||||
emit(NOP1);
|
||||
#endif
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IADD_C9:
|
||||
emit(REX_81);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
#ifdef RANDOMX_ALIGN
|
||||
emit(NOP2);
|
||||
#endif
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IXOR_C9:
|
||||
emit(REX_XOR_RI);
|
||||
emitByte(0xf0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
#ifdef RANDOMX_ALIGN
|
||||
emit(NOP2);
|
||||
#endif
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IMULH_R:
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_R);
|
||||
emitByte(0xe0 + instr.src);
|
||||
emit(REX_MOV_R64R);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::ISMULH_R:
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_R);
|
||||
emitByte(0xe8 + instr.src);
|
||||
emit(REX_MOV_R64R);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
break;
|
||||
case randomx::SuperscalarInstructionType::IMUL_RCP:
|
||||
emit(MOV_RAX_I);
|
||||
emit64(reciprocalCache[instr.getImm32()]);
|
||||
emit(REX_IMUL_RM);
|
||||
emitByte(0xc0 + 8 * instr.dst);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
|
||||
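//Emits "lea eax, [src+imm32]" (or "lea ecx, ..." when rax == false) followed by an AND with the
//L1 or L2 scratchpad mask, turning register-plus-immediate into a bounded scratchpad byte offset.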
void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) {
|
||||
emit(LEA_32);
|
||||
emitByte(0x80 + instr.src + (rax ? 0 : 8));
|
||||
if (instr.src == RegisterNeedsSib) {
|
||||
emitByte(0x24);
|
||||
}
|
||||
emit32(instr.getImm32());
|
||||
if (rax)
|
||||
emitByte(AND_EAX_I);
|
||||
else
|
||||
emit(AND_ECX_I);
|
||||
emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
|
||||
void JitCompilerX86::genAddressRegDst(Instruction& instr) {
|
||||
emit(LEA_32);
|
||||
emitByte(0x80 + instr.dst);
|
||||
if (instr.dst == RegisterNeedsSib) {
|
||||
emitByte(0x24);
|
||||
}
|
||||
emit32(instr.getImm32());
|
||||
emitByte(AND_EAX_I);
|
||||
if (instr.getModCond() < StoreL3Condition) {
|
||||
emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
|
||||
}
|
||||
else {
|
||||
emit32(ScratchpadL3Mask);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::genAddressImm(Instruction& instr) {
|
||||
emit32(instr.getImm32() & ScratchpadL3Mask);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IADD_RS(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(REX_LEA);
|
||||
if (instr.dst == RegisterNeedsDisplacement)
|
||||
emitByte(0xac);
|
||||
else
|
||||
emitByte(0x04 + 8 * instr.dst);
|
||||
genSIB(instr.getModShift(), instr.src, instr.dst);
|
||||
if (instr.dst == RegisterNeedsDisplacement)
|
||||
emit32(instr.getImm32());
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IADD_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr);
|
||||
emit(REX_ADD_RM);
|
||||
emitByte(0x04 + 8 * instr.dst);
|
||||
emitByte(0x06);
|
||||
}
|
||||
else {
|
||||
emit(REX_ADD_RM);
|
||||
emitByte(0x86 + 8 * instr.dst);
|
||||
genAddressImm(instr);
|
||||
}
|
||||
}
|
||||
|
||||
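//Packs a SIB byte: bits 7-6 scale, 5-3 index, 2-0 base. Example: genSIB(2, 1, 2) emits 0x8a,
//i.e. scale*4, index r9, base r10 (the high registers come from the REX.X/REX.B bits of the
//preceding REX_LEA prefix).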
void JitCompilerX86::genSIB(int scale, int index, int base) {
|
||||
emitByte((scale << 6) | (index << 3) | base);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
emit(REX_SUB_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
}
|
||||
else {
|
||||
emit(REX_81);
|
||||
emitByte(0xe8 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISUB_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr);
|
||||
emit(REX_SUB_RM);
|
||||
emitByte(0x04 + 8 * instr.dst);
|
||||
emitByte(0x06);
|
||||
}
|
||||
else {
|
||||
emit(REX_SUB_RM);
|
||||
emitByte(0x86 + 8 * instr.dst);
|
||||
genAddressImm(instr);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
emit(REX_IMUL_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
}
|
||||
else {
|
||||
emit(REX_IMUL_RRI);
|
||||
emitByte(0xc0 + 9 * instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMUL_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr);
|
||||
emit(REX_IMUL_RM);
|
||||
emitByte(0x04 + 8 * instr.dst);
|
||||
emitByte(0x06);
|
||||
}
|
||||
else {
|
||||
emit(REX_IMUL_RM);
|
||||
emitByte(0x86 + 8 * instr.dst);
|
||||
genAddressImm(instr);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_R);
|
||||
emitByte(0xe0 + instr.src);
|
||||
emit(REX_MOV_R64R);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMULH_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr, false);
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_MEM);
|
||||
}
|
||||
else {
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_M);
|
||||
emitByte(0xa6);
|
||||
genAddressImm(instr);
|
||||
}
|
||||
emit(REX_MOV_R64R);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_R);
|
||||
emitByte(0xe8 + instr.src);
|
||||
emit(REX_MOV_R64R);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISMULH_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr, false);
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_IMUL_MEM);
|
||||
}
|
||||
else {
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emit(REX_MUL_M);
|
||||
emitByte(0xae);
|
||||
genAddressImm(instr);
|
||||
}
|
||||
emit(REX_MOV_R64R);
|
||||
emitByte(0xc2 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
|
||||
uint64_t divisor = instr.getImm32();
|
||||
if (!isZeroOrPowerOf2(divisor)) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(MOV_RAX_I);
|
||||
emit64(randomx_reciprocal_fast(divisor));
|
||||
emit(REX_IMUL_RM);
|
||||
emitByte(0xc0 + 8 * instr.dst);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_INEG_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
emit(REX_NEG);
|
||||
emitByte(0xd8 + instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
emit(REX_XOR_RR);
|
||||
emitByte(0xc0 + 8 * instr.dst + instr.src);
|
||||
}
|
||||
else {
|
||||
emit(REX_XOR_RI);
|
||||
emitByte(0xf0 + instr.dst);
|
||||
emit32(instr.getImm32());
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IXOR_M(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
genAddressReg(instr);
|
||||
emit(REX_XOR_RM);
|
||||
emitByte(0x04 + 8 * instr.dst);
|
||||
emitByte(0x06);
|
||||
}
|
||||
else {
|
||||
emit(REX_XOR_RM);
|
||||
emitByte(0x86 + 8 * instr.dst);
|
||||
genAddressImm(instr);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IROR_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
emit(REX_MOV_RR);
|
||||
emitByte(0xc8 + instr.src);
|
||||
emit(REX_ROT_CL);
|
||||
emitByte(0xc8 + instr.dst);
|
||||
}
|
||||
else {
|
||||
emit(REX_ROT_I8);
|
||||
emitByte(0xc8 + instr.dst);
|
||||
emitByte(instr.getImm32() & 63);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_IROL_R(Instruction& instr, int i) {
|
||||
registerUsage[instr.dst] = i;
|
||||
if (instr.src != instr.dst) {
|
||||
emit(REX_MOV_RR);
|
||||
emitByte(0xc8 + instr.src);
|
||||
emit(REX_ROT_CL);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
}
|
||||
else {
|
||||
emit(REX_ROT_I8);
|
||||
emitByte(0xc0 + instr.dst);
|
||||
emitByte(instr.getImm32() & 63);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISWAP_R(Instruction& instr, int i) {
|
||||
if (instr.src != instr.dst) {
|
||||
registerUsage[instr.dst] = i;
|
||||
registerUsage[instr.src] = i;
|
||||
emit(REX_XCHG);
|
||||
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSWAP_R(Instruction& instr, int i) {
|
||||
emit(SHUFPD);
|
||||
emitByte(0xc0 + 9 * instr.dst);
|
||||
emitByte(1);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FADD_R(Instruction& instr, int i) {
|
||||
instr.dst %= RegisterCountFlt;
|
||||
instr.src %= RegisterCountFlt;
|
||||
emit(REX_ADDPD);
|
||||
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FADD_M(Instruction& instr, int i) {
|
||||
instr.dst %= RegisterCountFlt;
|
||||
genAddressReg(instr);
|
||||
emit(REX_CVTDQ2PD_XMM12);
|
||||
emit(REX_ADDPD);
|
||||
emitByte(0xc4 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) {
|
||||
instr.dst %= RegisterCountFlt;
|
||||
instr.src %= RegisterCountFlt;
|
||||
emit(REX_SUBPD);
|
||||
emitByte(0xc0 + instr.src + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) {
|
||||
instr.dst %= RegisterCountFlt;
|
||||
genAddressReg(instr);
|
||||
emit(REX_CVTDQ2PD_XMM12);
|
||||
emit(REX_SUBPD);
|
||||
emitByte(0xc4 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) {
|
||||
instr.dst %= RegisterCountFlt;
|
||||
emit(REX_XORPS);
|
||||
emitByte(0xc7 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) {
|
||||
instr.dst %= RegisterCountFlt;
|
||||
instr.src %= RegisterCountFlt;
|
||||
emit(REX_MULPD);
|
||||
emitByte(0xe0 + instr.src + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) {
|
||||
instr.dst %= RegisterCountFlt;
|
||||
genAddressReg(instr);
|
||||
emit(REX_CVTDQ2PD_XMM12);
|
||||
emit(REX_ANDPS_XMM12);
|
||||
emit(REX_DIVPD);
|
||||
emitByte(0xe4 + 8 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) {
|
||||
instr.dst %= RegisterCountFlt;
|
||||
emit(SQRTPD);
|
||||
emitByte(0xe4 + 9 * instr.dst);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_CFROUND(Instruction& instr, int i) {
|
||||
emit(REX_MOV_RR64);
|
||||
emitByte(0xc0 + instr.src);
|
||||
int rotate = (13 - (instr.getImm32() & 63)) & 63;
|
||||
if (rotate != 0) {
|
||||
emit(ROL_RAX);
|
||||
emitByte(rotate);
|
||||
}
|
||||
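//AND_OR_MOV_LDMXCSR decodes to: and eax, 0x6000; or eax, 0x9FC0; push rax; ldmxcsr dword ptr [rsp];
//pop rax. It keeps only the rounding-control bits, forces the remaining flags to fixed values, and
//loads the result into MXCSR.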
emit(AND_OR_MOV_LDMXCSR);
|
||||
}
|
||||
|
||||
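//CBRANCH adds a patched immediate to the destination register (one condition bit forced to 1 and
//the bit below it forced to 0), tests the shifted condition mask, and when the masked bits are all
//zero jumps back to the instruction after the last one that modified that register.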
void JitCompilerX86::h_CBRANCH(Instruction& instr, int i) {
|
||||
int reg = instr.dst;
|
||||
int target = registerUsage[reg] + 1;
|
||||
emit(REX_ADD_I);
|
||||
emitByte(0xc0 + reg);
|
||||
int shift = instr.getModCond() + ConditionOffset;
|
||||
uint32_t imm = instr.getImm32() | (1UL << shift);
|
||||
if (ConditionOffset > 0 || shift > 0)
|
||||
imm &= ~(1UL << (shift - 1));
|
||||
emit32(imm);
|
||||
emit(REX_TEST);
|
||||
emitByte(0xc0 + reg);
|
||||
emit32(ConditionMask << shift);
|
||||
emit(JZ);
|
||||
emit32(instructionOffsets[target] - (codePos + 4));
|
||||
//mark all registers as used
|
||||
for (unsigned j = 0; j < RegistersCount; ++j) {
|
||||
registerUsage[j] = i;
|
||||
}
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_ISTORE(Instruction& instr, int i) {
|
||||
genAddressRegDst(instr);
|
||||
emit(REX_MOV_MR);
|
||||
emitByte(0x04 + 8 * instr.src);
|
||||
emitByte(0x06);
|
||||
}
|
||||
|
||||
void JitCompilerX86::h_NOP(Instruction& instr, int i) {
|
||||
emit(NOP1);
|
||||
}
|
||||
|
||||
#include "instruction_weights.hpp"
|
||||
#define INST_HANDLE(x) REPN(&JitCompilerX86::h_##x, WT(x))
|
||||
|
||||
InstructionGeneratorX86 JitCompilerX86::engine[256] = {
|
||||
INST_HANDLE(IADD_RS)
|
||||
INST_HANDLE(IADD_M)
|
||||
INST_HANDLE(ISUB_R)
|
||||
INST_HANDLE(ISUB_M)
|
||||
INST_HANDLE(IMUL_R)
|
||||
INST_HANDLE(IMUL_M)
|
||||
INST_HANDLE(IMULH_R)
|
||||
INST_HANDLE(IMULH_M)
|
||||
INST_HANDLE(ISMULH_R)
|
||||
INST_HANDLE(ISMULH_M)
|
||||
INST_HANDLE(IMUL_RCP)
|
||||
INST_HANDLE(INEG_R)
|
||||
INST_HANDLE(IXOR_R)
|
||||
INST_HANDLE(IXOR_M)
|
||||
INST_HANDLE(IROR_R)
|
||||
INST_HANDLE(IROL_R)
|
||||
INST_HANDLE(ISWAP_R)
|
||||
INST_HANDLE(FSWAP_R)
|
||||
INST_HANDLE(FADD_R)
|
||||
INST_HANDLE(FADD_M)
|
||||
INST_HANDLE(FSUB_R)
|
||||
INST_HANDLE(FSUB_M)
|
||||
INST_HANDLE(FSCAL_R)
|
||||
INST_HANDLE(FMUL_R)
|
||||
INST_HANDLE(FDIV_M)
|
||||
INST_HANDLE(FSQRT_R)
|
||||
INST_HANDLE(CBRANCH)
|
||||
INST_HANDLE(CFROUND)
|
||||
INST_HANDLE(ISTORE)
|
||||
INST_HANDLE(NOP)
|
||||
};
|
||||
|
||||
}
|
||||
142
randomx/jit_compiler_x86.hpp
Normal file
@@ -0,0 +1,142 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstring>
|
||||
#include <vector>
|
||||
#include "common.hpp"
|
||||
|
||||
namespace randomx {
|
||||
|
||||
class Program;
|
||||
class ProgramConfiguration;
|
||||
class SuperscalarProgram;
|
||||
class JitCompilerX86;
|
||||
class Instruction;
|
||||
|
||||
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
|
||||
|
||||
class JitCompilerX86 {
|
||||
public:
|
||||
JitCompilerX86();
|
||||
~JitCompilerX86();
|
||||
void generateProgram(Program&, ProgramConfiguration&);
|
||||
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
|
||||
template<size_t N>
|
||||
void generateSuperscalarHash(SuperscalarProgram (&programs)[N], std::vector<uint64_t> &);
|
||||
void generateDatasetInitCode();
|
||||
ProgramFunc* getProgramFunc() {
|
||||
return (ProgramFunc*)code;
|
||||
}
|
||||
DatasetInitFunc* getDatasetInitFunc() {
|
||||
return (DatasetInitFunc*)code;
|
||||
}
|
||||
uint8_t* getCode() {
|
||||
return code;
|
||||
}
|
||||
size_t getCodeSize();
|
||||
void enableWriting();
|
||||
void enableExecution();
|
||||
void enableAll();
|
||||
private:
|
||||
static InstructionGeneratorX86 engine[256];
|
||||
std::vector<int32_t> instructionOffsets;
|
||||
int registerUsage[RegistersCount];
|
||||
uint8_t* code;
|
||||
int32_t codePos;
|
||||
|
||||
void generateProgramPrologue(Program&, ProgramConfiguration&);
|
||||
void generateProgramEpilogue(Program&, ProgramConfiguration&);
|
||||
void genAddressReg(Instruction&, bool);
|
||||
void genAddressRegDst(Instruction&);
|
||||
void genAddressImm(Instruction&);
|
||||
void genSIB(int scale, int index, int base);
|
||||
|
||||
void generateCode(Instruction&, int);
|
||||
void generateSuperscalarCode(Instruction &, std::vector<uint64_t> &);
|
||||
|
||||
void emitByte(uint8_t val) {
|
||||
code[codePos] = val;
|
||||
codePos++;
|
||||
}
|
||||
|
||||
void emit32(uint32_t val) {
|
||||
memcpy(code + codePos, &val, sizeof val);
|
||||
codePos += sizeof val;
|
||||
}
|
||||
|
||||
void emit64(uint64_t val) {
|
||||
memcpy(code + codePos, &val, sizeof val);
|
||||
codePos += sizeof val;
|
||||
}
|
||||
|
||||
template<size_t N>
|
||||
void emit(const uint8_t (&src)[N]) {
|
||||
emit(src, N);
|
||||
}
|
||||
|
||||
void emit(const uint8_t* src, size_t count) {
|
||||
memcpy(code + codePos, src, count);
|
||||
codePos += count;
|
||||
}
|
||||
|
||||
void h_IADD_RS(Instruction&, int);
|
||||
void h_IADD_M(Instruction&, int);
|
||||
void h_ISUB_R(Instruction&, int);
|
||||
void h_ISUB_M(Instruction&, int);
|
||||
void h_IMUL_R(Instruction&, int);
|
||||
void h_IMUL_M(Instruction&, int);
|
||||
void h_IMULH_R(Instruction&, int);
|
||||
void h_IMULH_M(Instruction&, int);
|
||||
void h_ISMULH_R(Instruction&, int);
|
||||
void h_ISMULH_M(Instruction&, int);
|
||||
void h_IMUL_RCP(Instruction&, int);
|
||||
void h_INEG_R(Instruction&, int);
|
||||
void h_IXOR_R(Instruction&, int);
|
||||
void h_IXOR_M(Instruction&, int);
|
||||
void h_IROR_R(Instruction&, int);
|
||||
void h_IROL_R(Instruction&, int);
|
||||
void h_ISWAP_R(Instruction&, int);
|
||||
void h_FSWAP_R(Instruction&, int);
|
||||
void h_FADD_R(Instruction&, int);
|
||||
void h_FADD_M(Instruction&, int);
|
||||
void h_FSUB_R(Instruction&, int);
|
||||
void h_FSUB_M(Instruction&, int);
|
||||
void h_FSCAL_R(Instruction&, int);
|
||||
void h_FMUL_R(Instruction&, int);
|
||||
void h_FDIV_M(Instruction&, int);
|
||||
void h_FSQRT_R(Instruction&, int);
|
||||
void h_CBRANCH(Instruction&, int);
|
||||
void h_CFROUND(Instruction&, int);
|
||||
void h_ISTORE(Instruction&, int);
|
||||
void h_NOP(Instruction&, int);
|
||||
};
|
||||
|
||||
}
|
||||
232
randomx/jit_compiler_x86_static.S
Normal file
@@ -0,0 +1,232 @@
|
||||
# Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
#
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of the copyright holder nor the
|
||||
# names of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
.intel_syntax noprefix
|
||||
#if defined(__APPLE__)
|
||||
.text
|
||||
#define DECL(x) _##x
|
||||
#else
|
||||
.section .text
|
||||
#define DECL(x) x
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__CYGWIN__)
|
||||
#define WINABI
|
||||
#endif
|
||||
|
||||
.global DECL(randomx_prefetch_scratchpad)
|
||||
.global DECL(randomx_prefetch_scratchpad_end)
|
||||
.global DECL(randomx_program_prologue)
|
||||
.global DECL(randomx_program_prologue_first_load)
|
||||
.global DECL(randomx_program_loop_begin)
|
||||
.global DECL(randomx_program_loop_load)
|
||||
.global DECL(randomx_program_start)
|
||||
.global DECL(randomx_program_read_dataset)
|
||||
.global DECL(randomx_program_read_dataset_sshash_init)
|
||||
.global DECL(randomx_program_read_dataset_sshash_fin)
|
||||
.global DECL(randomx_program_loop_store)
|
||||
.global DECL(randomx_program_loop_end)
|
||||
.global DECL(randomx_dataset_init)
|
||||
.global DECL(randomx_program_epilogue)
|
||||
.global DECL(randomx_sshash_load)
|
||||
.global DECL(randomx_sshash_prefetch)
|
||||
.global DECL(randomx_sshash_end)
|
||||
.global DECL(randomx_sshash_init)
|
||||
.global DECL(randomx_program_end)
|
||||
.global DECL(randomx_reciprocal_fast)
|
||||
|
||||
#include "configuration.h"
|
||||
|
||||
#define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64)
|
||||
#define RANDOMX_DATASET_BASE_MASK (RANDOMX_DATASET_BASE_SIZE-64)
|
||||
#define RANDOMX_CACHE_MASK (RANDOMX_ARGON_MEMORY*16-1)
|
||||
#define RANDOMX_ALIGN 4096
|
||||
#define SUPERSCALAR_OFFSET ((((RANDOMX_ALIGN + 32 * RANDOMX_PROGRAM_SIZE) - 1) / (RANDOMX_ALIGN) + 1) * (RANDOMX_ALIGN))
|
||||
|
||||
#define db .byte
|
||||
|
||||
DECL(randomx_prefetch_scratchpad):
|
||||
mov rdx, rax
|
||||
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||
prefetcht0 [rsi+rax]
|
||||
ror rdx, 32
|
||||
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||
prefetcht0 [rsi+rdx]
|
||||
|
||||
DECL(randomx_prefetch_scratchpad_end):
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_program_prologue):
|
||||
#if defined(WINABI)
|
||||
#include "asm/program_prologue_win64.inc"
|
||||
#else
|
||||
#include "asm/program_prologue_linux.inc"
|
||||
#endif
|
||||
movapd xmm13, xmmword ptr [mantissaMask+rip]
|
||||
movapd xmm14, xmmword ptr [exp240+rip]
|
||||
movapd xmm15, xmmword ptr [scaleMask+rip]
|
||||
|
||||
DECL(randomx_program_prologue_first_load):
|
||||
xor rax, r8
|
||||
xor rax, r8
|
||||
mov rdx, rax
|
||||
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||
ror rdx, 32
|
||||
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||
jmp DECL(randomx_program_loop_begin)
|
||||
|
||||
.balign 64
|
||||
#include "asm/program_xmm_constants.inc"
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_program_loop_begin):
|
||||
nop
|
||||
|
||||
DECL(randomx_program_loop_load):
|
||||
#include "asm/program_loop_load.inc"
|
||||
|
||||
DECL(randomx_program_start):
|
||||
nop
|
||||
|
||||
DECL(randomx_program_read_dataset):
|
||||
#include "asm/program_read_dataset.inc"
|
||||
|
||||
DECL(randomx_program_read_dataset_sshash_init):
|
||||
#include "asm/program_read_dataset_sshash_init.inc"
|
||||
|
||||
DECL(randomx_program_read_dataset_sshash_fin):
|
||||
#include "asm/program_read_dataset_sshash_fin.inc"
|
||||
|
||||
DECL(randomx_program_loop_store):
|
||||
#include "asm/program_loop_store.inc"
|
||||
|
||||
DECL(randomx_program_loop_end):
|
||||
nop
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_dataset_init):
|
||||
push rbx
|
||||
push rbp
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
#if defined(WINABI)
|
||||
push rdi
|
||||
push rsi
|
||||
mov rdi, qword ptr [rcx] ;# cache->memory
|
||||
mov rsi, rdx ;# dataset
|
||||
mov rbp, r8 ;# block index
|
||||
push r9 ;# max. block index
|
||||
#else
|
||||
mov rdi, qword ptr [rdi] ;# cache->memory
|
||||
;# dataset in rsi
|
||||
mov rbp, rdx ;# block index
|
||||
push rcx ;# max. block index
|
||||
#endif
|
||||
init_block_loop:
|
||||
prefetchw byte ptr [rsi]
|
||||
mov rbx, rbp
|
||||
.byte 232 ;# 0xE8 = call
|
||||
.int SUPERSCALAR_OFFSET - (call_offset - DECL(randomx_dataset_init))
|
||||
call_offset:
|
||||
mov qword ptr [rsi+0], r8
|
||||
mov qword ptr [rsi+8], r9
|
||||
mov qword ptr [rsi+16], r10
|
||||
mov qword ptr [rsi+24], r11
|
||||
mov qword ptr [rsi+32], r12
|
||||
mov qword ptr [rsi+40], r13
|
||||
mov qword ptr [rsi+48], r14
|
||||
mov qword ptr [rsi+56], r15
|
||||
add rbp, 1
|
||||
add rsi, 64
|
||||
cmp rbp, qword ptr [rsp]
|
||||
jb init_block_loop
|
||||
pop rax
|
||||
#if defined(WINABI)
|
||||
pop rsi
|
||||
pop rdi
|
||||
#endif
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_program_epilogue):
|
||||
#include "asm/program_epilogue_store.inc"
|
||||
#if defined(WINABI)
|
||||
#include "asm/program_epilogue_win64.inc"
|
||||
#else
|
||||
#include "asm/program_epilogue_linux.inc"
|
||||
#endif
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_sshash_load):
|
||||
#include "asm/program_sshash_load.inc"
|
||||
|
||||
DECL(randomx_sshash_prefetch):
|
||||
#include "asm/program_sshash_prefetch.inc"
|
||||
|
||||
DECL(randomx_sshash_end):
|
||||
nop
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_sshash_init):
|
||||
lea r8, [rbx+1]
|
||||
#include "asm/program_sshash_prefetch.inc"
|
||||
imul r8, qword ptr [r0_mul+rip]
|
||||
mov r9, qword ptr [r1_add+rip]
|
||||
xor r9, r8
|
||||
mov r10, qword ptr [r2_add+rip]
|
||||
xor r10, r8
|
||||
mov r11, qword ptr [r3_add+rip]
|
||||
xor r11, r8
|
||||
mov r12, qword ptr [r4_add+rip]
|
||||
xor r12, r8
|
||||
mov r13, qword ptr [r5_add+rip]
|
||||
xor r13, r8
|
||||
mov r14, qword ptr [r6_add+rip]
|
||||
xor r14, r8
|
||||
mov r15, qword ptr [r7_add+rip]
|
||||
xor r15, r8
|
||||
jmp DECL(randomx_program_end)
|
||||
|
||||
.balign 64
|
||||
#include "asm/program_sshash_constants.inc"
|
||||
|
||||
.balign 64
|
||||
DECL(randomx_program_end):
|
||||
nop
|
||||
|
||||
DECL(randomx_reciprocal_fast):
|
||||
#if !defined(WINABI)
|
||||
mov rcx, rdi
|
||||
#endif
|
||||
#include "asm/randomx_reciprocal.inc"
|
||||
227
randomx/jit_compiler_x86_static.asm
Normal file
@@ -0,0 +1,227 @@
|
||||
; Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
;
|
||||
; All rights reserved.
|
||||
;
|
||||
; Redistribution and use in source and binary forms, with or without
|
||||
; modification, are permitted provided that the following conditions are met:
|
||||
; * Redistributions of source code must retain the above copyright
|
||||
; notice, this list of conditions and the following disclaimer.
|
||||
; * Redistributions in binary form must reproduce the above copyright
|
||||
; notice, this list of conditions and the following disclaimer in the
|
||||
; documentation and/or other materials provided with the distribution.
|
||||
; * Neither the name of the copyright holder nor the
|
||||
; names of its contributors may be used to endorse or promote products
|
||||
; derived from this software without specific prior written permission.
|
||||
;
|
||||
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
IFDEF RAX
|
||||
|
||||
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
|
||||
|
||||
PUBLIC randomx_prefetch_scratchpad
|
||||
PUBLIC randomx_prefetch_scratchpad_end
|
||||
PUBLIC randomx_program_prologue
|
||||
PUBLIC randomx_program_prologue_first_load
|
||||
PUBLIC randomx_program_loop_begin
|
||||
PUBLIC randomx_program_loop_load
|
||||
PUBLIC randomx_program_start
|
||||
PUBLIC randomx_program_read_dataset
|
||||
PUBLIC randomx_program_read_dataset_sshash_init
|
||||
PUBLIC randomx_program_read_dataset_sshash_fin
|
||||
PUBLIC randomx_dataset_init
|
||||
PUBLIC randomx_program_loop_store
|
||||
PUBLIC randomx_program_loop_end
|
||||
PUBLIC randomx_program_epilogue
|
||||
PUBLIC randomx_sshash_load
|
||||
PUBLIC randomx_sshash_prefetch
|
||||
PUBLIC randomx_sshash_end
|
||||
PUBLIC randomx_sshash_init
|
||||
PUBLIC randomx_program_end
|
||||
PUBLIC randomx_reciprocal_fast
|
||||
|
||||
include asm/configuration.asm
|
||||
|
||||
RANDOMX_SCRATCHPAD_MASK EQU (RANDOMX_SCRATCHPAD_L3-64)
|
||||
RANDOMX_DATASET_BASE_MASK EQU (RANDOMX_DATASET_BASE_SIZE-64)
|
||||
RANDOMX_CACHE_MASK EQU (RANDOMX_ARGON_MEMORY*16-1)
|
||||
RANDOMX_ALIGN EQU 4096
|
||||
SUPERSCALAR_OFFSET EQU ((((RANDOMX_ALIGN + 32 * RANDOMX_PROGRAM_SIZE) - 1) / (RANDOMX_ALIGN) + 1) * (RANDOMX_ALIGN))
|
||||
|
||||
randomx_prefetch_scratchpad PROC
|
||||
mov rdx, rax
|
||||
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||
prefetcht0 [rsi+rax]
|
||||
ror rdx, 32
|
||||
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||
prefetcht0 [rsi+rdx]
|
||||
randomx_prefetch_scratchpad ENDP
|
||||
|
||||
randomx_prefetch_scratchpad_end PROC
|
||||
randomx_prefetch_scratchpad_end ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_prologue PROC
|
||||
include asm/program_prologue_win64.inc
|
||||
movapd xmm13, xmmword ptr [mantissaMask]
|
||||
movapd xmm14, xmmword ptr [exp240]
|
||||
movapd xmm15, xmmword ptr [scaleMask]
|
||||
randomx_program_prologue ENDP
|
||||
|
||||
randomx_program_prologue_first_load PROC
|
||||
xor rax, r8
|
||||
xor rax, r8
|
||||
mov rdx, rax
|
||||
and eax, RANDOMX_SCRATCHPAD_MASK
|
||||
ror rdx, 32
|
||||
and edx, RANDOMX_SCRATCHPAD_MASK
|
||||
jmp randomx_program_loop_begin
|
||||
randomx_program_prologue_first_load ENDP
|
||||
|
||||
ALIGN 64
|
||||
include asm/program_xmm_constants.inc
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_loop_begin PROC
|
||||
nop
|
||||
randomx_program_loop_begin ENDP
|
||||
|
||||
randomx_program_loop_load PROC
|
||||
include asm/program_loop_load.inc
|
||||
randomx_program_loop_load ENDP
|
||||
|
||||
randomx_program_start PROC
|
||||
nop
|
||||
randomx_program_start ENDP
|
||||
|
||||
randomx_program_read_dataset PROC
|
||||
include asm/program_read_dataset.inc
|
||||
randomx_program_read_dataset ENDP
|
||||
|
||||
randomx_program_read_dataset_sshash_init PROC
|
||||
include asm/program_read_dataset_sshash_init.inc
|
||||
randomx_program_read_dataset_sshash_init ENDP
|
||||
|
||||
randomx_program_read_dataset_sshash_fin PROC
|
||||
include asm/program_read_dataset_sshash_fin.inc
|
||||
randomx_program_read_dataset_sshash_fin ENDP
|
||||
|
||||
randomx_program_loop_store PROC
|
||||
include asm/program_loop_store.inc
|
||||
randomx_program_loop_store ENDP
|
||||
|
||||
randomx_program_loop_end PROC
|
||||
nop
|
||||
randomx_program_loop_end ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_dataset_init PROC
|
||||
push rbx
|
||||
push rbp
|
||||
push rdi
|
||||
push rsi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
mov rdi, qword ptr [rcx] ;# cache->memory
|
||||
mov rsi, rdx ;# dataset
|
||||
mov rbp, r8 ;# block index
|
||||
push r9 ;# max. block index
|
||||
init_block_loop:
|
||||
prefetchw byte ptr [rsi]
|
||||
mov rbx, rbp
|
||||
db 232 ;# 0xE8 = call
|
||||
dd SUPERSCALAR_OFFSET - distance
|
||||
distance equ $ - offset randomx_dataset_init
|
||||
mov qword ptr [rsi+0], r8
|
||||
mov qword ptr [rsi+8], r9
|
||||
mov qword ptr [rsi+16], r10
|
||||
mov qword ptr [rsi+24], r11
|
||||
mov qword ptr [rsi+32], r12
|
||||
mov qword ptr [rsi+40], r13
|
||||
mov qword ptr [rsi+48], r14
|
||||
mov qword ptr [rsi+56], r15
|
||||
add rbp, 1
|
||||
add rsi, 64
|
||||
cmp rbp, qword ptr [rsp]
|
||||
jb init_block_loop
|
||||
pop r9
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rsi
|
||||
pop rdi
|
||||
pop rbp
|
||||
pop rbx
|
||||
ret
|
||||
randomx_dataset_init ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_epilogue PROC
|
||||
include asm/program_epilogue_store.inc
|
||||
include asm/program_epilogue_win64.inc
|
||||
randomx_program_epilogue ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_sshash_load PROC
|
||||
include asm/program_sshash_load.inc
|
||||
randomx_sshash_load ENDP
|
||||
|
||||
randomx_sshash_prefetch PROC
|
||||
include asm/program_sshash_prefetch.inc
|
||||
randomx_sshash_prefetch ENDP
|
||||
|
||||
randomx_sshash_end PROC
|
||||
nop
|
||||
randomx_sshash_end ENDP
|
||||
|
||||
ALIGN 64
|
||||
randomx_sshash_init PROC
|
||||
lea r8, [rbx+1]
|
||||
include asm/program_sshash_prefetch.inc
|
||||
imul r8, qword ptr [r0_mul]
|
||||
mov r9, qword ptr [r1_add]
|
||||
xor r9, r8
|
||||
mov r10, qword ptr [r2_add]
|
||||
xor r10, r8
|
||||
mov r11, qword ptr [r3_add]
|
||||
xor r11, r8
|
||||
mov r12, qword ptr [r4_add]
|
||||
xor r12, r8
|
||||
mov r13, qword ptr [r5_add]
|
||||
xor r13, r8
|
||||
mov r14, qword ptr [r6_add]
|
||||
xor r14, r8
|
||||
mov r15, qword ptr [r7_add]
|
||||
xor r15, r8
|
||||
jmp randomx_program_end
|
||||
randomx_sshash_init ENDP
|
||||
|
||||
ALIGN 64
|
||||
include asm/program_sshash_constants.inc
|
||||
|
||||
ALIGN 64
|
||||
randomx_program_end PROC
|
||||
nop
|
||||
randomx_program_end ENDP
|
||||
|
||||
randomx_reciprocal_fast PROC
|
||||
include asm/randomx_reciprocal.inc
|
||||
randomx_reciprocal_fast ENDP
|
||||
|
||||
_RANDOMX_JITX86_STATIC ENDS
|
||||
|
||||
ENDIF
|
||||
|
||||
END
|
||||
51
randomx/jit_compiler_x86_static.hpp
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
extern "C" {
|
||||
void randomx_prefetch_scratchpad();
|
||||
void randomx_prefetch_scratchpad_end();
|
||||
void randomx_program_prologue();
|
||||
void randomx_program_prologue_first_load();
|
||||
void randomx_program_loop_begin();
|
||||
void randomx_program_loop_load();
|
||||
void randomx_program_start();
|
||||
void randomx_program_read_dataset();
|
||||
void randomx_program_read_dataset_sshash_init();
|
||||
void randomx_program_read_dataset_sshash_fin();
|
||||
void randomx_program_loop_store();
|
||||
void randomx_program_loop_end();
|
||||
void randomx_dataset_init();
|
||||
void randomx_program_epilogue();
|
||||
void randomx_sshash_load();
|
||||
void randomx_sshash_prefetch();
|
||||
void randomx_sshash_end();
|
||||
void randomx_sshash_init();
|
||||
void randomx_program_end();
|
||||
}
|
||||
35
randomx/mingw-std-threads-master/CMakeLists.txt
Normal file
@@ -0,0 +1,35 @@
|
||||
project(mingw_stdthreads)
|
||||
cmake_minimum_required(VERSION 3.0)
|
||||
|
||||
option(MINGW_STDTHREADS_BUILD_TEST "Build tests")
|
||||
option(MINGW_STDTHREADS_GENERATE_STDHEADERS "Generate std-like headers")
|
||||
|
||||
string(CONCAT mingw_stdthreads_dir_docstring
|
||||
"Optional. When generating std-like headers , this variable can be set"
|
||||
"to manually specify the path to mingw-stdthreads directory containing"
|
||||
"original library headers.")
|
||||
set(MINGW_STDTHREADS_DIR "${PROJECT_SOURCE_DIR}"
|
||||
CACHE PATH ${mingw_stdthreads_dir_docstring})
|
||||
|
||||
# mingw-stdthreads is a header-only library, so make it an INTERFACE target
|
||||
add_library(${PROJECT_NAME} INTERFACE)
|
||||
target_include_directories(${PROJECT_NAME} INTERFACE "${PROJECT_SOURCE_DIR}")
|
||||
|
||||
if(MINGW_STDTHREADS_GENERATE_STDHEADERS)
|
||||
# Check if we are using gcc or clang
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU")
|
||||
# Add as dependency and generate std headers
|
||||
add_subdirectory(cmake_stdheaders_generator)
|
||||
target_link_libraries(${PROJECT_NAME} INTERFACE
|
||||
cmake_stdheaders_generator)
|
||||
else()
|
||||
message(WARNING "Cannot generate std headers with this compiler: "
|
||||
${CMAKE_CXX_COMPILER_ID} ". "
|
||||
"Please fall back to #include <mingw.xxx.h>")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Build tests.exe
|
||||
if(MINGW_STDTHREADS_BUILD_TEST)
|
||||
add_subdirectory(tests)
|
||||
endif()
|
||||
24
randomx/mingw-std-threads-master/LICENSE
Normal file
@@ -0,0 +1,24 @@
|
||||
Copyright (c) 2016, Mega Limited
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice, this
|
||||
list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
58
randomx/mingw-std-threads-master/README.md
Normal file
@@ -0,0 +1,58 @@
|
||||
mingw-std-threads
|
||||
=================
|
||||
|
||||
Implementation of standard C++11 threading classes, which are currently still missing on MinGW GCC.
|
||||
|
||||
Target Windows version
|
||||
----------------------
|
||||
This implementation should work with Windows XP (regardless of service pack), or newer.
|
||||
The library automatically detects the version of Windows that is being targeted (at compile time), and selects an implementation that takes advantage of available Windows features.
|
||||
In MinGW GCC, the target Windows version may optionally be selected by the command-line option `-D _WIN32_WINNT=...`.
|
||||
Use `0x0600` for Windows Vista, or `0x0601` for Windows 7.
|
||||
See "[Modifying `WINVER` and `_WIN32_WINNT`](https://docs.microsoft.com/en-us/cpp/porting/modifying-winver-and-win32-winnt)" for more details.
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
This is a header-only library. To use, just include the corresponding `mingw.xxx.h` file, where `xxx` is the name of the standard header that you would normally include.
|
||||
|
||||
For example, `#include "mingw.thread.h"` replaces `#include <thread>`.
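As a minimal sketch of this drop-in usage (assuming a MinGW toolchain where the standard classes are missing, so the library injects its own into namespace `std`):

```c++
// Sketch only: mingw.thread.h / mingw.mutex.h stand in for <thread> / <mutex>.
#include <cstdio>
#include "mingw.mutex.h"
#include "mingw.thread.h"

int main()
{
    std::mutex m;
    std::thread worker([&m] {
        std::lock_guard<std::mutex> guard(m); // same API as the standard classes
        std::printf("hello from a worker thread\n");
    });
    worker.join(); // join before main() returns, as with std::thread
    return 0;
}
```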
|
||||
|
||||
A `CMakeLists.txt` has also been provided. You can add it to your project by using `add_subdirectory()`, and then this library can be added as your targets' dependency by using `target_link_libraries(YOUR_TARGET PRIVATE mingw_stdthreads)`. By default it just adds an include path, allowing you to include headers using angle brackets (for example `#include <mingw.thread.h>`). But you can also provide options to let it generate "std-like" headers (see next paragraph).
|
||||
|
||||
Using "std-like" headers
|
||||
------------------------
|
||||
|
||||
You probably don't really want to replace every `#include <header>` with `#include "mingw.header.h"`. If you are using GCC or Clang, here are some ways to avoid that.
|
||||
|
||||
With CMake, you just need to turn on the option `MINGW_STDTHREADS_GENERATE_STDHEADERS` before adding mingw-stdthreads, something like this:
|
||||
```CMake
|
||||
option(MINGW_STDTHREADS_GENERATE_STDHEADERS "" ON)
|
||||
add_subdirectory(mingw_stdthreads)
|
||||
target_link_libraries(${TARGET} PRIVATE mingw_stdthreads)
|
||||
```
|
||||
When CMake generates project files, headers named in the "standard header" way will be generated and added to your include path. You can then avoid names like `mingw.thread.h` and keep using `#include <thread>` as always. In addition, `MINGW_STDTHREADS_GENERATED_STDHEADERS` will be defined; you can use this macro to check whether those generated headers are actually available.
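For instance, a source file can branch on that macro (just a sketch of the check described above; no new names are introduced):

```c++
// Sketch only: prefer the generated std-like headers when CMake produced them,
// otherwise fall back to including the library header directly.
#ifdef MINGW_STDTHREADS_GENERATED_STDHEADERS
#include <thread>          // resolves to the generated wrapper header
#else
#include "mingw.thread.h"  // direct include of the mingw-std-threads header
#endif
```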
|
||||
|
||||
If you aren't using CMake, you can use one of the three scripts inside the [utility_scripts](utility_scripts) directory to manually generate those "std-like" headers. Note that this requires Microsoft PowerShell, so if you are cross-compiling you will need to install PowerShell.
|
||||
|
||||
Compatibility
|
||||
-------------
|
||||
|
||||
This code has been tested to work with MinGW-w64 5.3.0, but should work with any other MinGW version that is missing the `std` threading classes, supports C++11 lambda functions and variadic templates, and has working mutex helper classes in `<mutex>`.
|
||||
|
||||
Switching from the win32-pthread based implementation
|
||||
-----------------------------------------------------
|
||||
It seems that recent versions of MinGW-w64 include a Win32 port of pthreads, and have the `std::thread`, `std::mutex`, etc. classes implemented and working based on that compatibility
|
||||
layer.
|
||||
That is a somewhat heavier implementation, as it relies on an abstraction layer, so you may still want to use this implementation for efficiency purposes.
|
||||
Unfortunately you can't use this library standalone and independent of the system `<mutex>` headers, as it relies on those headers for `std::unique_lock` and other non-trivial utility classes.
|
||||
In that case you will need to edit the `c++config.h` file of your MinGW setup and comment out the definition of `_GLIBCXX_HAS_GTHREADS`.
|
||||
This will cause the system headers not to define the actual `thread`, `mutex`, etc. classes, but still define the necessary utility classes.
|
||||
|
||||
Why MinGW has no threading classes
|
||||
----------------------------------
|
||||
It seems that for its cross-platform threading implementation, the GCC standard library relies on the gthreads/pthreads library.
|
||||
If this library is not available, as is the case with MinGW, the classes `std::thread`, `std::mutex`, `std::condition_variable` are not defined.
|
||||
However, various usable helper classes are still defined in the system headers.
|
||||
Hence, this implementation does not re-define them, and instead includes those headers.
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
cmake_minimum_required(VERSION 3.0)
|
||||
project(cmake_stdheaders_generator)
|
||||
|
||||
set(output_include_path "${PROJECT_BINARY_DIR}/${PROJECT_NAME}")
|
||||
message("${PROJECT_NAME}: output_include_path set to ${output_include_path}")
|
||||
|
||||
function(generate_mingw_stdthreads_header header_file_name
|
||||
mingw_stdthreads_folder)
|
||||
set(template_file_path "${PROJECT_SOURCE_DIR}/template.cpp")
|
||||
set(destination_file_path "${output_include_path}/${header_file_name}")
|
||||
|
||||
# Check if compiler is gcc or clang
|
||||
if (NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU")
|
||||
# Actually this should never happen because it should have already
|
||||
# been checked in the parent CMakeLists.txt
|
||||
message(FATAL_ERROR "Unsupported compiler")
|
||||
endif()
|
||||
|
||||
# Call g++ to retrieve header path
|
||||
# The -H option makes g++ output header dependencies to stderr
|
||||
set(compiler_arguments
|
||||
${template_file_path}
|
||||
-H
|
||||
"-DMINGW_STDTHREADS_DETECTING_SYSTEM_HEADER=<${header_file_name}>")
|
||||
# The content of stderr is saved to the variable compiler_output
|
||||
execute_process(COMMAND "${CMAKE_CXX_COMPILER}" ${compiler_arguments}
|
||||
ERROR_VARIABLE compiler_output
|
||||
OUTPUT_QUIET)
|
||||
|
||||
# Get full path to system header
|
||||
string(REGEX MATCH "[.] ([^\r\n]*)" _ "${compiler_output}")
|
||||
set(mingw_stdthreads_headers_generator_system_header "${CMAKE_MATCH_1}")
|
||||
message("Matched: <${mingw_stdthreads_headers_generator_system_header}>")
|
||||
|
||||
# Ensure file exists
|
||||
if(NOT EXISTS "${mingw_stdthreads_headers_generator_system_header}")
|
||||
message(FATAL_ERROR "<${header_file_name}>'s path not found, "
|
||||
"compiler output was:\n${compiler_output}")
|
||||
endif()
|
||||
|
||||
# Get full path to mingw-stdthreads header
|
||||
set(mingw_stdthreads_headers_generator_library_header
|
||||
"${mingw_stdthreads_folder}/mingw.${header_file_name}.h")
|
||||
|
||||
# Normalize paths
|
||||
file(TO_CMAKE_PATH "${mingw_stdthreads_headers_generator_system_header}"
|
||||
mingw_stdthreads_headers_generator_system_header)
|
||||
file(TO_CMAKE_PATH "${mingw_stdthreads_headers_generator_library_header}"
|
||||
mingw_stdthreads_headers_generator_library_header)
|
||||
|
||||
configure_file("${template_file_path}" "${destination_file_path}")
|
||||
endfunction()
|
||||
|
||||
if(EXISTS "${MINGW_STDTHREADS_DIR}")
|
||||
message("${PROJECT_NAME}: MINGW_STDTHREADS_DIR: "
|
||||
"${MINGW_STDTHREADS_DIR}")
|
||||
else()
|
||||
message(FATAL_ERROR "${PROECT_NAME}: MINGW_STDTHREADS_DIR does not "
|
||||
"exist: ${MINGW_STDTHREADS_DIR}")
|
||||
endif()
|
||||
|
||||
# <condition_variable>
|
||||
generate_mingw_stdthreads_header(condition_variable "${MINGW_STDTHREADS_DIR}")
|
||||
# <future>
|
||||
generate_mingw_stdthreads_header(future "${MINGW_STDTHREADS_DIR}")
|
||||
# <mutex>
|
||||
generate_mingw_stdthreads_header(mutex "${MINGW_STDTHREADS_DIR}")
|
||||
# <shared_mutex>
|
||||
generate_mingw_stdthreads_header(shared_mutex "${MINGW_STDTHREADS_DIR}")
|
||||
# <thread>
|
||||
generate_mingw_stdthreads_header(thread "${MINGW_STDTHREADS_DIR}")
|
||||
|
||||
# the generated headers are to be considered a header-only library,
|
||||
# so we create an interface target
|
||||
add_library(${PROJECT_NAME} INTERFACE)
|
||||
target_compile_definitions(${PROJECT_NAME} INTERFACE
|
||||
MINGW_STDTHREADS_GENERATED_STDHEADERS)
|
||||
target_include_directories(${PROJECT_NAME} INTERFACE "${output_include_path}")
|
||||
@@ -0,0 +1,11 @@
|
||||
#ifdef MINGW_STDTHREADS_DETECTING_SYSTEM_HEADER
|
||||
#include MINGW_STDTHREADS_DETECTING_SYSTEM_HEADER
|
||||
static_assert(false, "Prevent compilation")
|
||||
#else
|
||||
#pragma once
|
||||
// both the system header and the mingw-stdthreads header should already have include
|
||||
// guards. But we still add a #pragma once just to be safe.
|
||||
|
||||
#include "${mingw_stdthreads_headers_generator_system_header}"
|
||||
#include "${mingw_stdthreads_headers_generator_library_header}"
|
||||
#endif
|
||||
564
randomx/mingw-std-threads-master/mingw.condition_variable.h
Normal file
@@ -0,0 +1,564 @@
|
||||
/**
|
||||
* @file condition_variable.h
|
||||
* @brief std::condition_variable implementation for MinGW
|
||||
*
|
||||
* (c) 2013-2016 by Mega Limited, Auckland, New Zealand
|
||||
* @author Alexander Vassilev
|
||||
*
|
||||
* @copyright Simplified (2-clause) BSD License.
|
||||
* You should have received a copy of the license along with this
|
||||
* program.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* @note
|
||||
* This file may become part of the mingw-w64 runtime package. If/when this happens,
|
||||
* the appropriate license will be added, i.e. this code will become dual-licensed,
|
||||
* and the current BSD 2-clause license will stay.
|
||||
*/
|
||||
|
||||
#ifndef MINGW_CONDITIONAL_VARIABLE_H
|
||||
#define MINGW_CONDITIONAL_VARIABLE_H
|
||||
|
||||
#if !defined(__cplusplus) || (__cplusplus < 201103L)
|
||||
#error A C++11 compiler is required!
|
||||
#endif
|
||||
// Use the standard classes for std::, if available.
|
||||
#include <condition_variable>
|
||||
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <system_error>
|
||||
|
||||
#include <sdkddkver.h> // Detect Windows version.
|
||||
#if (WINVER < _WIN32_WINNT_VISTA)
|
||||
#include <atomic>
|
||||
#endif
|
||||
#if (defined(__MINGW32__) && !defined(__MINGW64_VERSION_MAJOR))
|
||||
#pragma message "The Windows API that MinGW-w32 provides is not fully compatible\
|
||||
with Microsoft's API. We'll try to work around this, but we can make no\
|
||||
guarantees. This problem does not exist in MinGW-w64."
|
||||
#include <windows.h> // No further granularity can be expected.
|
||||
#else
|
||||
#if (WINVER < _WIN32_WINNT_VISTA)
|
||||
#include <windef.h>
|
||||
#include <winbase.h> // For CreateSemaphore
|
||||
#include <handleapi.h>
|
||||
#endif
|
||||
#include <synchapi.h>
|
||||
#endif
|
||||
|
||||
#include "mingw.mutex.h"
|
||||
#include "mingw.shared_mutex.h"
|
||||
|
||||
#if !defined(_WIN32_WINNT) || (_WIN32_WINNT < 0x0501)
|
||||
#error To use the MinGW-std-threads library, you will need to define the macro _WIN32_WINNT to be 0x0501 (Windows XP) or higher.
|
||||
#endif
|
||||
|
||||
namespace mingw_stdthread
|
||||
{
|
||||
#if defined(__MINGW32__ ) && !defined(_GLIBCXX_HAS_GTHREADS)
|
||||
enum class cv_status { no_timeout, timeout };
|
||||
#else
|
||||
using std::cv_status;
|
||||
#endif
|
||||
namespace xp
|
||||
{
|
||||
// Include the XP-compatible condition_variable classes only if actually
|
||||
// compiling for XP. The XP-compatible classes are slower than the newer
|
||||
// versions, and depend on features not compatible with Windows Phone 8.
|
||||
#if (WINVER < _WIN32_WINNT_VISTA)
|
||||
class condition_variable_any
|
||||
{
|
||||
recursive_mutex mMutex {};
|
||||
std::atomic<int> mNumWaiters {0};
|
||||
HANDLE mSemaphore;
|
||||
HANDLE mWakeEvent {};
|
||||
public:
|
||||
using native_handle_type = HANDLE;
|
||||
native_handle_type native_handle()
|
||||
{
|
||||
return mSemaphore;
|
||||
}
|
||||
condition_variable_any(const condition_variable_any&) = delete;
|
||||
condition_variable_any& operator=(const condition_variable_any&) = delete;
|
||||
condition_variable_any()
|
||||
: mSemaphore(CreateSemaphoreA(NULL, 0, 0xFFFF, NULL))
|
||||
{
|
||||
if (mSemaphore == NULL)
|
||||
throw std::system_error(GetLastError(), std::generic_category());
|
||||
mWakeEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||
if (mWakeEvent == NULL)
|
||||
{
|
||||
CloseHandle(mSemaphore);
|
||||
throw std::system_error(GetLastError(), std::generic_category());
|
||||
}
|
||||
}
|
||||
~condition_variable_any()
|
||||
{
|
||||
CloseHandle(mWakeEvent);
|
||||
CloseHandle(mSemaphore);
|
||||
}
|
||||
private:
|
||||
template <class M>
|
||||
bool wait_impl(M& lock, DWORD timeout)
|
||||
{
|
||||
{
|
||||
lock_guard<recursive_mutex> guard(mMutex);
|
||||
mNumWaiters++;
|
||||
}
|
||||
lock.unlock();
|
||||
DWORD ret = WaitForSingleObject(mSemaphore, timeout);
|
||||
|
||||
mNumWaiters--;
|
||||
SetEvent(mWakeEvent);
|
||||
lock.lock();
|
||||
if (ret == WAIT_OBJECT_0)
|
||||
return true;
|
||||
else if (ret == WAIT_TIMEOUT)
|
||||
return false;
|
||||
//2 possible cases:
|
||||
//1)The point in notify_all() where we determine the count to
|
||||
//increment the semaphore with has not been reached yet:
|
||||
//we just need to decrement mNumWaiters, but setting the event does not hurt
|
||||
//
|
||||
//2)Semaphore has just been released with mNumWaiters just before
|
||||
//we decremented it. This means that the semaphore count
|
||||
//after all waiters finish won't be 0 - because not all waiters
|
||||
//woke up by acquiring the semaphore - we woke up by a timeout.
|
||||
//The notify_all() must handle this gracefully
|
||||
//
|
||||
else
|
||||
{
|
||||
using namespace std;
|
||||
throw system_error(make_error_code(errc::protocol_error));
|
||||
}
|
||||
}
|
||||
public:
|
||||
template <class M>
|
||||
void wait(M& lock)
|
||||
{
|
||||
wait_impl(lock, INFINITE);
|
||||
}
|
||||
template <class M, class Predicate>
|
||||
void wait(M& lock, Predicate pred)
|
||||
{
|
||||
while(!pred())
|
||||
{
|
||||
wait(lock);
|
||||
};
|
||||
}
|
||||
|
||||
void notify_all() noexcept
|
||||
{
|
||||
lock_guard<recursive_mutex> lock(mMutex); //block any further wait requests until all current waiters are unblocked
|
||||
if (mNumWaiters.load() <= 0)
|
||||
return;
|
||||
|
||||
ReleaseSemaphore(mSemaphore, mNumWaiters, NULL);
|
||||
while(mNumWaiters > 0)
|
||||
{
|
||||
auto ret = WaitForSingleObject(mWakeEvent, 1000);
|
||||
if (ret == WAIT_FAILED || ret == WAIT_ABANDONED)
|
||||
std::terminate();
|
||||
}
|
||||
assert(mNumWaiters == 0);
|
||||
//in case some of the waiters timed out just after we released the
|
||||
//semaphore by mNumWaiters, it won't be zero now, because not all waiters
|
||||
//woke up by acquiring the semaphore. So we must zero the semaphore before
|
||||
//we accept waiters for the next event
|
||||
//See _wait_impl for details
|
||||
while(WaitForSingleObject(mSemaphore, 0) == WAIT_OBJECT_0);
|
||||
}
|
||||
void notify_one() noexcept
|
||||
{
|
||||
lock_guard<recursive_mutex> lock(mMutex);
|
||||
int targetWaiters = mNumWaiters.load() - 1;
|
||||
if (targetWaiters <= -1)
|
||||
return;
|
||||
ReleaseSemaphore(mSemaphore, 1, NULL);
|
||||
while(mNumWaiters > targetWaiters)
|
||||
{
|
||||
auto ret = WaitForSingleObject(mWakeEvent, 1000);
|
||||
if (ret == WAIT_FAILED || ret == WAIT_ABANDONED)
|
||||
std::terminate();
|
||||
}
|
||||
assert(mNumWaiters == targetWaiters);
|
||||
}
|
||||
template <class M, class Rep, class Period>
|
||||
cv_status wait_for(M& lock,
|
||||
const std::chrono::duration<Rep, Period>& rel_time)
|
||||
{
|
||||
using namespace std::chrono;
|
||||
auto timeout = duration_cast<milliseconds>(rel_time).count();
|
||||
DWORD waittime = (timeout < INFINITE) ? ((timeout < 0) ? 0 : static_cast<DWORD>(timeout)) : (INFINITE - 1);
|
||||
bool ret = wait_impl(lock, waittime) || (timeout >= INFINITE);
|
||||
return ret?cv_status::no_timeout:cv_status::timeout;
|
||||
}
|
||||
|
||||
template <class M, class Rep, class Period, class Predicate>
|
||||
bool wait_for(M& lock,
|
||||
const std::chrono::duration<Rep, Period>& rel_time, Predicate pred)
|
||||
{
|
||||
return wait_until(lock, std::chrono::steady_clock::now()+rel_time, pred);
|
||||
}
|
||||
template <class M, class Clock, class Duration>
|
||||
cv_status wait_until (M& lock,
|
||||
const std::chrono::time_point<Clock,Duration>& abs_time)
|
||||
{
|
||||
return wait_for(lock, abs_time - Clock::now());
|
||||
}
|
||||
template <class M, class Clock, class Duration, class Predicate>
|
||||
bool wait_until (M& lock,
|
||||
const std::chrono::time_point<Clock, Duration>& abs_time,
|
||||
Predicate pred)
|
||||
{
|
||||
while (!pred())
|
||||
{
|
||||
if (wait_until(lock, abs_time) == cv_status::timeout)
|
||||
{
|
||||
return pred();
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
class condition_variable: condition_variable_any
|
||||
{
|
||||
using base = condition_variable_any;
|
||||
public:
|
||||
using base::native_handle_type;
|
||||
using base::native_handle;
|
||||
using base::base;
|
||||
using base::notify_all;
|
||||
using base::notify_one;
|
||||
void wait(unique_lock<mutex> &lock)
|
||||
{
|
||||
base::wait(lock);
|
||||
}
|
||||
template <class Predicate>
|
||||
void wait(unique_lock<mutex>& lock, Predicate pred)
|
||||
{
|
||||
base::wait(lock, pred);
|
||||
}
|
||||
template <class Rep, class Period>
|
||||
cv_status wait_for(unique_lock<mutex>& lock, const std::chrono::duration<Rep, Period>& rel_time)
|
||||
{
|
||||
return base::wait_for(lock, rel_time);
|
||||
}
|
||||
template <class Rep, class Period, class Predicate>
|
||||
bool wait_for(unique_lock<mutex>& lock, const std::chrono::duration<Rep, Period>& rel_time, Predicate pred)
|
||||
{
|
||||
return base::wait_for(lock, rel_time, pred);
|
||||
}
|
||||
template <class Clock, class Duration>
|
||||
cv_status wait_until (unique_lock<mutex>& lock, const std::chrono::time_point<Clock,Duration>& abs_time)
|
||||
{
|
||||
return base::wait_until(lock, abs_time);
|
||||
}
|
||||
template <class Clock, class Duration, class Predicate>
|
||||
bool wait_until (unique_lock<mutex>& lock, const std::chrono::time_point<Clock, Duration>& abs_time, Predicate pred)
|
||||
{
|
||||
return base::wait_until(lock, abs_time, pred);
|
||||
}
|
||||
};
|
||||
#endif // Compiling for XP
|
||||
} // Namespace mingw_stdthread::xp
|
||||
|
||||
#if (WINVER >= _WIN32_WINNT_VISTA)
|
||||
namespace vista
|
||||
{
|
||||
// If compiling for Vista or higher, use the native condition variable.
|
||||
class condition_variable
|
||||
{
|
||||
static constexpr DWORD kInfinite = 0xffffffffl;
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
CONDITION_VARIABLE cvariable_ = CONDITION_VARIABLE_INIT;
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
friend class condition_variable_any;
|
||||
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
template<typename MTX>
|
||||
inline static void before_wait (MTX * pmutex)
|
||||
{
|
||||
pmutex->mOwnerThread.checkSetOwnerBeforeUnlock();
|
||||
}
|
||||
template<typename MTX>
|
||||
inline static void after_wait (MTX * pmutex)
|
||||
{
|
||||
pmutex->mOwnerThread.setOwnerAfterLock(GetCurrentThreadId());
|
||||
}
|
||||
#else
|
||||
inline static void before_wait (void *) { }
|
||||
inline static void after_wait (void *) { }
|
||||
#endif
|
||||
|
||||
bool wait_impl (unique_lock<xp::mutex> & lock, DWORD time)
|
||||
{
|
||||
using mutex_handle_type = typename xp::mutex::native_handle_type;
|
||||
static_assert(std::is_same<mutex_handle_type, PCRITICAL_SECTION>::value,
|
||||
"Native Win32 condition variable requires std::mutex to \
|
||||
use native Win32 critical section objects.");
|
||||
xp::mutex * pmutex = lock.release();
|
||||
before_wait(pmutex);
|
||||
BOOL success = SleepConditionVariableCS(&cvariable_,
|
||||
pmutex->native_handle(),
|
||||
time);
|
||||
after_wait(pmutex);
|
||||
lock = unique_lock<xp::mutex>(*pmutex, adopt_lock);
|
||||
return success;
|
||||
}
|
||||
|
||||
bool wait_unique (windows7::mutex * pmutex, DWORD time)
|
||||
{
|
||||
before_wait(pmutex);
|
||||
BOOL success = SleepConditionVariableSRW( native_handle(),
|
||||
pmutex->native_handle(),
|
||||
time,
|
||||
// CONDITION_VARIABLE_LOCKMODE_SHARED has a value not specified by
|
||||
// Microsoft's Dev Center, but is known to be (convertible to) a ULONG. To
|
||||
// ensure that the value passed to this function is not equal to Microsoft's
|
||||
// constant, we can either use a static_assert, or simply generate an
|
||||
// appropriate value.
|
||||
!CONDITION_VARIABLE_LOCKMODE_SHARED);
|
||||
after_wait(pmutex);
|
||||
return success;
|
||||
}
|
||||
bool wait_impl (unique_lock<windows7::mutex> & lock, DWORD time)
|
||||
{
|
||||
windows7::mutex * pmutex = lock.release();
|
||||
bool success = wait_unique(pmutex, time);
|
||||
lock = unique_lock<windows7::mutex>(*pmutex, adopt_lock);
|
||||
return success;
|
||||
}
|
||||
public:
|
||||
using native_handle_type = PCONDITION_VARIABLE;
|
||||
native_handle_type native_handle (void)
|
||||
{
|
||||
return &cvariable_;
|
||||
}
|
||||
|
||||
condition_variable (void) = default;
|
||||
~condition_variable (void) = default;
|
||||
|
||||
condition_variable (const condition_variable &) = delete;
|
||||
condition_variable & operator= (const condition_variable &) = delete;
|
||||
|
||||
void notify_one (void) noexcept
|
||||
{
|
||||
WakeConditionVariable(&cvariable_);
|
||||
}
|
||||
|
||||
void notify_all (void) noexcept
|
||||
{
|
||||
WakeAllConditionVariable(&cvariable_);
|
||||
}
|
||||
|
||||
void wait (unique_lock<mutex> & lock)
|
||||
{
|
||||
wait_impl(lock, kInfinite);
|
||||
}
|
||||
|
||||
template<class Predicate>
|
||||
void wait (unique_lock<mutex> & lock, Predicate pred)
|
||||
{
|
||||
while (!pred())
|
||||
wait(lock);
|
||||
}
|
||||
|
||||
template <class Rep, class Period>
|
||||
cv_status wait_for(unique_lock<mutex>& lock,
|
||||
const std::chrono::duration<Rep, Period>& rel_time)
|
||||
{
|
||||
using namespace std::chrono;
|
||||
auto timeout = duration_cast<milliseconds>(rel_time).count();
|
||||
DWORD waittime = (timeout < kInfinite) ? ((timeout < 0) ? 0 : static_cast<DWORD>(timeout)) : (kInfinite - 1);
|
||||
bool result = wait_impl(lock, waittime) || (timeout >= kInfinite);
|
||||
return result ? cv_status::no_timeout : cv_status::timeout;
|
||||
}
|
||||
|
||||
template <class Rep, class Period, class Predicate>
|
||||
bool wait_for(unique_lock<mutex>& lock,
|
||||
const std::chrono::duration<Rep, Period>& rel_time,
|
||||
Predicate pred)
|
||||
{
|
||||
return wait_until(lock,
|
||||
std::chrono::steady_clock::now() + rel_time,
|
||||
std::move(pred));
|
||||
}
|
||||
template <class Clock, class Duration>
|
||||
cv_status wait_until (unique_lock<mutex>& lock,
|
||||
const std::chrono::time_point<Clock,Duration>& abs_time)
|
||||
{
|
||||
return wait_for(lock, abs_time - Clock::now());
|
||||
}
|
||||
template <class Clock, class Duration, class Predicate>
|
||||
bool wait_until (unique_lock<mutex>& lock,
|
||||
const std::chrono::time_point<Clock, Duration>& abs_time,
|
||||
Predicate pred)
|
||||
{
|
||||
while (!pred())
|
||||
{
|
||||
if (wait_until(lock, abs_time) == cv_status::timeout)
|
||||
{
|
||||
return pred();
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
class condition_variable_any
|
||||
{
|
||||
static constexpr DWORD kInfinite = 0xffffffffl;
|
||||
using native_shared_mutex = windows7::shared_mutex;
|
||||
|
||||
condition_variable internal_cv_ {};
|
||||
// When available, the SRW-based mutexes should be faster than the
|
||||
// CriticalSection-based mutexes. Only try_lock will be unavailable in Vista,
|
||||
// and try_lock is not used by condition_variable_any.
|
||||
windows7::mutex internal_mutex_ {};
|
||||
|
||||
template<class L>
|
||||
bool wait_impl (L & lock, DWORD time)
|
||||
{
|
||||
unique_lock<decltype(internal_mutex_)> internal_lock(internal_mutex_);
|
||||
lock.unlock();
|
||||
bool success = internal_cv_.wait_impl(internal_lock, time);
|
||||
lock.lock();
|
||||
return success;
|
||||
}
|
||||
// If the lock happens to be called on a native Windows mutex, skip any extra
|
||||
// contention.
|
||||
inline bool wait_impl (unique_lock<mutex> & lock, DWORD time)
|
||||
{
|
||||
return internal_cv_.wait_impl(lock, time);
|
||||
}
|
||||
// Some shared_mutex functionality is available even in Vista, but it's not
|
||||
// until Windows 7 that a full implementation is natively possible. The class
|
||||
// itself is defined, with missing features, at the Vista feature level.
|
||||
bool wait_impl (unique_lock<native_shared_mutex> & lock, DWORD time)
|
||||
{
|
||||
native_shared_mutex * pmutex = lock.release();
|
||||
bool success = internal_cv_.wait_unique(pmutex, time);
|
||||
lock = unique_lock<native_shared_mutex>(*pmutex, adopt_lock);
|
||||
return success;
|
||||
}
|
||||
bool wait_impl (shared_lock<native_shared_mutex> & lock, DWORD time)
|
||||
{
|
||||
native_shared_mutex * pmutex = lock.release();
|
||||
BOOL success = SleepConditionVariableSRW(native_handle(),
|
||||
pmutex->native_handle(), time,
|
||||
CONDITION_VARIABLE_LOCKMODE_SHARED);
|
||||
lock = shared_lock<native_shared_mutex>(*pmutex, adopt_lock);
|
||||
return success;
|
||||
}
|
||||
public:
|
||||
using native_handle_type = typename condition_variable::native_handle_type;
|
||||
|
||||
native_handle_type native_handle (void)
|
||||
{
|
||||
return internal_cv_.native_handle();
|
||||
}
|
||||
|
||||
void notify_one (void) noexcept
|
||||
{
|
||||
internal_cv_.notify_one();
|
||||
}
|
||||
|
||||
void notify_all (void) noexcept
|
||||
{
|
||||
internal_cv_.notify_all();
|
||||
}
|
||||
|
||||
condition_variable_any (void) = default;
|
||||
~condition_variable_any (void) = default;
|
||||
|
||||
template<class L>
|
||||
void wait (L & lock)
|
||||
{
|
||||
wait_impl(lock, kInfinite);
|
||||
}
|
||||
|
||||
template<class L, class Predicate>
|
||||
void wait (L & lock, Predicate pred)
|
||||
{
|
||||
while (!pred())
|
||||
wait(lock);
|
||||
}
|
||||
|
||||
template <class L, class Rep, class Period>
|
||||
cv_status wait_for(L& lock, const std::chrono::duration<Rep,Period>& period)
|
||||
{
|
||||
using namespace std::chrono;
|
||||
auto timeout = duration_cast<milliseconds>(period).count();
|
||||
DWORD waittime = (timeout < kInfinite) ? ((timeout < 0) ? 0 : static_cast<DWORD>(timeout)) : (kInfinite - 1);
|
||||
bool result = wait_impl(lock, waittime) || (timeout >= kInfinite);
|
||||
return result ? cv_status::no_timeout : cv_status::timeout;
|
||||
}
|
||||
|
||||
template <class L, class Rep, class Period, class Predicate>
|
||||
bool wait_for(L& lock, const std::chrono::duration<Rep, Period>& period,
|
||||
Predicate pred)
|
||||
{
|
||||
return wait_until(lock, std::chrono::steady_clock::now() + period,
|
||||
std::move(pred));
|
||||
}
|
||||
template <class L, class Clock, class Duration>
|
||||
cv_status wait_until (L& lock,
|
||||
const std::chrono::time_point<Clock,Duration>& abs_time)
|
||||
{
|
||||
return wait_for(lock, abs_time - Clock::now());
|
||||
}
|
||||
template <class L, class Clock, class Duration, class Predicate>
|
||||
bool wait_until (L& lock,
|
||||
const std::chrono::time_point<Clock, Duration>& abs_time,
|
||||
Predicate pred)
|
||||
{
|
||||
while (!pred())
|
||||
{
|
||||
if (wait_until(lock, abs_time) == cv_status::timeout)
|
||||
{
|
||||
return pred();
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
} // Namespace vista
|
||||
#endif
|
||||
#if WINVER < 0x0600
|
||||
using xp::condition_variable;
|
||||
using xp::condition_variable_any;
|
||||
#else
|
||||
using vista::condition_variable;
|
||||
using vista::condition_variable_any;
|
||||
#endif
|
||||
} // Namespace mingw_stdthread
|
||||
|
||||
// Push objects into std, but only if they are not already there.
|
||||
namespace std
|
||||
{
|
||||
// Because of quirks of the compiler, the common "using namespace std;"
|
||||
// directive would flatten the namespaces and introduce ambiguity where there
|
||||
// was none. Direct specification (std::), however, would be unaffected.
|
||||
// Take the safe option, and include only in the presence of MinGW's win32
|
||||
// implementation.
|
||||
#if defined(__MINGW32__ ) && !defined(_GLIBCXX_HAS_GTHREADS)
|
||||
using mingw_stdthread::cv_status;
|
||||
using mingw_stdthread::condition_variable;
|
||||
using mingw_stdthread::condition_variable_any;
|
||||
#elif !defined(MINGW_STDTHREAD_REDUNDANCY_WARNING) // Skip repetition
|
||||
#define MINGW_STDTHREAD_REDUNDANCY_WARNING
|
||||
#pragma message "This version of MinGW seems to include a win32 port of\
|
||||
pthreads, and probably already has C++11 std threading classes implemented,\
|
||||
based on pthreads. These classes, found in namespace std, are not overridden\
|
||||
by the mingw-std-thread library. If you would still like to use this\
|
||||
implementation (as it is more lightweight), use the classes provided in\
|
||||
namespace mingw_stdthread."
|
||||
#endif
|
||||
}
|
||||
#endif // MINGW_CONDITIONAL_VARIABLE_H
|
||||
1118
randomx/mingw-std-threads-master/mingw.future.h
Normal file
File diff suppressed because it is too large
109
randomx/mingw-std-threads-master/mingw.invoke.h
Normal file
@@ -0,0 +1,109 @@
|
||||
/// \file mingw.invoke.h
|
||||
/// \brief Lightweight `invoke` implementation, for C++11 and C++14.
|
||||
///
|
||||
/// (c) 2018-2019 by Nathaniel J. McClatchey, San Jose, CA, United States
|
||||
/// \author Nathaniel J. McClatchey, PhD
|
||||
///
|
||||
/// \copyright Simplified (2-clause) BSD License.
|
||||
///
|
||||
/// \note This file may become part of the mingw-w64 runtime package. If/when
|
||||
/// this happens, the appropriate license will be added, i.e. this code will
|
||||
/// become dual-licensed, and the current BSD 2-clause license will stay.
|
||||
|
||||
#ifndef MINGW_INVOKE_H_
|
||||
#define MINGW_INVOKE_H_
|
||||
|
||||
#include <type_traits> // For std::result_of, etc.
|
||||
#include <utility> // For std::forward
|
||||
#include <functional> // For std::reference_wrapper
|
||||
|
||||
namespace mingw_stdthread
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
// For compatibility, implement std::invoke for C++11 and C++14
|
||||
#if __cplusplus < 201703L
|
||||
template<bool PMemFunc, bool PMemData>
|
||||
struct Invoker
|
||||
{
|
||||
template<class F, class... Args>
|
||||
inline static typename std::result_of<F(Args...)>::type invoke (F&& f, Args&&... args)
|
||||
{
|
||||
return std::forward<F>(f)(std::forward<Args>(args)...);
|
||||
}
|
||||
};
|
||||
template<bool>
|
||||
struct InvokerHelper;
|
||||
|
||||
template<>
|
||||
struct InvokerHelper<false>
|
||||
{
|
||||
template<class T1>
|
||||
inline static auto get (T1&& t1) -> decltype(*std::forward<T1>(t1))
|
||||
{
|
||||
return *std::forward<T1>(t1);
|
||||
}
|
||||
|
||||
template<class T1>
|
||||
inline static auto get (const std::reference_wrapper<T1>& t1) -> decltype(t1.get())
|
||||
{
|
||||
return t1.get();
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct InvokerHelper<true>
|
||||
{
|
||||
template<class T1>
|
||||
inline static auto get (T1&& t1) -> decltype(std::forward<T1>(t1))
|
||||
{
|
||||
return std::forward<T1>(t1);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Invoker<true, false>
|
||||
{
|
||||
template<class T, class F, class T1, class... Args>
|
||||
inline static auto invoke (F T::* f, T1&& t1, Args&&... args) ->\
|
||||
decltype((InvokerHelper<std::is_base_of<T,typename std::decay<T1>::type>::value>::get(std::forward<T1>(t1)).*f)(std::forward<Args>(args)...))
|
||||
{
|
||||
return (InvokerHelper<std::is_base_of<T,typename std::decay<T1>::type>::value>::get(std::forward<T1>(t1)).*f)(std::forward<Args>(args)...);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Invoker<false, true>
|
||||
{
|
||||
template<class T, class F, class T1, class... Args>
|
||||
inline static auto invoke (F T::* f, T1&& t1, Args&&... args) ->\
|
||||
decltype(InvokerHelper<std::is_base_of<T,typename std::decay<T1>::type>::value>::get(t1).*f)
|
||||
{
|
||||
return InvokerHelper<std::is_base_of<T,typename std::decay<T1>::type>::value>::get(t1).*f;
|
||||
}
|
||||
};
|
||||
|
||||
template<class F, class... Args>
|
||||
struct InvokeResult
|
||||
{
|
||||
typedef Invoker<std::is_member_function_pointer<typename std::remove_reference<F>::type>::value,
|
||||
std::is_member_object_pointer<typename std::remove_reference<F>::type>::value &&
|
||||
(sizeof...(Args) == 1)> invoker;
|
||||
inline static auto invoke (F&& f, Args&&... args) -> decltype(invoker::invoke(std::forward<F>(f), std::forward<Args>(args)...))
|
||||
{
|
||||
return invoker::invoke(std::forward<F>(f), std::forward<Args>(args)...);
|
||||
}
|
||||
};
|
||||
|
||||
template<class F, class...Args>
|
||||
auto invoke (F&& f, Args&&... args) -> decltype(InvokeResult<F, Args...>::invoke(std::forward<F>(f), std::forward<Args>(args)...))
|
||||
{
|
||||
return InvokeResult<F, Args...>::invoke(std::forward<F>(f), std::forward<Args>(args)...);
|
||||
}
|
||||
#else
|
||||
using std::invoke;
|
||||
#endif
|
||||
} // Namespace "detail"
|
||||
} // Namespace "mingw_stdthread"
|
||||
|
||||
#endif
|
||||
491
randomx/mingw-std-threads-master/mingw.mutex.h
Normal file
@@ -0,0 +1,491 @@
|
||||
/**
|
||||
* @file mingw.mutex.h
|
||||
* @brief std::mutex et al implementation for MinGW
|
||||
** (c) 2013-2016 by Mega Limited, Auckland, New Zealand
|
||||
* @author Alexander Vassilev
|
||||
*
|
||||
* @copyright Simplified (2-clause) BSD License.
|
||||
* You should have received a copy of the license along with this
|
||||
* program.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* @note
|
||||
* This file may become part of the mingw-w64 runtime package. If/when this happens,
|
||||
* the appropriate license will be added, i.e. this code will become dual-licensed,
|
||||
* and the current BSD 2-clause license will stay.
|
||||
*/
|
||||
|
||||
#ifndef WIN32STDMUTEX_H
|
||||
#define WIN32STDMUTEX_H
|
||||
|
||||
#if !defined(__cplusplus) || (__cplusplus < 201103L)
|
||||
#error A C++11 compiler is required!
|
||||
#endif
|
||||
// Recursion checks on non-recursive locks have some performance penalty, and
|
||||
// the C++ standard does not mandate them. The user might want to explicitly
|
||||
// enable or disable such checks. If the user has no preference, enable such
|
||||
// checks in debug builds, but not in release builds.
|
||||
#ifdef STDMUTEX_RECURSION_CHECKS
|
||||
#elif defined(NDEBUG)
|
||||
#define STDMUTEX_RECURSION_CHECKS 0
|
||||
#else
|
||||
#define STDMUTEX_RECURSION_CHECKS 1
|
||||
#endif
|
||||
|
||||
#include <chrono>
|
||||
#include <system_error>
|
||||
#include <atomic>
|
||||
#include <mutex> // needed for call_once()
|
||||
|
||||
#if STDMUTEX_RECURSION_CHECKS || !defined(NDEBUG)
|
||||
#include <cstdio>
|
||||
#endif
|
||||
|
||||
#include <sdkddkver.h> // Detect Windows version.
|
||||
|
||||
#if (defined(__MINGW32__) && !defined(__MINGW64_VERSION_MAJOR))
|
||||
#pragma message "The Windows API that MinGW-w32 provides is not fully compatible\
|
||||
with Microsoft's API. We'll try to work around this, but we can make no\
|
||||
guarantees. This problem does not exist in MinGW-w64."
|
||||
#include <windows.h> // No further granularity can be expected.
|
||||
#else
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
#include <processthreadsapi.h> // For GetCurrentThreadId
|
||||
#endif
|
||||
#include <synchapi.h> // For InitializeCriticalSection, etc.
|
||||
#include <errhandlingapi.h> // For GetLastError
|
||||
#include <handleapi.h>
|
||||
#endif
|
||||
|
||||
// Need for the implementation of invoke
|
||||
#include "mingw.invoke.h"
|
||||
|
||||
#if !defined(_WIN32_WINNT) || (_WIN32_WINNT < 0x0501)
|
||||
#error To use the MinGW-std-threads library, you will need to define the macro _WIN32_WINNT to be 0x0501 (Windows XP) or higher.
|
||||
#endif
|
||||
|
||||
namespace mingw_stdthread
|
||||
{
|
||||
// The _NonRecursive class has mechanisms that do not play nice with direct
|
||||
// manipulation of the native handle. This forward declaration is part of
|
||||
// a friend class declaration.
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
namespace vista
|
||||
{
|
||||
class condition_variable;
|
||||
}
|
||||
#endif
|
||||
// To make this namespace equivalent to the thread-related subset of std,
|
||||
// pull in the classes and class templates supplied by std but not by this
|
||||
// implementation.
|
||||
using std::lock_guard;
|
||||
using std::unique_lock;
|
||||
using std::adopt_lock_t;
|
||||
using std::defer_lock_t;
|
||||
using std::try_to_lock_t;
|
||||
using std::adopt_lock;
|
||||
using std::defer_lock;
|
||||
using std::try_to_lock;
|
||||
|
||||
class recursive_mutex
|
||||
{
|
||||
CRITICAL_SECTION mHandle;
|
||||
public:
|
||||
typedef LPCRITICAL_SECTION native_handle_type;
|
||||
native_handle_type native_handle() {return &mHandle;}
|
||||
recursive_mutex() noexcept : mHandle()
|
||||
{
|
||||
InitializeCriticalSection(&mHandle);
|
||||
}
|
||||
recursive_mutex (const recursive_mutex&) = delete;
|
||||
recursive_mutex& operator=(const recursive_mutex&) = delete;
|
||||
~recursive_mutex() noexcept
|
||||
{
|
||||
DeleteCriticalSection(&mHandle);
|
||||
}
|
||||
void lock()
|
||||
{
|
||||
EnterCriticalSection(&mHandle);
|
||||
}
|
||||
void unlock()
|
||||
{
|
||||
LeaveCriticalSection(&mHandle);
|
||||
}
|
||||
bool try_lock()
|
||||
{
|
||||
return (TryEnterCriticalSection(&mHandle)!=0);
|
||||
}
|
||||
};
|
||||
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
struct _OwnerThread
|
||||
{
|
||||
// If this is to be read before locking, then the owner-thread variable must
|
||||
// be atomic to prevent a torn read from spuriously causing errors.
|
||||
std::atomic<DWORD> mOwnerThread;
|
||||
constexpr _OwnerThread () noexcept : mOwnerThread(0) {}
|
||||
static void on_deadlock (void)
|
||||
{
|
||||
using namespace std;
|
||||
fprintf(stderr, "FATAL: Recursive locking of non-recursive mutex\
|
||||
detected. Throwing system exception\n");
|
||||
fflush(stderr);
|
||||
throw system_error(make_error_code(errc::resource_deadlock_would_occur));
|
||||
}
|
||||
DWORD checkOwnerBeforeLock() const
|
||||
{
|
||||
DWORD self = GetCurrentThreadId();
|
||||
if (mOwnerThread.load(std::memory_order_relaxed) == self)
|
||||
on_deadlock();
|
||||
return self;
|
||||
}
|
||||
void setOwnerAfterLock(DWORD id)
|
||||
{
|
||||
mOwnerThread.store(id, std::memory_order_relaxed);
|
||||
}
|
||||
void checkSetOwnerBeforeUnlock()
|
||||
{
|
||||
DWORD self = GetCurrentThreadId();
|
||||
if (mOwnerThread.load(std::memory_order_relaxed) != self)
|
||||
on_deadlock();
|
||||
mOwnerThread.store(0, std::memory_order_relaxed);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
// Though the Slim Reader-Writer (SRW) locks used here are not complete until
|
||||
// Windows 7, implementing partial functionality in Vista will simplify the
|
||||
// interaction with condition variables.
|
||||
#if defined(_WIN32) && (WINVER >= _WIN32_WINNT_VISTA)
|
||||
namespace windows7
|
||||
{
|
||||
class mutex
|
||||
{
|
||||
SRWLOCK mHandle;
|
||||
// Track locking thread for error checking.
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
friend class vista::condition_variable;
|
||||
_OwnerThread mOwnerThread {};
|
||||
#endif
|
||||
public:
|
||||
typedef PSRWLOCK native_handle_type;
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
constexpr mutex () noexcept : mHandle(SRWLOCK_INIT) { }
|
||||
#pragma GCC diagnostic pop
|
||||
mutex (const mutex&) = delete;
|
||||
mutex & operator= (const mutex&) = delete;
|
||||
void lock (void)
|
||||
{
|
||||
// Note: Undefined behavior if called recursively.
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
DWORD self = mOwnerThread.checkOwnerBeforeLock();
|
||||
#endif
|
||||
AcquireSRWLockExclusive(&mHandle);
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
mOwnerThread.setOwnerAfterLock(self);
|
||||
#endif
|
||||
}
|
||||
void unlock (void)
|
||||
{
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
mOwnerThread.checkSetOwnerBeforeUnlock();
|
||||
#endif
|
||||
ReleaseSRWLockExclusive(&mHandle);
|
||||
}
|
||||
// TryAcquireSRW functions are a Windows 7 feature.
|
||||
#if (WINVER >= _WIN32_WINNT_WIN7)
|
||||
bool try_lock (void)
|
||||
{
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
DWORD self = mOwnerThread.checkOwnerBeforeLock();
|
||||
#endif
|
||||
BOOL ret = TryAcquireSRWLockExclusive(&mHandle);
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
if (ret)
|
||||
mOwnerThread.setOwnerAfterLock(self);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
native_handle_type native_handle (void)
|
||||
{
|
||||
return &mHandle;
|
||||
}
|
||||
};
|
||||
} // Namespace windows7
|
||||
#endif // Compiling for Vista
|
||||
namespace xp
|
||||
{
|
||||
class mutex
|
||||
{
|
||||
CRITICAL_SECTION mHandle;
|
||||
    std::atomic_uchar mState; // Lazy-initialization state: 2 = not yet initialized, 1 = initialization in progress, 0 = ready.
|
||||
// Track locking thread for error checking.
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
friend class vista::condition_variable;
|
||||
_OwnerThread mOwnerThread {};
|
||||
#endif
|
||||
public:
|
||||
typedef PCRITICAL_SECTION native_handle_type;
|
||||
constexpr mutex () noexcept : mHandle(), mState(2) { }
|
||||
mutex (const mutex&) = delete;
|
||||
mutex & operator= (const mutex&) = delete;
|
||||
~mutex() noexcept
|
||||
{
|
||||
// Undefined behavior if the mutex is held (locked) by any thread.
|
||||
// Undefined behavior if a thread terminates while holding ownership of the
|
||||
// mutex.
|
||||
DeleteCriticalSection(&mHandle);
|
||||
}
|
||||
void lock (void)
|
||||
{
|
||||
unsigned char state = mState.load(std::memory_order_acquire);
|
||||
while (state) {
|
||||
if ((state == 2) && mState.compare_exchange_weak(state, 1, std::memory_order_acquire))
|
||||
{
|
||||
InitializeCriticalSection(&mHandle);
|
||||
mState.store(0, std::memory_order_release);
|
||||
break;
|
||||
}
|
||||
if (state == 1)
|
||||
{
|
||||
Sleep(0);
|
||||
state = mState.load(std::memory_order_acquire);
|
||||
}
|
||||
}
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
DWORD self = mOwnerThread.checkOwnerBeforeLock();
|
||||
#endif
|
||||
EnterCriticalSection(&mHandle);
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
mOwnerThread.setOwnerAfterLock(self);
|
||||
#endif
|
||||
}
|
||||
void unlock (void)
|
||||
{
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
mOwnerThread.checkSetOwnerBeforeUnlock();
|
||||
#endif
|
||||
LeaveCriticalSection(&mHandle);
|
||||
}
|
||||
bool try_lock (void)
|
||||
{
|
||||
unsigned char state = mState.load(std::memory_order_acquire);
|
||||
if ((state == 2) && mState.compare_exchange_strong(state, 1, std::memory_order_acquire))
|
||||
{
|
||||
InitializeCriticalSection(&mHandle);
|
||||
mState.store(0, std::memory_order_release);
|
||||
}
|
||||
if (state == 1)
|
||||
return false;
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
DWORD self = mOwnerThread.checkOwnerBeforeLock();
|
||||
#endif
|
||||
BOOL ret = TryEnterCriticalSection(&mHandle);
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
if (ret)
|
||||
mOwnerThread.setOwnerAfterLock(self);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
native_handle_type native_handle (void)
|
||||
{
|
||||
return &mHandle;
|
||||
}
|
||||
};
|
||||
} // Namespace "xp"
|
||||
#if (WINVER >= _WIN32_WINNT_WIN7)
|
||||
using windows7::mutex;
|
||||
#else
|
||||
using xp::mutex;
|
||||
#endif
|
||||
|
||||
class recursive_timed_mutex
|
||||
{
|
||||
static constexpr DWORD kWaitAbandoned = 0x00000080l;
|
||||
static constexpr DWORD kWaitObject0 = 0x00000000l;
|
||||
static constexpr DWORD kInfinite = 0xffffffffl;
|
||||
inline bool try_lock_internal (DWORD ms) noexcept
|
||||
{
|
||||
DWORD ret = WaitForSingleObject(mHandle, ms);
|
||||
#ifndef NDEBUG
|
||||
if (ret == kWaitAbandoned)
|
||||
{
|
||||
using namespace std;
|
||||
fprintf(stderr, "FATAL: Thread terminated while holding a mutex.");
|
||||
terminate();
|
||||
}
|
||||
#endif
|
||||
return (ret == kWaitObject0) || (ret == kWaitAbandoned);
|
||||
}
|
||||
protected:
|
||||
HANDLE mHandle;
|
||||
// Track locking thread for error checking of non-recursive timed_mutex. For
|
||||
// standard compliance, this must be defined in the same class and at the same
|
||||
// access-control level as every other variable in the timed_mutex.
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
friend class vista::condition_variable;
|
||||
_OwnerThread mOwnerThread {};
|
||||
#endif
|
||||
public:
|
||||
typedef HANDLE native_handle_type;
|
||||
native_handle_type native_handle() const {return mHandle;}
|
||||
recursive_timed_mutex(const recursive_timed_mutex&) = delete;
|
||||
recursive_timed_mutex& operator=(const recursive_timed_mutex&) = delete;
|
||||
recursive_timed_mutex(): mHandle(CreateMutex(NULL, FALSE, NULL)) {}
|
||||
~recursive_timed_mutex()
|
||||
{
|
||||
CloseHandle(mHandle);
|
||||
}
|
||||
void lock()
|
||||
{
|
||||
DWORD ret = WaitForSingleObject(mHandle, kInfinite);
|
||||
// If (ret == WAIT_ABANDONED), then the thread that held ownership was
|
||||
// terminated. Behavior is undefined, but Windows will pass ownership to this
|
||||
// thread.
|
||||
#ifndef NDEBUG
|
||||
if (ret == kWaitAbandoned)
|
||||
{
|
||||
using namespace std;
|
||||
fprintf(stderr, "FATAL: Thread terminated while holding a mutex.");
|
||||
terminate();
|
||||
}
|
||||
#endif
|
||||
if ((ret != kWaitObject0) && (ret != kWaitAbandoned))
|
||||
{
|
||||
throw std::system_error(GetLastError(), std::system_category());
|
||||
}
|
||||
}
|
||||
void unlock()
|
||||
{
|
||||
if (!ReleaseMutex(mHandle))
|
||||
throw std::system_error(GetLastError(), std::system_category());
|
||||
}
|
||||
bool try_lock()
|
||||
{
|
||||
return try_lock_internal(0);
|
||||
}
|
||||
template <class Rep, class Period>
|
||||
bool try_lock_for(const std::chrono::duration<Rep,Period>& dur)
|
||||
{
|
||||
using namespace std::chrono;
|
||||
auto timeout = duration_cast<milliseconds>(dur).count();
|
||||
while (timeout > 0)
|
||||
{
|
||||
constexpr auto kMaxStep = static_cast<decltype(timeout)>(kInfinite-1);
|
||||
auto step = (timeout < kMaxStep) ? timeout : kMaxStep;
|
||||
if (try_lock_internal(static_cast<DWORD>(step)))
|
||||
return true;
|
||||
timeout -= step;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
template <class Clock, class Duration>
|
||||
bool try_lock_until(const std::chrono::time_point<Clock,Duration>& timeout_time)
|
||||
{
|
||||
return try_lock_for(timeout_time - Clock::now());
|
||||
}
|
||||
};
|
||||
|
||||
// Override if, and only if, it is necessary for error-checking.
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
class timed_mutex: recursive_timed_mutex
|
||||
{
|
||||
public:
|
||||
timed_mutex(const timed_mutex&) = delete;
|
||||
timed_mutex& operator=(const timed_mutex&) = delete;
|
||||
void lock()
|
||||
{
|
||||
DWORD self = mOwnerThread.checkOwnerBeforeLock();
|
||||
recursive_timed_mutex::lock();
|
||||
mOwnerThread.setOwnerAfterLock(self);
|
||||
}
|
||||
void unlock()
|
||||
{
|
||||
mOwnerThread.checkSetOwnerBeforeUnlock();
|
||||
recursive_timed_mutex::unlock();
|
||||
}
|
||||
template <class Rep, class Period>
|
||||
bool try_lock_for(const std::chrono::duration<Rep,Period>& dur)
|
||||
{
|
||||
DWORD self = mOwnerThread.checkOwnerBeforeLock();
|
||||
bool ret = recursive_timed_mutex::try_lock_for(dur);
|
||||
if (ret)
|
||||
mOwnerThread.setOwnerAfterLock(self);
|
||||
return ret;
|
||||
}
|
||||
template <class Clock, class Duration>
|
||||
bool try_lock_until(const std::chrono::time_point<Clock,Duration>& timeout_time)
|
||||
{
|
||||
return try_lock_for(timeout_time - Clock::now());
|
||||
}
|
||||
bool try_lock ()
|
||||
{
|
||||
return try_lock_for(std::chrono::milliseconds(0));
|
||||
}
|
||||
};
|
||||
#else
|
||||
typedef recursive_timed_mutex timed_mutex;
|
||||
#endif
|
||||
|
||||
class once_flag
|
||||
{
|
||||
// When available, the SRW-based mutexes should be faster than the
|
||||
// CriticalSection-based mutexes. Only try_lock will be unavailable in Vista,
|
||||
// and try_lock is not used by once_flag.
|
||||
#if (_WIN32_WINNT == _WIN32_WINNT_VISTA)
|
||||
windows7::mutex mMutex;
|
||||
#else
|
||||
mutex mMutex;
|
||||
#endif
|
||||
std::atomic_bool mHasRun;
|
||||
once_flag(const once_flag&) = delete;
|
||||
once_flag& operator=(const once_flag&) = delete;
|
||||
template<class Callable, class... Args>
|
||||
friend void call_once(once_flag& once, Callable&& f, Args&&... args);
|
||||
public:
|
||||
constexpr once_flag() noexcept: mMutex(), mHasRun(false) {}
|
||||
};
|
||||
|
||||
template<class Callable, class... Args>
|
||||
void call_once(once_flag& flag, Callable&& func, Args&&... args)
|
||||
{
|
||||
if (flag.mHasRun.load(std::memory_order_acquire))
|
||||
return;
|
||||
lock_guard<decltype(flag.mMutex)> lock(flag.mMutex);
|
||||
if (flag.mHasRun.load(std::memory_order_acquire))
|
||||
return;
|
||||
detail::invoke(std::forward<Callable>(func),std::forward<Args>(args)...);
|
||||
flag.mHasRun.store(true, std::memory_order_release);
|
||||
}
|
||||
} // Namespace mingw_stdthread
|
||||
|
||||
// Push objects into std, but only if they are not already there.
|
||||
namespace std
|
||||
{
|
||||
// Because of quirks of the compiler, the common "using namespace std;"
|
||||
// directive would flatten the namespaces and introduce ambiguity where there
|
||||
// was none. Direct specification (std::), however, would be unaffected.
|
||||
// Take the safe option, and include only in the presence of MinGW's win32
|
||||
// implementation.
|
||||
#if defined(__MINGW32__ ) && !defined(_GLIBCXX_HAS_GTHREADS)
|
||||
using mingw_stdthread::recursive_mutex;
|
||||
using mingw_stdthread::mutex;
|
||||
using mingw_stdthread::recursive_timed_mutex;
|
||||
using mingw_stdthread::timed_mutex;
|
||||
using mingw_stdthread::once_flag;
|
||||
using mingw_stdthread::call_once;
|
||||
#elif !defined(MINGW_STDTHREAD_REDUNDANCY_WARNING) // Skip repetition
|
||||
#define MINGW_STDTHREAD_REDUNDANCY_WARNING
|
||||
#pragma message "This version of MinGW seems to include a win32 port of\
|
||||
pthreads, and probably already has C++11 std threading classes implemented,\
|
||||
based on pthreads. These classes, found in namespace std, are not overridden\
|
||||
by the mingw-std-thread library. If you would still like to use this\
|
||||
implementation (as it is more lightweight), use the classes provided in\
|
||||
namespace mingw_stdthread."
|
||||
#endif
|
||||
}
|
||||
#endif // WIN32STDMUTEX_H
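A minimal usage sketch for the header above, assuming mingw.mutex.h is on the include path and the code is built with a MinGW toolchain; the object and function names (g_mutex, init_once, ...) are illustrative, not part of the library.

```cpp
// Hypothetical usage sketch (not part of the library): exclusive, timed and
// one-time locking with the classes defined in mingw.mutex.h.
#include <chrono>
#include <cstdio>
#include "mingw.mutex.h"

mingw_stdthread::mutex g_mutex;              // SRW-backed on Windows 7+, CRITICAL_SECTION-backed on XP.
mingw_stdthread::timed_mutex g_timed_mutex;  // Backed by a Win32 mutex HANDLE.
mingw_stdthread::once_flag g_once;

void init_once()
{
    std::printf("initialized exactly once\n");
}

int main()
{
    mingw_stdthread::call_once(g_once, init_once);

    {   // lock_guard is re-exported from std by the header.
        mingw_stdthread::lock_guard<mingw_stdthread::mutex> lock(g_mutex);
        std::printf("holding the exclusive mutex\n");
    }

    // Timed locking: give up after 100 ms instead of blocking indefinitely.
    if (g_timed_mutex.try_lock_for(std::chrono::milliseconds(100)))
    {
        std::printf("acquired the timed mutex\n");
        g_timed_mutex.unlock();
    }
    return 0;
}
```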
503
randomx/mingw-std-threads-master/mingw.shared_mutex.h
Normal file
@@ -0,0 +1,503 @@
/// \file mingw.shared_mutex.h
|
||||
/// \brief Standard-compliant shared_mutex for MinGW
|
||||
///
|
||||
/// (c) 2017 by Nathaniel J. McClatchey, Athens OH, United States
|
||||
/// \author Nathaniel J. McClatchey
|
||||
///
|
||||
/// \copyright Simplified (2-clause) BSD License.
|
||||
///
|
||||
/// \note This file may become part of the mingw-w64 runtime package. If/when
|
||||
/// this happens, the appropriate license will be added, i.e. this code will
|
||||
/// become dual-licensed, and the current BSD 2-clause license will stay.
|
||||
/// \note Target Windows version is determined by WINVER, which is determined in
|
||||
/// <windows.h> from _WIN32_WINNT, which can itself be set by the user.
|
||||
|
||||
// Notes on the namespaces:
|
||||
// - The implementation can be accessed directly in the namespace
|
||||
// mingw_stdthread.
|
||||
// - Objects will be brought into namespace std by a using directive. This
|
||||
// will cause objects declared in std (such as MinGW's implementation) to
|
||||
// hide this implementation's definitions.
|
||||
// - To avoid polluting the namespace with implementation details, all objects
|
||||
// to be pushed into std will be placed in mingw_stdthread::visible.
|
||||
// The end result is that if MinGW supplies an object, it is automatically
|
||||
// used. If MinGW does not supply an object, this implementation's version will
|
||||
// instead be used.
|
||||
|
||||
#ifndef MINGW_SHARED_MUTEX_H_
|
||||
#define MINGW_SHARED_MUTEX_H_
|
||||
|
||||
#if !defined(__cplusplus) || (__cplusplus < 201103L)
|
||||
#error A C++11 compiler is required!
|
||||
#endif
|
||||
|
||||
#include <cassert>
|
||||
// For descriptive errors.
|
||||
#include <system_error>
|
||||
// Implementing a shared_mutex without OS support will require atomic read-
|
||||
// modify-write capacity.
|
||||
#include <atomic>
|
||||
// For timing in shared_lock and shared_timed_mutex.
|
||||
#include <chrono>
|
||||
#include <limits>
|
||||
|
||||
// Use MinGW's shared_lock class template, if it's available. Requires C++14.
|
||||
// If unavailable (e.g. because this library is being used in C++11), then an
|
||||
// implementation of shared_lock is provided by this header.
|
||||
#if (__cplusplus >= 201402L)
|
||||
#include <shared_mutex>
|
||||
#endif
|
||||
|
||||
// For defer_lock_t, adopt_lock_t, and try_to_lock_t
|
||||
#include "mingw.mutex.h"
|
||||
// For this_thread::yield.
|
||||
//#include "mingw.thread.h"
|
||||
|
||||
// Might be able to use native Slim Reader-Writer (SRW) locks.
|
||||
#ifdef _WIN32
|
||||
#include <sdkddkver.h> // Detect Windows version.
|
||||
#if (defined(__MINGW32__) && !defined(__MINGW64_VERSION_MAJOR))
|
||||
#pragma message "The Windows API that MinGW-w32 provides is not fully compatible\
|
||||
with Microsoft's API. We'll try to work around this, but we can make no\
|
||||
guarantees. This problem does not exist in MinGW-w64."
|
||||
#include <windows.h> // No further granularity can be expected.
|
||||
#else
|
||||
#include <synchapi.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
namespace mingw_stdthread
|
||||
{
|
||||
// Define a portable atomics-based shared_mutex
|
||||
namespace portable
|
||||
{
|
||||
class shared_mutex
|
||||
{
|
||||
typedef uint_fast16_t counter_type;
|
||||
std::atomic<counter_type> mCounter {0};
|
||||
    // The high bit marks an exclusive (writer) hold; the remaining low bits count active readers.
    static constexpr counter_type kWriteBit = 1 << (std::numeric_limits<counter_type>::digits - 1);
|
||||
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
// Runtime checker for verifying owner threads. Note: Exclusive mode only.
|
||||
_OwnerThread mOwnerThread {};
|
||||
#endif
|
||||
public:
|
||||
typedef shared_mutex * native_handle_type;
|
||||
|
||||
shared_mutex () = default;
|
||||
|
||||
// No form of copying or moving should be allowed.
|
||||
shared_mutex (const shared_mutex&) = delete;
|
||||
shared_mutex & operator= (const shared_mutex&) = delete;
|
||||
|
||||
~shared_mutex ()
|
||||
{
|
||||
// Terminate if someone tries to destroy an owned mutex.
|
||||
assert(mCounter.load(std::memory_order_relaxed) == 0);
|
||||
}
|
||||
|
||||
void lock_shared (void)
|
||||
{
|
||||
counter_type expected = mCounter.load(std::memory_order_relaxed);
|
||||
do
|
||||
{
|
||||
// Delay if writing or if too many readers are attempting to read.
|
||||
if (expected >= kWriteBit - 1)
|
||||
{
|
||||
using namespace std;
|
||||
expected = mCounter.load(std::memory_order_relaxed);
|
||||
continue;
|
||||
}
|
||||
if (mCounter.compare_exchange_weak(expected,
|
||||
static_cast<counter_type>(expected + 1),
|
||||
std::memory_order_acquire,
|
||||
std::memory_order_relaxed))
|
||||
break;
|
||||
}
|
||||
while (true);
|
||||
}
|
||||
|
||||
bool try_lock_shared (void)
|
||||
{
|
||||
counter_type expected = mCounter.load(std::memory_order_relaxed) & static_cast<counter_type>(~kWriteBit);
|
||||
if (expected + 1 == kWriteBit)
|
||||
return false;
|
||||
else
|
||||
return mCounter.compare_exchange_strong( expected,
|
||||
static_cast<counter_type>(expected + 1),
|
||||
std::memory_order_acquire,
|
||||
std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
void unlock_shared (void)
|
||||
{
|
||||
using namespace std;
|
||||
#ifndef NDEBUG
|
||||
if (!(mCounter.fetch_sub(1, memory_order_release) & static_cast<counter_type>(~kWriteBit)))
|
||||
throw system_error(make_error_code(errc::operation_not_permitted));
|
||||
#else
|
||||
mCounter.fetch_sub(1, memory_order_release);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Behavior is undefined if a lock was previously acquired.
|
||||
void lock (void)
|
||||
{
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
DWORD self = mOwnerThread.checkOwnerBeforeLock();
|
||||
#endif
|
||||
using namespace std;
|
||||
// Might be able to use relaxed memory order...
|
||||
// Wait for the write-lock to be unlocked, then claim the write slot.
|
||||
counter_type current;
|
||||
while ((current = mCounter.fetch_or(kWriteBit, std::memory_order_acquire)) & kWriteBit);
|
||||
//this_thread::yield();
|
||||
// Wait for readers to finish up.
|
||||
while (current != kWriteBit)
|
||||
{
|
||||
//this_thread::yield();
|
||||
current = mCounter.load(std::memory_order_acquire);
|
||||
}
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
mOwnerThread.setOwnerAfterLock(self);
|
||||
#endif
|
||||
}
|
||||
|
||||
bool try_lock (void)
|
||||
{
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
DWORD self = mOwnerThread.checkOwnerBeforeLock();
|
||||
#endif
|
||||
counter_type expected = 0;
|
||||
bool ret = mCounter.compare_exchange_strong(expected, kWriteBit,
|
||||
std::memory_order_acquire,
|
||||
std::memory_order_relaxed);
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
if (ret)
|
||||
mOwnerThread.setOwnerAfterLock(self);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
void unlock (void)
|
||||
{
|
||||
#if STDMUTEX_RECURSION_CHECKS
|
||||
mOwnerThread.checkSetOwnerBeforeUnlock();
|
||||
#endif
|
||||
using namespace std;
|
||||
#ifndef NDEBUG
|
||||
if (mCounter.load(memory_order_relaxed) != kWriteBit)
|
||||
throw system_error(make_error_code(errc::operation_not_permitted));
|
||||
#endif
|
||||
mCounter.store(0, memory_order_release);
|
||||
}
|
||||
|
||||
native_handle_type native_handle (void)
|
||||
{
|
||||
return this;
|
||||
}
|
||||
};
|
||||
|
||||
} // Namespace portable
|
||||
|
||||
// The native shared_mutex implementation primarily uses features of Windows
|
||||
// Vista, but the features used for try_lock and try_lock_shared were not
|
||||
// introduced until Windows 7. To allow limited use while compiling for Vista,
|
||||
// I define the class without try_* functions in that case.
|
||||
// Only fully-featured implementations will be placed into namespace std.
|
||||
#if defined(_WIN32) && (WINVER >= _WIN32_WINNT_VISTA)
|
||||
namespace vista
|
||||
{
|
||||
class condition_variable_any;
|
||||
}
|
||||
|
||||
namespace windows7
|
||||
{
|
||||
// We already #include "mingw.mutex.h". May as well reduce redundancy.
|
||||
class shared_mutex : windows7::mutex
|
||||
{
|
||||
// Allow condition_variable_any (and only condition_variable_any) to treat a
|
||||
// shared_mutex as its base class.
|
||||
friend class vista::condition_variable_any;
|
||||
public:
|
||||
using windows7::mutex::native_handle_type;
|
||||
using windows7::mutex::lock;
|
||||
using windows7::mutex::unlock;
|
||||
using windows7::mutex::native_handle;
|
||||
|
||||
void lock_shared (void)
|
||||
{
|
||||
AcquireSRWLockShared(native_handle());
|
||||
}
|
||||
|
||||
void unlock_shared (void)
|
||||
{
|
||||
ReleaseSRWLockShared(native_handle());
|
||||
}
|
||||
|
||||
// TryAcquireSRW functions are a Windows 7 feature.
|
||||
#if (WINVER >= _WIN32_WINNT_WIN7)
|
||||
bool try_lock_shared (void)
|
||||
{
|
||||
return TryAcquireSRWLockShared(native_handle()) != 0;
|
||||
}
|
||||
|
||||
using windows7::mutex::try_lock;
|
||||
#endif
|
||||
};
|
||||
|
||||
} // Namespace windows7
|
||||
#endif // Compiling for Vista
|
||||
#if (defined(_WIN32) && (WINVER >= _WIN32_WINNT_WIN7))
|
||||
using windows7::shared_mutex;
|
||||
#else
|
||||
using portable::shared_mutex;
|
||||
#endif
|
||||
|
||||
class shared_timed_mutex : shared_mutex
|
||||
{
|
||||
typedef shared_mutex Base;
|
||||
public:
|
||||
using Base::lock;
|
||||
using Base::try_lock;
|
||||
using Base::unlock;
|
||||
using Base::lock_shared;
|
||||
using Base::try_lock_shared;
|
||||
using Base::unlock_shared;
|
||||
|
||||
template< class Clock, class Duration >
|
||||
bool try_lock_until ( const std::chrono::time_point<Clock,Duration>& cutoff )
|
||||
{
|
||||
do
|
||||
{
|
||||
if (try_lock())
|
||||
return true;
|
||||
}
|
||||
while (std::chrono::steady_clock::now() < cutoff);
|
||||
return false;
|
||||
}
|
||||
|
||||
template< class Rep, class Period >
|
||||
bool try_lock_for (const std::chrono::duration<Rep,Period>& rel_time)
|
||||
{
|
||||
return try_lock_until(std::chrono::steady_clock::now() + rel_time);
|
||||
}
|
||||
|
||||
template< class Clock, class Duration >
|
||||
bool try_lock_shared_until ( const std::chrono::time_point<Clock,Duration>& cutoff )
|
||||
{
|
||||
do
|
||||
{
|
||||
if (try_lock_shared())
|
||||
return true;
|
||||
}
|
||||
while (std::chrono::steady_clock::now() < cutoff);
|
||||
return false;
|
||||
}
|
||||
|
||||
template< class Rep, class Period >
|
||||
bool try_lock_shared_for (const std::chrono::duration<Rep,Period>& rel_time)
|
||||
{
|
||||
return try_lock_shared_until(std::chrono::steady_clock::now() + rel_time);
|
||||
}
|
||||
};
|
||||
|
||||
#if __cplusplus >= 201402L
|
||||
using std::shared_lock;
|
||||
#else
|
||||
// If not supplied by <shared_mutex> (e.g. because C++14 is not supported), I
|
||||
// supply the various helper classes that the header should have defined.
|
||||
template<class Mutex>
|
||||
class shared_lock
|
||||
{
|
||||
Mutex * mMutex;
|
||||
bool mOwns;
|
||||
// Reduce code redundancy
|
||||
void verify_lockable (void)
|
||||
{
|
||||
using namespace std;
|
||||
if (mMutex == nullptr)
|
||||
throw system_error(make_error_code(errc::operation_not_permitted));
|
||||
if (mOwns)
|
||||
throw system_error(make_error_code(errc::resource_deadlock_would_occur));
|
||||
}
|
||||
public:
|
||||
typedef Mutex mutex_type;
|
||||
|
||||
shared_lock (void) noexcept
|
||||
: mMutex(nullptr), mOwns(false)
|
||||
{
|
||||
}
|
||||
|
||||
shared_lock (shared_lock<Mutex> && other) noexcept
|
||||
    : mMutex(other.mMutex), mOwns(other.mOwns)
|
||||
{
|
||||
other.mMutex = nullptr;
|
||||
other.mOwns = false;
|
||||
}
|
||||
|
||||
explicit shared_lock (mutex_type & m)
|
||||
: mMutex(&m), mOwns(true)
|
||||
{
|
||||
mMutex->lock_shared();
|
||||
}
|
||||
|
||||
shared_lock (mutex_type & m, defer_lock_t) noexcept
|
||||
: mMutex(&m), mOwns(false)
|
||||
{
|
||||
}
|
||||
|
||||
shared_lock (mutex_type & m, adopt_lock_t)
|
||||
: mMutex(&m), mOwns(true)
|
||||
{
|
||||
}
|
||||
|
||||
shared_lock (mutex_type & m, try_to_lock_t)
|
||||
: mMutex(&m), mOwns(m.try_lock_shared())
|
||||
{
|
||||
}
|
||||
|
||||
template< class Rep, class Period >
|
||||
shared_lock( mutex_type& m, const std::chrono::duration<Rep,Period>& timeout_duration )
|
||||
: mMutex(&m), mOwns(m.try_lock_shared_for(timeout_duration))
|
||||
{
|
||||
}
|
||||
|
||||
template< class Clock, class Duration >
|
||||
shared_lock( mutex_type& m, const std::chrono::time_point<Clock,Duration>& timeout_time )
|
||||
: mMutex(&m), mOwns(m.try_lock_shared_until(timeout_time))
|
||||
{
|
||||
}
|
||||
|
||||
shared_lock& operator= (shared_lock<Mutex> && other) noexcept
|
||||
{
|
||||
if (&other != this)
|
||||
{
|
||||
if (mOwns)
|
||||
mMutex->unlock_shared();
|
||||
mMutex = other.mMutex;
|
||||
mOwns = other.mOwns;
|
||||
other.mMutex = nullptr;
|
||||
other.mOwns = false;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
~shared_lock (void)
|
||||
{
|
||||
if (mOwns)
|
||||
mMutex->unlock_shared();
|
||||
}
|
||||
|
||||
shared_lock (const shared_lock<Mutex> &) = delete;
|
||||
shared_lock& operator= (const shared_lock<Mutex> &) = delete;
|
||||
|
||||
// Shared locking
|
||||
void lock (void)
|
||||
{
|
||||
verify_lockable();
|
||||
mMutex->lock_shared();
|
||||
mOwns = true;
|
||||
}
|
||||
|
||||
bool try_lock (void)
|
||||
{
|
||||
verify_lockable();
|
||||
mOwns = mMutex->try_lock_shared();
|
||||
return mOwns;
|
||||
}
|
||||
|
||||
template< class Clock, class Duration >
|
||||
bool try_lock_until( const std::chrono::time_point<Clock,Duration>& cutoff )
|
||||
{
|
||||
verify_lockable();
|
||||
do
|
||||
{
|
||||
mOwns = mMutex->try_lock_shared();
|
||||
if (mOwns)
|
||||
return mOwns;
|
||||
}
|
||||
while (std::chrono::steady_clock::now() < cutoff);
|
||||
return false;
|
||||
}
|
||||
|
||||
template< class Rep, class Period >
|
||||
bool try_lock_for (const std::chrono::duration<Rep,Period>& rel_time)
|
||||
{
|
||||
return try_lock_until(std::chrono::steady_clock::now() + rel_time);
|
||||
}
|
||||
|
||||
void unlock (void)
|
||||
{
|
||||
using namespace std;
|
||||
if (!mOwns)
|
||||
throw system_error(make_error_code(errc::operation_not_permitted));
|
||||
mMutex->unlock_shared();
|
||||
mOwns = false;
|
||||
}
|
||||
|
||||
// Modifiers
|
||||
void swap (shared_lock<Mutex> & other) noexcept
|
||||
{
|
||||
using namespace std;
|
||||
swap(mMutex, other.mMutex);
|
||||
swap(mOwns, other.mOwns);
|
||||
}
|
||||
|
||||
mutex_type * release (void) noexcept
|
||||
{
|
||||
mutex_type * ptr = mMutex;
|
||||
mMutex = nullptr;
|
||||
mOwns = false;
|
||||
return ptr;
|
||||
}
|
||||
// Observers
|
||||
mutex_type * mutex (void) const noexcept
|
||||
{
|
||||
return mMutex;
|
||||
}
|
||||
|
||||
bool owns_lock (void) const noexcept
|
||||
{
|
||||
return mOwns;
|
||||
}
|
||||
|
||||
explicit operator bool () const noexcept
|
||||
{
|
||||
return owns_lock();
|
||||
}
|
||||
};
|
||||
|
||||
template< class Mutex >
|
||||
void swap( shared_lock<Mutex>& lhs, shared_lock<Mutex>& rhs ) noexcept
|
||||
{
|
||||
lhs.swap(rhs);
|
||||
}
|
||||
#endif // C++11
|
||||
} // Namespace mingw_stdthread
|
||||
|
||||
namespace std
|
||||
{
|
||||
// Because of quirks of the compiler, the common "using namespace std;"
|
||||
// directive would flatten the namespaces and introduce ambiguity where there
|
||||
// was none. Direct specification (std::), however, would be unaffected.
|
||||
// Take the safe option, and include only in the presence of MinGW's win32
|
||||
// implementation.
|
||||
#if (__cplusplus < 201703L) || (defined(__MINGW32__ ) && !defined(_GLIBCXX_HAS_GTHREADS))
|
||||
using mingw_stdthread::shared_mutex;
|
||||
#endif
|
||||
#if (__cplusplus < 201402L) || (defined(__MINGW32__ ) && !defined(_GLIBCXX_HAS_GTHREADS))
|
||||
using mingw_stdthread::shared_timed_mutex;
|
||||
using mingw_stdthread::shared_lock;
|
||||
#elif !defined(MINGW_STDTHREAD_REDUNDANCY_WARNING) // Skip repetition
|
||||
#define MINGW_STDTHREAD_REDUNDANCY_WARNING
|
||||
#pragma message "This version of MinGW seems to include a win32 port of\
|
||||
pthreads, and probably already has C++ std threading classes implemented,\
|
||||
based on pthreads. These classes, found in namespace std, are not overridden\
|
||||
by the mingw-std-thread library. If you would still like to use this\
|
||||
implementation (as it is more lightweight), use the classes provided in\
|
||||
namespace mingw_stdthread."
|
||||
#endif
|
||||
} // Namespace std
|
||||
#endif // MINGW_SHARED_MUTEX_H_
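A short reader/writer sketch for the header above, assuming mingw.shared_mutex.h and mingw.thread.h are on the include path; the names used (g_lock, bump, peek) are illustrative only.

```cpp
// Hypothetical usage sketch (not part of the library): one writer and several
// readers sharing a counter through the shared_mutex defined in this header.
#include <cstdio>
#include <vector>
#include "mingw.shared_mutex.h"
#include "mingw.thread.h"

mingw_stdthread::shared_mutex g_lock;
int g_value = 0;

void bump()
{
    // Exclusive (writer) side: lock()/unlock() through lock_guard.
    mingw_stdthread::lock_guard<mingw_stdthread::shared_mutex> lock(g_lock);
    ++g_value;
}

int peek()
{
    // Shared (reader) side: lock_shared()/unlock_shared() through shared_lock.
    mingw_stdthread::shared_lock<mingw_stdthread::shared_mutex> lock(g_lock);
    return g_value;
}

int main()
{
    std::vector<mingw_stdthread::thread> pool;
    pool.emplace_back(bump);
    for (int i = 0; i < 4; ++i)
        pool.emplace_back([] { std::printf("observed %d\n", peek()); });
    for (auto & t : pool)
        t.join();
    return 0;
}
```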
360
randomx/mingw-std-threads-master/mingw.thread.h
Normal file
@@ -0,0 +1,360 @@
/**
|
||||
* @file mingw.thread.h
|
||||
* @brief std::thread implementation for MinGW
|
||||
* (c) 2013-2016 by Mega Limited, Auckland, New Zealand
|
||||
* @author Alexander Vassilev
|
||||
*
|
||||
* @copyright Simplified (2-clause) BSD License.
|
||||
* You should have received a copy of the license along with this
|
||||
* program.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
* @note
|
||||
* This file may become part of the mingw-w64 runtime package. If/when this happens,
|
||||
* the appropriate license will be added, i.e. this code will become dual-licensed,
|
||||
* and the current BSD 2-clause license will stay.
|
||||
*/
|
||||
|
||||
#ifndef WIN32STDTHREAD_H
|
||||
#define WIN32STDTHREAD_H
|
||||
|
||||
#if !defined(__cplusplus) || (__cplusplus < 201103L)
|
||||
#error A C++11 compiler is required!
|
||||
#endif
|
||||
|
||||
// Use the standard classes for std::, if available.
|
||||
#include <thread>
|
||||
|
||||
#include <cstddef> // For std::size_t
|
||||
#include <cerrno> // Detect error type.
|
||||
#include <exception> // For std::terminate
|
||||
#include <system_error> // For std::system_error
|
||||
#include <functional> // For std::hash
|
||||
#include <tuple> // For std::tuple
|
||||
#include <chrono> // For sleep timing.
|
||||
#include <memory> // For std::unique_ptr
|
||||
#include <iosfwd> // Stream output for thread ids.
|
||||
#include <utility> // For std::swap, std::forward
|
||||
|
||||
#include "mingw.invoke.h"
|
||||
|
||||
#if (defined(__MINGW32__) && !defined(__MINGW64_VERSION_MAJOR))
|
||||
#pragma message "The Windows API that MinGW-w32 provides is not fully compatible\
|
||||
with Microsoft's API. We'll try to work around this, but we can make no\
|
||||
guarantees. This problem does not exist in MinGW-w64."
|
||||
#include <windows.h> // No further granularity can be expected.
|
||||
#else
|
||||
#include <synchapi.h> // For WaitForSingleObject
|
||||
#include <handleapi.h> // For CloseHandle, etc.
|
||||
#include <sysinfoapi.h> // For GetNativeSystemInfo
|
||||
#include <processthreadsapi.h> // For GetCurrentThreadId
|
||||
#endif
|
||||
#include <process.h> // For _beginthreadex
|
||||
|
||||
#ifndef NDEBUG
|
||||
#include <cstdio>
|
||||
#endif
|
||||
|
||||
#if !defined(_WIN32_WINNT) || (_WIN32_WINNT < 0x0501)
|
||||
#error To use the MinGW-std-threads library, you will need to define the macro _WIN32_WINNT to be 0x0501 (Windows XP) or higher.
|
||||
#endif
|
||||
|
||||
// Instead of INVALID_HANDLE_VALUE, _beginthreadex returns 0.
|
||||
namespace mingw_stdthread
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
template<std::size_t...>
|
||||
struct IntSeq {};
|
||||
|
||||
template<std::size_t N, std::size_t... S>
|
||||
struct GenIntSeq : GenIntSeq<N-1, N-1, S...> { };
|
||||
|
||||
template<std::size_t... S>
|
||||
struct GenIntSeq<0, S...> { typedef IntSeq<S...> type; };
|
||||
|
||||
// Use a template specialization to avoid relying on compiler optimization
|
||||
// when determining the parameter integer sequence.
|
||||
template<class Func, class T, typename... Args>
|
||||
class ThreadFuncCall;
|
||||
// We can't define the Call struct in the function - the standard forbids template methods in that case
|
||||
template<class Func, std::size_t... S, typename... Args>
|
||||
class ThreadFuncCall<Func, detail::IntSeq<S...>, Args...>
|
||||
{
|
||||
static_assert(sizeof...(S) == sizeof...(Args), "Args must match.");
|
||||
using Tuple = std::tuple<typename std::decay<Args>::type...>;
|
||||
typename std::decay<Func>::type mFunc;
|
||||
Tuple mArgs;
|
||||
|
||||
public:
|
||||
ThreadFuncCall(Func&& aFunc, Args&&... aArgs)
|
||||
: mFunc(std::forward<Func>(aFunc)),
|
||||
mArgs(std::forward<Args>(aArgs)...)
|
||||
{
|
||||
}
|
||||
|
||||
void callFunc()
|
||||
{
|
||||
detail::invoke(std::move(mFunc), std::move(std::get<S>(mArgs)) ...);
|
||||
}
|
||||
};
|
||||
|
||||
// Allow construction of threads without exposing implementation.
|
||||
class ThreadIdTool;
|
||||
} // Namespace "detail"
|
||||
|
||||
class thread
|
||||
{
|
||||
public:
|
||||
class id
|
||||
{
|
||||
DWORD mId = 0;
|
||||
friend class thread;
|
||||
friend class std::hash<id>;
|
||||
friend class detail::ThreadIdTool;
|
||||
explicit id(DWORD aId) noexcept : mId(aId){}
|
||||
public:
|
||||
id (void) noexcept = default;
|
||||
friend bool operator==(id x, id y) noexcept {return x.mId == y.mId; }
|
||||
friend bool operator!=(id x, id y) noexcept {return x.mId != y.mId; }
|
||||
friend bool operator< (id x, id y) noexcept {return x.mId < y.mId; }
|
||||
friend bool operator<=(id x, id y) noexcept {return x.mId <= y.mId; }
|
||||
friend bool operator> (id x, id y) noexcept {return x.mId > y.mId; }
|
||||
friend bool operator>=(id x, id y) noexcept {return x.mId >= y.mId; }
|
||||
|
||||
template<class _CharT, class _Traits>
|
||||
friend std::basic_ostream<_CharT, _Traits>&
|
||||
operator<<(std::basic_ostream<_CharT, _Traits>& __out, id __id)
|
||||
{
|
||||
if (__id.mId == 0)
|
||||
{
|
||||
return __out << "(invalid std::thread::id)";
|
||||
}
|
||||
else
|
||||
{
|
||||
return __out << __id.mId;
|
||||
}
|
||||
}
|
||||
};
|
||||
private:
|
||||
static constexpr HANDLE kInvalidHandle = nullptr;
|
||||
static constexpr DWORD kInfinite = 0xffffffffl;
|
||||
HANDLE mHandle;
|
||||
id mThreadId;
|
||||
|
||||
template <class Call>
|
||||
static unsigned __stdcall threadfunc(void* arg)
|
||||
{
|
||||
std::unique_ptr<Call> call(static_cast<Call*>(arg));
|
||||
call->callFunc();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned int _hardware_concurrency_helper() noexcept
|
||||
{
|
||||
SYSTEM_INFO sysinfo;
|
||||
// This is one of the few functions used by the library which has a nearly-
|
||||
// equivalent function defined in earlier versions of Windows. Include the
|
||||
// workaround, just as a reminder that it does exist.
|
||||
#if defined(_WIN32_WINNT) && (_WIN32_WINNT >= 0x0501)
|
||||
::GetNativeSystemInfo(&sysinfo);
|
||||
#else
|
||||
::GetSystemInfo(&sysinfo);
|
||||
#endif
|
||||
return sysinfo.dwNumberOfProcessors;
|
||||
}
|
||||
public:
|
||||
typedef HANDLE native_handle_type;
|
||||
id get_id() const noexcept {return mThreadId;}
|
||||
native_handle_type native_handle() const {return mHandle;}
|
||||
thread(): mHandle(kInvalidHandle), mThreadId(){}
|
||||
|
||||
thread(thread&& other)
|
||||
:mHandle(other.mHandle), mThreadId(other.mThreadId)
|
||||
{
|
||||
other.mHandle = kInvalidHandle;
|
||||
other.mThreadId = id{};
|
||||
}
|
||||
|
||||
thread(const thread &other)=delete;
|
||||
|
||||
template<class Func, typename... Args>
|
||||
explicit thread(Func&& func, Args&&... args) : mHandle(), mThreadId()
|
||||
{
|
||||
using ArgSequence = typename detail::GenIntSeq<sizeof...(Args)>::type;
|
||||
using Call = detail::ThreadFuncCall<Func, ArgSequence, Args...>;
|
||||
auto call = new Call(
|
||||
std::forward<Func>(func), std::forward<Args>(args)...);
|
||||
unsigned id_receiver;
|
||||
auto int_handle = _beginthreadex(NULL, 0, threadfunc<Call>,
|
||||
static_cast<LPVOID>(call), 0, &id_receiver);
|
||||
if (int_handle == 0)
|
||||
{
|
||||
mHandle = kInvalidHandle;
|
||||
int errnum = errno;
|
||||
delete call;
|
||||
// Note: Should only throw EINVAL, EAGAIN, EACCES
|
||||
throw std::system_error(errnum, std::generic_category());
|
||||
} else {
|
||||
mThreadId.mId = id_receiver;
|
||||
mHandle = reinterpret_cast<HANDLE>(int_handle);
|
||||
}
|
||||
}
|
||||
|
||||
bool joinable() const {return mHandle != kInvalidHandle;}
|
||||
|
||||
// Note: Due to lack of synchronization, this function has a race condition
|
||||
// if called concurrently, which leads to undefined behavior. The same applies
|
||||
// to all other member functions of this class, but this one is mentioned
|
||||
// explicitly.
|
||||
void join()
|
||||
{
|
||||
using namespace std;
|
||||
if (get_id() == id(GetCurrentThreadId()))
|
||||
throw system_error(make_error_code(errc::resource_deadlock_would_occur));
|
||||
if (mHandle == kInvalidHandle)
|
||||
throw system_error(make_error_code(errc::no_such_process));
|
||||
if (!joinable())
|
||||
throw system_error(make_error_code(errc::invalid_argument));
|
||||
WaitForSingleObject(mHandle, kInfinite);
|
||||
CloseHandle(mHandle);
|
||||
mHandle = kInvalidHandle;
|
||||
mThreadId = id{};
|
||||
}
|
||||
|
||||
~thread()
|
||||
{
|
||||
if (joinable())
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
std::printf("Error: Must join() or detach() a thread before \
|
||||
destroying it.\n");
|
||||
#endif
|
||||
std::terminate();
|
||||
}
|
||||
}
|
||||
thread& operator=(const thread&) = delete;
|
||||
thread& operator=(thread&& other) noexcept
|
||||
{
|
||||
if (joinable())
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
std::printf("Error: Must join() or detach() a thread before \
|
||||
moving another thread to it.\n");
|
||||
#endif
|
||||
std::terminate();
|
||||
}
|
||||
swap(std::forward<thread>(other));
|
||||
return *this;
|
||||
}
|
||||
void swap(thread&& other) noexcept
|
||||
{
|
||||
std::swap(mHandle, other.mHandle);
|
||||
std::swap(mThreadId.mId, other.mThreadId.mId);
|
||||
}
|
||||
|
||||
static unsigned int hardware_concurrency() noexcept
|
||||
{
|
||||
static unsigned int cached = _hardware_concurrency_helper();
|
||||
return cached;
|
||||
}
|
||||
|
||||
void detach()
|
||||
{
|
||||
if (!joinable())
|
||||
{
|
||||
using namespace std;
|
||||
throw system_error(make_error_code(errc::invalid_argument));
|
||||
}
|
||||
if (mHandle != kInvalidHandle)
|
||||
{
|
||||
CloseHandle(mHandle);
|
||||
mHandle = kInvalidHandle;
|
||||
}
|
||||
mThreadId = id{};
|
||||
}
|
||||
};
|
||||
|
||||
namespace detail
|
||||
{
|
||||
class ThreadIdTool
|
||||
{
|
||||
public:
|
||||
static thread::id make_id (DWORD base_id) noexcept
|
||||
{
|
||||
return thread::id(base_id);
|
||||
}
|
||||
};
|
||||
} // Namespace "detail"
|
||||
|
||||
namespace this_thread
|
||||
{
|
||||
inline thread::id get_id() noexcept
|
||||
{
|
||||
return detail::ThreadIdTool::make_id(GetCurrentThreadId());
|
||||
}
|
||||
inline void yield() noexcept {Sleep(0);}
|
||||
template< class Rep, class Period >
|
||||
void sleep_for( const std::chrono::duration<Rep,Period>& sleep_duration)
|
||||
{
|
||||
static constexpr DWORD kInfinite = 0xffffffffl;
|
||||
using namespace std::chrono;
|
||||
using rep = milliseconds::rep;
|
||||
rep ms = duration_cast<milliseconds>(sleep_duration).count();
|
||||
while (ms > 0)
|
||||
{
|
||||
constexpr rep kMaxRep = static_cast<rep>(kInfinite - 1);
|
||||
auto sleepTime = (ms < kMaxRep) ? ms : kMaxRep;
|
||||
Sleep(static_cast<DWORD>(sleepTime));
|
||||
ms -= sleepTime;
|
||||
}
|
||||
}
|
||||
template <class Clock, class Duration>
|
||||
void sleep_until(const std::chrono::time_point<Clock,Duration>& sleep_time)
|
||||
{
|
||||
sleep_for(sleep_time-Clock::now());
|
||||
}
|
||||
}
|
||||
} // Namespace mingw_stdthread
|
||||
|
||||
namespace std
|
||||
{
|
||||
// Because of quirks of the compiler, the common "using namespace std;"
|
||||
// directive would flatten the namespaces and introduce ambiguity where there
|
||||
// was none. Direct specification (std::), however, would be unaffected.
|
||||
// Take the safe option, and include only in the presence of MinGW's win32
|
||||
// implementation.
|
||||
#if defined(__MINGW32__ ) && !defined(_GLIBCXX_HAS_GTHREADS)
|
||||
using mingw_stdthread::thread;
|
||||
// Remove ambiguity immediately, to avoid problems arising from the above.
|
||||
//using std::thread;
|
||||
namespace this_thread
|
||||
{
|
||||
using namespace mingw_stdthread::this_thread;
|
||||
}
|
||||
#elif !defined(MINGW_STDTHREAD_REDUNDANCY_WARNING) // Skip repetition
|
||||
#define MINGW_STDTHREAD_REDUNDANCY_WARNING
|
||||
#pragma message "This version of MinGW seems to include a win32 port of\
|
||||
pthreads, and probably already has C++11 std threading classes implemented,\
|
||||
based on pthreads. These classes, found in namespace std, are not overridden\
|
||||
by the mingw-std-thread library. If you would still like to use this\
|
||||
implementation (as it is more lightweight), use the classes provided in\
|
||||
namespace mingw_stdthread."
|
||||
#endif
|
||||
|
||||
// Specialize hash for this implementation's thread::id, even if the
|
||||
// std::thread::id already has a hash.
|
||||
template<>
|
||||
struct hash<mingw_stdthread::thread::id>
|
||||
{
|
||||
typedef mingw_stdthread::thread::id argument_type;
|
||||
typedef size_t result_type;
|
||||
size_t operator() (const argument_type & i) const noexcept
|
||||
{
|
||||
return i.mId;
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif // WIN32STDTHREAD_H
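A minimal sketch of spawning and joining a thread with the class above, assuming mingw.thread.h is on the include path; the worker lambda and its argument are illustrative.

```cpp
// Hypothetical usage sketch (not part of the library): spawn, sleep and join.
#include <chrono>
#include <cstdio>
#include "mingw.thread.h"

int main()
{
    std::printf("hardware_concurrency() reports %u\n",
                mingw_stdthread::thread::hardware_concurrency());

    // Arguments are decay-copied into the new thread, mirroring std::thread.
    mingw_stdthread::thread worker([](int id)
    {
        mingw_stdthread::this_thread::sleep_for(std::chrono::milliseconds(50));
        std::printf("worker %d done\n", id);
    }, 7);

    worker.join();   // The destructor terminates if the thread is still joinable.
    return 0;
}
```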
18
randomx/mingw-std-threads-master/tests/CMakeLists.txt
Normal file
@@ -0,0 +1,18 @@
project(stdthreadtest)
|
||||
cmake_minimum_required(VERSION 3.0)
|
||||
|
||||
string(CONCAT mingw_stdthreads_tests_compile_options_docstring
|
||||
"Compiler flags used to compile mingw-stdthreads's tests. By default "
|
||||
"it's -std=c++11 -Wall -Wextra")
|
||||
set(MINGW_STDTHREADS_TESTS_COMPILE_OPTIONS "-std=c++11;-Wall;-Wextra"
|
||||
CACHE STRING ${mingw_stdthreads_tests_compile_options_docstring})
|
||||
|
||||
set(MINGW_STDTHREADS_TESTS_ADDITIONAL_LINKER_FLAGS "" CACHE STRING
|
||||
"Optional linker flags to be passed when linking mingw-stdthreads's tests")
|
||||
|
||||
add_executable(${PROJECT_NAME} tests.cpp)
|
||||
target_compile_options(${PROJECT_NAME} PRIVATE
|
||||
${MINGW_STDTHREADS_TESTS_COMPILE_OPTIONS})
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE mingw_stdthreads)
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE
|
||||
${MINGW_STDTHREADS_TESTS_ADDITIONAL_LINKER_FLAGS})
450
randomx/mingw-std-threads-master/tests/tests.cpp
Normal file
@@ -0,0 +1,450 @@
#ifndef MINGW_STDTHREADS_GENERATED_STDHEADERS
|
||||
#include <mingw.thread.h>
|
||||
#include <mingw.mutex.h>
|
||||
#include <mingw.condition_variable.h>
|
||||
#include <mingw.shared_mutex.h>
|
||||
#include <mingw.future.h>
|
||||
#else
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
#include <condition_variable>
|
||||
#include <shared_mutex>
|
||||
#include <future>
|
||||
#endif
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <typeinfo>
#include <vector>    // std::vector<std::thread> in the argument-copy regression test
#include <cstdio>    // printf
#include <cstring>   // strcmp
#include <cstdlib>   // std::malloc / std::free in CustomAllocator
|
||||
|
||||
using namespace std;
|
||||
|
||||
int test_int = 42;
|
||||
|
||||
// Pre-declaration to suppress some warnings.
|
||||
void test_call_once(int, char const *);
|
||||
|
||||
int cond = 0;
|
||||
std::mutex m;
|
||||
std::shared_mutex sm;
|
||||
std::condition_variable cv;
|
||||
std::condition_variable_any cv_any;
|
||||
|
||||
template<class ... Args>
|
||||
void log (char const * fmtString, Args ...args) {
|
||||
printf(fmtString, args...);
|
||||
printf("\n");
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
void test_call_once(int a, const char* str)
|
||||
{
|
||||
log("test_call_once called with a=%d, str=%s", a, str);
|
||||
this_thread::sleep_for(std::chrono::milliseconds(500));
|
||||
}
|
||||
|
||||
struct TestMove
|
||||
{
|
||||
std::string mStr;
|
||||
TestMove(const std::string& aStr): mStr(aStr){}
|
||||
TestMove(TestMove&& other): mStr(other.mStr+" moved")
|
||||
{ printf("%s: Object moved\n", mStr.c_str()); }
|
||||
TestMove(const TestMove&) : mStr()
|
||||
{
|
||||
assert(false && "TestMove: Object COPIED instead of moved");
|
||||
}
|
||||
};
|
||||
|
||||
template<class T>
|
||||
void test_future_set_value (promise<T> & promise)
|
||||
{
|
||||
promise.set_value(T(test_int));
|
||||
}
|
||||
|
||||
template<>
|
||||
void test_future_set_value (promise<void> & promise)
|
||||
{
|
||||
promise.set_value();
|
||||
}
|
||||
|
||||
template<class T>
|
||||
bool test_future_get_value (future<T> & future)
|
||||
{
|
||||
return (future.get() == T(test_int));
|
||||
}
|
||||
|
||||
template<>
|
||||
bool test_future_get_value (future<void> & future)
|
||||
{
|
||||
future.get();
|
||||
return true;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
struct CustomAllocator
|
||||
{
|
||||
CustomAllocator (void) noexcept
|
||||
{
|
||||
}
|
||||
|
||||
template<class U>
|
||||
CustomAllocator (CustomAllocator<U> const &) noexcept
|
||||
{
|
||||
}
|
||||
|
||||
template<class U>
|
||||
CustomAllocator<T> & operator= (CustomAllocator<U> const &) noexcept
|
||||
{
|
||||
return *this;
|
||||
}
|
||||
|
||||
typedef T value_type;
|
||||
T * allocate (size_t n)
|
||||
{
|
||||
log("Used custom allocator to allocate %zu object(s).", n);
|
||||
return static_cast<T*>(std::malloc(n * sizeof(T)));
|
||||
}
|
||||
void deallocate (T * ptr, size_t n)
|
||||
{
|
||||
log("Used custom allocator to deallocate %zu object(s).", n);
|
||||
std::free(ptr);
|
||||
}
|
||||
};
|
||||
|
||||
template<class T>
|
||||
void test_future ()
|
||||
{
|
||||
static_assert(is_move_constructible<promise<T> >::value,
|
||||
"std::promise must be move-constructible.");
|
||||
static_assert(is_move_assignable<promise<T> >::value,
|
||||
"std::promise must be move-assignable.");
|
||||
static_assert(!is_copy_constructible<promise<T> >::value,
|
||||
"std::promise must not be copy-constructible.");
|
||||
static_assert(!is_copy_assignable<promise<T> >::value,
|
||||
"std::promise must not be copy-assignable.");
|
||||
|
||||
static_assert(is_move_constructible<future<T> >::value,
|
||||
"std::future must be move-constructible.");
|
||||
static_assert(is_move_assignable<future<T> >::value,
|
||||
"std::future must be move-assignable.");
|
||||
static_assert(!is_copy_constructible<future<T> >::value,
|
||||
"std::future must not be copy-constructible.");
|
||||
static_assert(!is_copy_assignable<future<T> >::value,
|
||||
"std::future must not be copy-assignable.");
|
||||
|
||||
static_assert(is_move_constructible<shared_future<T> >::value,
|
||||
"std::shared_future must be move-constructible.");
|
||||
static_assert(is_move_assignable<shared_future<T> >::value,
|
||||
"std::shared_future must be move-assignable.");
|
||||
static_assert(is_copy_constructible<shared_future<T> >::value,
|
||||
"std::shared_future must be copy-constructible.");
|
||||
static_assert(is_copy_assignable<shared_future<T> >::value,
|
||||
"std::shared_future must be copy-assignable.");
|
||||
|
||||
log("\tMaking a few promises, and getting their futures...");
|
||||
promise<T> promise_value, promise_exception, promise_broken, promise_late;
|
||||
|
||||
future<T> future_value = promise_value.get_future();
|
||||
future<T> future_exception = promise_exception.get_future();
|
||||
future<T> future_broken = promise_broken.get_future();
|
||||
future<T> future_late = promise_late.get_future();
|
||||
|
||||
try {
|
||||
future<T> impossible_future = promise_value.get_future();
|
||||
log("WARNING: Promise failed to detect that its future was already retrieved.");
|
||||
} catch(...) {
|
||||
log("\tPromise successfully prevented redundant future retrieval.");
|
||||
}
|
||||
|
||||
log("\tPassing promises to a new thread...");
|
||||
thread t ([](promise<T> p_value, promise<T> p_exception, promise<T>, promise<T> p_late)
|
||||
{
|
||||
this_thread::sleep_for(std::chrono::seconds(1));
|
||||
try {
|
||||
throw std::runtime_error("Thrown during the thread.");
|
||||
} catch (...) {
|
||||
p_late.set_exception_at_thread_exit(std::current_exception());
|
||||
}
|
||||
test_future_set_value(p_value);
|
||||
try {
|
||||
throw std::runtime_error("Things happened as expected.");
|
||||
} catch (...) {
|
||||
p_exception.set_exception(std::current_exception());
|
||||
}
|
||||
this_thread::sleep_for(std::chrono::seconds(2));
|
||||
},
|
||||
std::move(promise_value),
|
||||
std::move(promise_exception),
|
||||
std::move(promise_broken),
|
||||
std::move(promise_late));
|
||||
t.detach();
|
||||
|
||||
try {
|
||||
bool was_expected = test_future_get_value(future_value);
|
||||
log("\tReceived %sexpected value.", (was_expected ? "" : "un"));
|
||||
} catch (...) {
|
||||
log("WARNING: Exception where there should be none!");
|
||||
throw;
|
||||
}
|
||||
try {
|
||||
test_future_get_value(future_exception);
|
||||
log("WARNING: Got a value where there should be an exception!");
|
||||
} catch (std::exception & e) {
|
||||
log("\tReceived an exception (\"%s\") as expected.", e.what());
|
||||
}
|
||||
|
||||
log("\tWaiting for the thread to exit...");
|
||||
try {
|
||||
test_future_get_value(future_late);
|
||||
log("WARNING: Got a value where there should be an exception!");
|
||||
} catch (std::exception & e) {
|
||||
log("\tReceived an exception (\"%s\") as expected.", e.what());
|
||||
}
|
||||
|
||||
try {
|
||||
test_future_get_value(future_broken);
|
||||
log("WARNING: Got a value where there should be an exception!");
|
||||
} catch (std::future_error & e) {
|
||||
log("\tReceived a future_error (\"%s\") as expected.", e.what());
|
||||
}
|
||||
|
||||
log("\tDeferring a function...");
|
||||
auto async_deferred = async(launch::deferred, [] (void) -> T
|
||||
{
|
||||
std::hash<std::thread::id> hasher;
|
||||
log("\t\tDeferred function called on thread %zu", hasher(std::this_thread::get_id()));
|
||||
if (!is_void<T>::value)
|
||||
return T(test_int);
|
||||
});
|
||||
log("\tCalling a function asynchronously...");
|
||||
auto async_async = async(launch::async, [] (void) -> T
|
||||
{
|
||||
std::hash<std::thread::id> hasher;
|
||||
log("\t\tAsynchronous function called on thread %zu", hasher(std::this_thread::get_id()));
|
||||
if (!is_void<T>::value)
|
||||
return T(test_int);
|
||||
});
|
||||
log("\tLetting the implementation decide...");
|
||||
auto async_either = async([] (thread::id other_id) -> T
|
||||
{
|
||||
std::hash<thread::id> hasher;
|
||||
log("\t\tFunction called on thread %zu. Implementation chose %s execution.", hasher(this_thread::get_id()), (this_thread::get_id() == other_id) ? "deferred" : "asynchronous");
|
||||
if (!is_void<T>::value)
|
||||
return T(test_int);
|
||||
}, this_thread::get_id());
|
||||
|
||||
log("\tFetching asynchronous result.");
|
||||
test_future_get_value(async_async);
|
||||
log("\tFetching deferred result.");
|
||||
test_future_get_value(async_deferred);
|
||||
log("\tFetching implementation-defined result.");
|
||||
test_future_get_value(async_either);
|
||||
|
||||
log("\tTesting async on pointer-to-member-function.");
|
||||
struct Helper
|
||||
{
|
||||
thread::id other_id;
|
||||
T call (void) const
|
||||
{
|
||||
std::hash<thread::id> hasher;
|
||||
log("\t\tFunction called on thread %zu. Implementation chose %s execution.", hasher(this_thread::get_id()), (this_thread::get_id() == other_id) ? "deferred" : "asynchronous");
|
||||
if (!is_void<T>::value)
|
||||
return T(test_int);
|
||||
}
|
||||
} test_class { this_thread::get_id() };
|
||||
    auto async_member = async(&Helper::call, test_class);
|
||||
log("\tFetching result.");
|
||||
test_future_get_value(async_member);
|
||||
}
|
||||
|
||||
#define TEST_SL_MV_CPY(ClassName) \
|
||||
static_assert(std::is_standard_layout<ClassName>::value, \
|
||||
"ClassName does not satisfy concept StandardLayoutType."); \
|
||||
static_assert(!std::is_move_constructible<ClassName>::value, \
|
||||
"ClassName must not be move-constructible."); \
|
||||
static_assert(!std::is_move_assignable<ClassName>::value, \
|
||||
"ClassName must not be move-assignable."); \
|
||||
static_assert(!std::is_copy_constructible<ClassName>::value, \
|
||||
"ClassName must not be copy-constructible."); \
|
||||
static_assert(!std::is_copy_assignable<ClassName>::value, \
|
||||
"ClassName must not be copy-assignable.");
|
||||
|
||||
int main()
|
||||
{
|
||||
#ifdef MINGW_STDTHREADS_GENERATED_STDHEADERS
|
||||
std::cout << "Using cmake-generated stdheaders, ";
|
||||
#endif
|
||||
static_assert(std::is_trivially_copyable<thread::id>::value,
|
||||
"thread::id must be trivially copyable.");
|
||||
|
||||
TEST_SL_MV_CPY(mutex)
|
||||
TEST_SL_MV_CPY(recursive_mutex)
|
||||
TEST_SL_MV_CPY(timed_mutex)
|
||||
TEST_SL_MV_CPY(recursive_timed_mutex)
|
||||
TEST_SL_MV_CPY(shared_mutex)
|
||||
TEST_SL_MV_CPY(shared_timed_mutex)
|
||||
TEST_SL_MV_CPY(condition_variable)
|
||||
TEST_SL_MV_CPY(condition_variable_any)
|
||||
static_assert(!std::is_move_constructible<once_flag>::value,
|
||||
"once_flag must not be move-constructible.");
|
||||
static_assert(!std::is_move_assignable<once_flag>::value,
|
||||
"once_flag must not be move-assignable.");
|
||||
static_assert(!std::is_copy_constructible<once_flag>::value,
|
||||
"once_flag must not be copy-constructible.");
|
||||
static_assert(!std::is_copy_assignable<once_flag>::value,
|
||||
"once_flag must not be copy-assignable.");
|
||||
|
||||
// With C++ feature level and target Windows version potentially affecting
|
||||
// behavior, make this information visible.
|
||||
{
|
||||
switch (__cplusplus)
|
||||
{
|
||||
case 201103L: std::cout << "Compiled in C++11"; break;
|
||||
case 201402L: std::cout << "Compiled in C++14"; break;
|
||||
case 201703L: std::cout << "Compiled in C++17"; break;
|
||||
default: std::cout << "Compiled in a non-conforming C++ compiler";
|
||||
}
|
||||
std::cout << ", targeting Windows ";
|
||||
static_assert(WINVER > 0x0500, "Windows NT and earlier are not supported.");
|
||||
switch (WINVER)
|
||||
{
|
||||
case 0x0501: std::cout << "XP"; break;
|
||||
case 0x0502: std::cout << "Server 2003"; break;
|
||||
case 0x0600: std::cout << "Vista"; break;
|
||||
case 0x0601: std::cout << "7"; break;
|
||||
case 0x0602: std::cout << "8"; break;
|
||||
case 0x0603: std::cout << "8.1"; break;
|
||||
case 0x0A00: std::cout << "10"; break;
|
||||
default: std::cout << "10+";
|
||||
}
|
||||
std::cout << "\n";
|
||||
}
|
||||
|
||||
{
|
||||
log("Testing serialization and hashing for thread::id...");
|
||||
std::cout << "Serialization:\t" << this_thread::get_id() << "\n";
|
||||
std::hash<thread::id> hasher;
|
||||
std::cout << "Hash:\t" << hasher(this_thread::get_id()) << "\n";
|
||||
}
|
||||
|
||||
// Regression test: Thread must copy any argument that is passed by value.
|
||||
{
|
||||
std::vector<std::thread> loop_threads;
|
||||
std::atomic<int> i_vals_touched [4];// { 0, 0, 0, 0 };
|
||||
for (int i = 0; i < 4; ++i)
|
||||
i_vals_touched[i].store(0, std::memory_order_relaxed);
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
loop_threads.push_back(std::thread([&](int c)
|
||||
{
|
||||
log("For-loop test thread got value: %i", c);
|
||||
i_vals_touched[c].fetch_add(1, std::memory_order_relaxed);
|
||||
}, i));
|
||||
}
|
||||
for (std::thread & thr : loop_threads)
|
||||
thr.join();
|
||||
for (int i = 0; i < 4; ++i)
|
||||
{
|
||||
if (i_vals_touched[i] != 1)
|
||||
{
|
||||
log("FATAL: Threads are not copying arguments!");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
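// The worker thread below checks that by-value/moved thread arguments arrive
// intact, then sets cond to 1, 2 and 3 in turn so the main thread can wait on
// the plain mutex and on the shared_mutex (exclusive and shared) through the
// two condition variables.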
std::thread t([](TestMove&& a, const char* b, int c) mutable
|
||||
{
|
||||
try
|
||||
{
|
||||
log("Worker thread started, sleeping for a while...");
|
||||
// Thread might move the string more than once.
|
||||
assert(a.mStr.substr(0, 15) == "move test moved");
|
||||
assert(!strcmp(b, "test message"));
|
||||
assert(c == -20);
|
||||
auto move2nd = std::move(a); //test move to final destination
|
||||
this_thread::sleep_for(std::chrono::milliseconds(1000));
|
||||
{
|
||||
lock_guard<mutex> lock(m);
|
||||
cond = 1;
|
||||
log("Notifying condvar");
|
||||
cv.notify_all();
|
||||
}
|
||||
|
||||
this_thread::sleep_for(std::chrono::milliseconds(500));
|
||||
{
|
||||
lock_guard<decltype(sm)> lock(sm);
|
||||
cond = 2;
|
||||
log("Notifying condvar");
|
||||
cv_any.notify_all();
|
||||
}
|
||||
|
||||
this_thread::sleep_for(std::chrono::milliseconds(500));
|
||||
{
|
||||
lock_guard<decltype(sm)> lock(sm);
|
||||
cond = 3;
|
||||
log("Notifying condvar");
|
||||
cv_any.notify_all();
|
||||
}
|
||||
|
||||
log("Worker thread finishing");
|
||||
}
|
||||
catch(std::exception& e)
|
||||
{
|
||||
printf("EXCEPTION in worker thread: %s\n", e.what());
|
||||
}
|
||||
},
|
||||
TestMove("move test"), "test message", -20);
|
||||
try
|
||||
{
|
||||
log("Main thread: Locking mutex, waiting on condvar...");
|
||||
{
|
||||
std::unique_lock<decltype(m)> lk(m);
|
||||
cv.wait(lk, []{ return cond >= 1;} );
|
||||
log("condvar notified, cond = %d", cond);
|
||||
assert(lk.owns_lock());
|
||||
}
|
||||
log("Main thread: Locking shared_mutex, waiting on condvar...");
|
||||
{
|
||||
std::unique_lock<decltype(sm)> lk(sm);
|
||||
cv_any.wait(lk, []{ return cond >= 2;} );
|
||||
log("condvar notified, cond = %d", cond);
|
||||
assert(lk.owns_lock());
|
||||
}
|
||||
log("Main thread: Locking shared_mutex in shared mode, waiting on condvar...");
|
||||
{
|
||||
std::shared_lock<decltype(sm)> lk(sm);
|
||||
cv_any.wait(lk, []{ return cond >= 3;} );
|
||||
log("condvar notified, cond = %d", cond);
|
||||
assert(lk.owns_lock());
|
||||
}
|
||||
log("Main thread: Waiting on worker join...");
|
||||
|
||||
t.join();
|
||||
log("Main thread: Worker thread joined");
|
||||
fflush(stdout);
|
||||
}
|
||||
catch(std::exception& e)
|
||||
{
|
||||
log("EXCEPTION in main thread: %s", e.what());
|
||||
}
|
||||
once_flag of;
|
||||
call_once(of, test_call_once, 1, "test");
|
||||
call_once(of, test_call_once, 1, "ERROR! Should not be called second time");
|
||||
log("Test complete");
|
||||
|
||||
{
|
||||
log("Testing implementation of <future>...");
|
||||
test_future<int>();
|
||||
test_future<void>();
|
||||
test_future<int &>();
|
||||
test_future<int const &>();
|
||||
test_future<int volatile &>();
|
||||
test_future<int const volatile &>();
|
||||
log("Testing <future>'s use of allocators. Should allocate, then deallocate.");
|
||||
promise<int> allocated_promise (std::allocator_arg, CustomAllocator<unsigned>());
|
||||
allocated_promise.set_value(7);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,226 @@
|
||||
|
||||
<#
|
||||
.SYNOPSIS
|
||||
Generate std-like headers that you can use just like the standard C++ ones.
For example, #include <thread>.
.PARAMETER GccPath
|
||||
Path to GCC. Will try to use the default one from $env:Path if not
|
||||
specified.
|
||||
.PARAMETER MinGWStdThreadsPath
|
||||
Path to mingw-std-threads folder. Will try to use $PSScriptRoot/.. if not
|
||||
specified.
|
||||
.PARAMETER DestinationFolder
|
||||
Destination folder where generated headers will be saved to
|
||||
.PARAMETER GenerateCompilerWrapperWithFileName
|
||||
If specified, a wrapper batch script for g++ will be generated that automatically
adds $DestinationFolder as an include path.
.PARAMETER Interactive
|
||||
Use this switch if you want to pass parameters interactively
|
||||
#>
|
||||
[CmdletBinding(PositionalBinding = $false)]
|
||||
param (
|
||||
# Path of GCC
|
||||
[Parameter(Mandatory = $false,
|
||||
ValueFromPipelineByPropertyName = $true,
|
||||
ParameterSetName = "NonInteractive",
|
||||
HelpMessage = "Pathtof GCC. Will try to use the default one from `$env:Path if not specified.")]
|
||||
[string]
|
||||
$GccPath,
|
||||
|
||||
# Path of mingw-std-threads
|
||||
[Parameter(Mandatory = $false,
|
||||
ValueFromPipelineByPropertyName = $true,
|
||||
ParameterSetName = "NonInteractive",
|
||||
HelpMessage = "Path to mingw-std-threads folder. Will try to use `$PSScriptRoot/.. if not specified.")]
|
||||
[string]
|
||||
$MinGWStdThreadsPath,
|
||||
|
||||
# Destination folder path
|
||||
[Parameter(Mandatory = $true,
|
||||
ValueFromPipelineByPropertyName = $true,
|
||||
ParameterSetName = "NonInteractive",
|
||||
HelpMessage = "Destination folder where generated headers will be saved to")]
|
||||
[ValidateNotNullOrEmpty()]
|
||||
[string]
|
||||
$DestinationFolder,
|
||||
|
||||
# Compiler wrapper path
|
||||
[Parameter(Mandatory = $false,
|
||||
ValueFromPipelineByPropertyName = $true,
|
||||
ParameterSetName = "NonInteractive",
|
||||
HelpMessage = "If specified, will generate a wrapper batch script for g++ which automatically adds `$DestinationFolder as an include path")]
|
||||
[string]
|
||||
$GenerateCompilerWrapperWithFileName,
|
||||
|
||||
# Interactive Switch
|
||||
[Parameter(ParameterSetName = "Interactive")]
|
||||
[switch]
|
||||
$Interactive = $false
|
||||
)
|
||||
|
||||
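# Example invocation (paths here are only illustrative):
#   .\Generate-StdLikeHeaders.ps1 -DestinationFolder .\gen_std_headers `
#       -GenerateCompilerWrapperWithFileName .\gpp_wrapper.bat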
# Stop execution when encountering any error (including the Write-Error command)
$ErrorActionPreference = "Stop";
|
||||
|
||||
# headers to be generated
|
||||
$headers = @("condition_variable", "future", "mutex", "shared_mutex", "thread")
|
||||
|
||||
# ask for user input in interactive mode
|
||||
if ($Interactive) {
|
||||
Write-Host "Generate std-like headers which you can use just like standard c++'s ones."
|
||||
Write-Host "Something like `"include <thread>`"."
|
||||
|
||||
$DestinationFolder = Read-Host -Prompt "Destination folder into which headers will be generated"
|
||||
$GccPath = Read-Host -Prompt "Path to GCC, optional. Press Enter to let it be retrieved from PATH"
|
||||
$MinGWStdThreadsPath = Read-Host -Prompt "Path to mingw-std-threads folder, optional. Press Enter to use default value"
|
||||
$GenerateCompilerWrapperWithFileName = Read-Host "Optional path to which a wrapper batch script for g++ will be created. It will automatically use $DestinationFolder as an include path. Press Enter to skip"
|
||||
}
|
||||
|
||||
if (-not $GccPath) {
|
||||
$GccPath = "gcc"
|
||||
}
|
||||
|
||||
# set default value of $MinGWStdThreadsPath
|
||||
if (-not $MinGWStdThreadsPath) {
|
||||
$scriptFilePath = $null
|
||||
if ($MyInvocation.MyCommand.CommandType -eq "ExternalScript") {
|
||||
$scriptFilePath = $MyInvocation.MyCommand.Definition
|
||||
}
|
||||
else {
|
||||
$scriptFilePath = [Environment]::GetCommandLineArgs()[0]
|
||||
}
|
||||
$MinGWStdThreadsPath = (Get-Item -LiteralPath $scriptFilePath).Directory.Parent.FullName
|
||||
}
|
||||
|
||||
# Normalize paths
|
||||
$GccPath = (Get-Command -Name $GccPath).Source
|
||||
$MinGWStdThreadsPath = Resolve-Path -LiteralPath $MinGWStdThreadsPath
|
||||
$DestinationFolder = New-Item -Path $DestinationFolder -ItemType "Directory" -Force
|
||||
|
||||
Write-Output "GccPath: $GccPath"
|
||||
Write-Output "MinGWStdThreadsPath: $MinGWStdThreadsPath"
|
||||
Write-Output "DestinationFolder: $DestinationFolder"
|
||||
if ($GenerateCompilerWrapperWithFileName) {
|
||||
Write-Output "GenerateCompilerWrapperWithFileName: $GenerateCompilerWrapperWithFileName"
|
||||
}
|
||||
|
||||
# Find path of real headers
|
||||
Write-Output "Retrieving system header search paths..."
|
||||
|
||||
$readingIncludePath = $false
|
||||
# Empty array which will later store include paths
|
||||
$includePaths = @()
|
||||
|
||||
# Launch GCC
|
||||
$processStartInfo = New-Object -TypeName "System.Diagnostics.ProcessStartInfo"
|
||||
$processStartInfo.FileName = $GccPath
|
||||
$processStartInfo.Arguments = "-xc++ -E -v -"
|
||||
$processStartInfo.RedirectStandardInput = $true
|
||||
$processStartInfo.RedirectStandardOutput = $true
|
||||
$processStartInfo.RedirectStandardError = $true
|
||||
$processStartInfo.UseShellExecute = $false
|
||||
|
||||
$outputLines = @()
|
||||
$gcc = New-Object -TypeName "System.Diagnostics.Process"
|
||||
try {
|
||||
$gcc.StartInfo = $processStartInfo
|
||||
$gcc.Start() | Out-Null
|
||||
$gcc.StandardInput.Close()
|
||||
$gcc.WaitForExit()
|
||||
$output = $gcc.StandardError.ReadToEnd()
|
||||
$outputLines = $output -split "[\r\n]" |
|
||||
ForEach-Object { return $_.Trim() } |
|
||||
Where-Object { return $_.Length -gt 0 }
|
||||
}
|
||||
finally {
|
||||
$gcc.StandardInput.Dispose()
|
||||
$gcc.StandardOutput.Dispose()
|
||||
$gcc.StandardError.Dispose()
|
||||
$gcc.Dispose()
|
||||
}
|
||||
|
||||
# Parse Output
|
||||
foreach ($line in $outputLines) {
|
||||
if (-not $readingIncludePath) {
|
||||
if ($line -match "#include <...> search starts here:") {
|
||||
$readingIncludePath = $true
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if ($line -match "End of search list.") {
|
||||
break
|
||||
}
|
||||
|
||||
Write-Output "Retrieved search path: $line"
|
||||
$includePaths += $line
|
||||
}
|
||||
|
||||
if ($includePaths.Count -eq 0) {
|
||||
Write-Error "Error: didn't find any #inlcude <...> search paths"
|
||||
}
|
||||
|
||||
# look for std header paths
|
||||
Write-Output "Searching for standard headers..."
|
||||
$stdHeaders = @()
|
||||
# set a label called "nextHeader" to allow continue with outer loop
|
||||
:nextHeader foreach ($header in $headers) {
|
||||
# check if mingw-std-threads headers exist
|
||||
$myHeader = "mingw.$header.h"
|
||||
$myHeader = Join-Path -Path $MinGWStdThreadsPath -ChildPath $myHeader
|
||||
if (-not (Test-Path -LiteralPath $myHeader -PathType "Leaf")) {
|
||||
Write-Error "Error: mingw-std-threads header not found: $myHeader"
|
||||
}
|
||||
|
||||
foreach ($includePath in $includePaths) {
$fullPath = Join-Path -Path $includePath -ChildPath $header
if (Test-Path -LiteralPath $fullPath -PathType "Leaf") {
|
||||
$fullPath = (Get-Item -LiteralPath $fullPath).FullName
|
||||
$stdHeaders += $fullPath
|
||||
Write-Output "Found std header: $fullPath"
|
||||
# if found matching header, continue with outer loop
|
||||
continue nextHeader
|
||||
}
|
||||
}
|
||||
|
||||
Write-Error "Error: didn't find $header in any search paths"
|
||||
}
|
||||
|
||||
# generate headers
|
||||
Write-Output "Generating headers..."
|
||||
foreach ($stdHeader in $stdHeaders) {
|
||||
$headerFileName = (Get-Item -LiteralPath $stdHeader).Name
|
||||
$myHeader = "mingw.$headerFileName.h"
|
||||
$myHeader = Join-Path -Path $MinGWStdThreadsPath -ChildPath $myHeader
|
||||
Write-Output "Generating <$headerFileName> from $myHeader and $stdHeader..."
|
||||
|
||||
# both two headers should already have include guards
|
||||
# but we still add a #pragma once just to be safe
|
||||
$content = "#pragma once`r`n"
|
||||
$content += "#include `"$stdHeader`"`r`n"
|
||||
$content += "#include `"$myHeader`"`r`n";
|
||||
|
||||
$outputFileName = Join-Path -Path $DestinationFolder -ChildPath $headerFileName
|
||||
Write-Output "Writing file: $outputFileName"
|
||||
|
||||
# use .NET's method to output lines to avoid UTF-8 BOM
|
||||
$noBomEncoding = New-Object -TypeName "System.Text.UTF8Encoding" -ArgumentList $false
|
||||
[IO.File]::WriteAllText($outputFileName, $content, $noBomEncoding)
|
||||
}
|
||||
|
||||
$message = "Successfully generated std-like headers. Use them by adding "
|
||||
$message += "`"-I$DestinationFolder`" to your compiler command line parameters"
|
||||
Write-Output $message
|
||||
|
||||
if ($GenerateCompilerWrapperWithFileName) {
|
||||
$compilerFolder = Split-Path -LiteralPath $GccPath
|
||||
$compiler = Join-Path -Path $compilerFolder -ChildPath "g++"
|
||||
$command = "@echo off`r`n"
|
||||
$command += "$compiler %* `"-I$DestinationFolder`""
|
||||
$wrapper = New-Item -Path $GenerateCompilerWrapperWithFileName -ItemType "File" -Force
|
||||
|
||||
# use .NET's method to output lines to avoid UTF-8 BOM
|
||||
$noBomEncoding = New-Object -TypeName "System.Text.UTF8Encoding" -ArgumentList $false
|
||||
[IO.File]::WriteAllText($wrapper, $command, $noBomEncoding)
|
||||
|
||||
Write-Output "Wrapper batch script successfully generated to $wrapper"
|
||||
}
|
||||
@@ -0,0 +1 @@
powershell -NonInteractive -ExecutionPolicy ByPass -File %~dp0Generate-StdLikeHeaders.ps1 %*
@@ -0,0 +1 @@
powershell -ExecutionPolicy ByPass -File %~dp0Generate-StdLikeHeaders.ps1 -Interactive
71
randomx/program.hpp
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once

#include <cstdint>
#include <ostream>
#include "common.hpp"
#include "instruction.hpp"
#include "blake2/endian.h"

namespace randomx {

	struct ProgramConfiguration {
		uint64_t eMask[2];
		uint32_t readReg0, readReg1, readReg2, readReg3;
	};

	class Program {
	public:
		Instruction& operator()(int pc) {
			return programBuffer[pc];
		}
		friend std::ostream& operator<<(std::ostream& os, const Program& p) {
			p.print(os);
			return os;
		}
		uint64_t getEntropy(int i) {
			return load64(&entropyBuffer[i]);
		}
		uint32_t getSize() {
			return RANDOMX_PROGRAM_SIZE;
		}
	private:
		void print(std::ostream& os) const {
			for (int i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
				auto instr = programBuffer[i];
				os << instr;
			}
		}
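		// Layout note (per the RandomX design): the whole struct is filled with
		// AES-generated data; the 128-byte entropyBuffer comes first and seeds the
		// ProgramConfiguration (eMask, readReg*), followed by the instructions.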
		uint64_t entropyBuffer[16];
		Instruction programBuffer[RANDOMX_PROGRAM_SIZE]; // 256 instructions, 64 bits each
	};

	static_assert(sizeof(Program) % 64 == 0, "Invalid size of class randomx::Program");
}
492
randomx/randomx.cpp
Normal file
@@ -0,0 +1,492 @@
|
||||
/*
|
||||
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
|
||||
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holder nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#include "mingw-std-threads-master/mingw.thread.h" //这个时因为使用#include <thread>会报错,mingw中thread不支持std,如果时其他编译器如vs等,就不需要;
|
||||
#include "randomx.h"
|
||||
#include "dataset.hpp"
|
||||
#include "vm_interpreted.hpp"
|
||||
#include "vm_interpreted_light.hpp"
|
||||
#include "vm_compiled.hpp"
|
||||
#include "vm_compiled_light.hpp"
|
||||
#include "blake2/blake2.h"
|
||||
#include "cpu.hpp"
|
||||
#include <cassert>
|
||||
#include <limits>
|
||||
|
||||
|
||||
randomx_flags randomx_get_flags() {
|
||||
randomx_flags flags = RANDOMX_HAVE_COMPILER ? RANDOMX_FLAG_JIT : RANDOMX_FLAG_DEFAULT;
|
||||
randomx::Cpu cpu;
|
||||
#ifdef __OpenBSD__
|
||||
if (flags == RANDOMX_FLAG_JIT) {
|
||||
flags |= RANDOMX_FLAG_SECURE;
|
||||
}
|
||||
#endif
|
||||
if (HAVE_AES && cpu.hasAes()) {
|
||||
flags |= RANDOMX_FLAG_HARD_AES;
|
||||
}
|
||||
if (randomx_argon2_impl_avx2() != nullptr && cpu.hasAvx2()) {
|
||||
flags |= RANDOMX_FLAG_ARGON2_AVX2;
|
||||
}
|
||||
if (randomx_argon2_impl_ssse3() != nullptr && cpu.hasSsse3()) {
|
||||
flags |= RANDOMX_FLAG_ARGON2_SSSE3;
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
randomx_cache *randomx_alloc_cache(randomx_flags flags) {
|
||||
randomx_cache *cache = nullptr;
|
||||
auto impl = randomx::selectArgonImpl(flags);
|
||||
if (impl == nullptr) {
|
||||
return cache;
|
||||
}
|
||||
|
||||
try {
|
||||
cache = new randomx_cache();
|
||||
cache->argonImpl = impl;
|
||||
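// Four combinations below: JIT and LARGE_PAGES toggle independently. JIT swaps
// in a compiled dataset-init routine, LARGE_PAGES swaps the allocator used for
// the cache memory.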
switch ((int)(flags & (RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES))) {
|
||||
case RANDOMX_FLAG_DEFAULT:
|
||||
cache->dealloc = &randomx::deallocCache<randomx::DefaultAllocator>;
|
||||
cache->jit = nullptr;
|
||||
cache->initialize = &randomx::initCache;
|
||||
cache->datasetInit = &randomx::initDataset;
|
||||
cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::CacheSize); //randomx::CacheSize =256MB
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_JIT:
|
||||
cache->dealloc = &randomx::deallocCache<randomx::DefaultAllocator>;
|
||||
cache->jit = new randomx::JitCompiler();
|
||||
cache->initialize = &randomx::initCacheCompile;
|
||||
cache->datasetInit = cache->jit->getDatasetInitFunc();
|
||||
cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::CacheSize);
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_LARGE_PAGES:
|
||||
cache->dealloc = &randomx::deallocCache<randomx::LargePageAllocator>;
|
||||
cache->jit = nullptr;
|
||||
cache->initialize = &randomx::initCache;
|
||||
cache->datasetInit = &randomx::initDataset;
|
||||
cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::CacheSize);
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES:
|
||||
cache->dealloc = &randomx::deallocCache<randomx::LargePageAllocator>;
|
||||
cache->jit = new randomx::JitCompiler();
|
||||
cache->initialize = &randomx::initCacheCompile;
|
||||
cache->datasetInit = cache->jit->getDatasetInitFunc();
|
||||
cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::CacheSize);
|
||||
break;
|
||||
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
}
|
||||
catch (std::exception &ex) {
|
||||
if (cache != nullptr) {
|
||||
randomx_release_cache(cache);
|
||||
cache = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
return cache;
|
||||
}
|
||||
|
||||
void randomx_init_cache(randomx_cache *cache, const void *key, size_t keySize) {
|
||||
assert(cache != nullptr);
|
||||
assert(keySize == 0 || key != nullptr);
|
||||
|
||||
cache->initialize(cache, key, keySize);
|
||||
|
||||
//std::string cacheKey;
//cacheKey.assign((const char *)key, keySize); // copy keySize characters of key into cacheKey
//if (cache->cacheKey != cacheKey || !cache->isInitialized()) {
//	cache->initialize(cache, key, keySize);
//	cache->cacheKey = cacheKey;
//}
}
|
||||
|
||||
void randomx_release_cache(randomx_cache* cache) {
|
||||
assert(cache != nullptr);
|
||||
if (cache->memory != nullptr) {
|
||||
cache->dealloc(cache);
|
||||
}
|
||||
delete cache;
|
||||
}
|
||||
|
||||
randomx_dataset *randomx_alloc_dataset(randomx_flags flags) {
|
||||
|
||||
//fail on 32-bit systems if DatasetSize is >= 4 GiB
|
||||
if (randomx::DatasetSize > std::numeric_limits<size_t>::max()) {
|
||||
return nullptr;
|
||||
}
|
||||
//printf("xxxxx\n");
|
||||
randomx_dataset *dataset = nullptr;
|
||||
|
||||
//try {
|
||||
dataset = new randomx_dataset();
|
||||
if (flags & RANDOMX_FLAG_LARGE_PAGES) {
|
||||
dataset->dealloc = &randomx::deallocDataset<randomx::LargePageAllocator>;
|
||||
dataset->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::DatasetSize);
|
||||
}
|
||||
else {
|
||||
dataset->dealloc = &randomx::deallocDataset<randomx::DefaultAllocator>;
|
||||
dataset->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::DatasetSize);
|
||||
}
|
||||
//}
|
||||
//catch (std::exception &ex) {
|
||||
// if (dataset != nullptr) {
|
||||
// randomx_release_dataset(dataset);
|
||||
// dataset = nullptr;
|
||||
// }
|
||||
//}
|
||||
|
||||
return dataset;
|
||||
}
|
||||
|
||||
constexpr unsigned long DatasetItemCount = randomx::DatasetSize / RANDOMX_DATASET_ITEM_SIZE;
|
||||
|
||||
unsigned long randomx_dataset_item_count() {
|
||||
return DatasetItemCount;
|
||||
}
|
||||
|
||||
void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount) {
|
||||
assert(dataset != nullptr);
|
||||
assert(cache != nullptr);
|
||||
assert(startItem < DatasetItemCount && itemCount <= DatasetItemCount);
|
||||
assert(startItem + itemCount <= DatasetItemCount);
|
||||
cache->datasetInit(cache, dataset->memory + startItem * randomx::CacheLineSize, startItem, startItem + itemCount);
|
||||
}
|
||||
|
||||
void *randomx_get_dataset_memory(randomx_dataset *dataset) {
|
||||
assert(dataset != nullptr);
|
||||
return dataset->memory;
|
||||
}
|
||||
|
||||
void randomx_release_dataset(randomx_dataset *dataset) {
|
||||
assert(dataset != nullptr);
|
||||
dataset->dealloc(dataset);
|
||||
delete dataset;
|
||||
}
|
||||
|
||||
randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset) {
|
||||
assert(cache != nullptr || (flags & RANDOMX_FLAG_FULL_MEM));
|
||||
assert(cache == nullptr || cache->isInitialized());
|
||||
assert(dataset != nullptr || !(flags & RANDOMX_FLAG_FULL_MEM));
|
||||
|
||||
randomx_vm *vm = nullptr;
|
||||
|
||||
try {
|
||||
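// VM selection by flag combination: FULL_MEM picks the dataset-backed VMs
// instead of the cache-backed light VMs, JIT picks the compiled VMs instead of
// the interpreter, HARD_AES and LARGE_PAGES pick the hardware-AES and
// large-page allocator variants, and RANDOMX_FLAG_SECURE selects the *Secure
// compiled variants.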
switch ((int)(flags & (RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES))) {
|
||||
case RANDOMX_FLAG_DEFAULT:
|
||||
vm = new randomx::InterpretedLightVmDefault();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_FULL_MEM:
|
||||
vm = new randomx::InterpretedVmDefault();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_JIT:
|
||||
if (flags & RANDOMX_FLAG_SECURE) {
|
||||
vm = new randomx::CompiledLightVmDefaultSecure();
|
||||
}
|
||||
else {
|
||||
vm = new randomx::CompiledLightVmDefault();
|
||||
}
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT:
|
||||
if (flags & RANDOMX_FLAG_SECURE) {
|
||||
vm = new randomx::CompiledVmDefaultSecure();
|
||||
}
|
||||
else {
|
||||
vm = new randomx::CompiledVmDefault();
|
||||
}
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_HARD_AES:
|
||||
vm = new randomx::InterpretedLightVmHardAes();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES:
|
||||
vm = new randomx::InterpretedVmHardAes();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES:
|
||||
if (flags & RANDOMX_FLAG_SECURE) {
|
||||
vm = new randomx::CompiledLightVmHardAesSecure();
|
||||
}
|
||||
else {
|
||||
vm = new randomx::CompiledLightVmHardAes();
|
||||
}
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES:
|
||||
if (flags & RANDOMX_FLAG_SECURE) {
|
||||
vm = new randomx::CompiledVmHardAesSecure();
|
||||
}
|
||||
else {
|
||||
vm = new randomx::CompiledVmHardAes();
|
||||
}
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_LARGE_PAGES:
|
||||
vm = new randomx::InterpretedLightVmLargePage();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_LARGE_PAGES:
|
||||
vm = new randomx::InterpretedVmLargePage();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES:
|
||||
if (flags & RANDOMX_FLAG_SECURE) {
|
||||
vm = new randomx::CompiledLightVmLargePageSecure();
|
||||
}
|
||||
else {
|
||||
vm = new randomx::CompiledLightVmLargePage();
|
||||
}
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES:
|
||||
if (flags & RANDOMX_FLAG_SECURE) {
|
||||
vm = new randomx::CompiledVmLargePageSecure();
|
||||
}
|
||||
else {
|
||||
vm = new randomx::CompiledVmLargePage();
|
||||
}
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES:
|
||||
vm = new randomx::InterpretedLightVmLargePageHardAes();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES:
|
||||
vm = new randomx::InterpretedVmLargePageHardAes();
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES:
|
||||
if (flags & RANDOMX_FLAG_SECURE) {
|
||||
vm = new randomx::CompiledLightVmLargePageHardAesSecure();
|
||||
}
|
||||
else {
|
||||
vm = new randomx::CompiledLightVmLargePageHardAes();
|
||||
}
|
||||
break;
|
||||
|
||||
case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES:
|
||||
if (flags & RANDOMX_FLAG_SECURE) {
|
||||
vm = new randomx::CompiledVmLargePageHardAesSecure();
|
||||
}
|
||||
else {
|
||||
vm = new randomx::CompiledVmLargePageHardAes();
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
UNREACHABLE;
|
||||
}
|
||||
|
||||
if(cache != nullptr) {
|
||||
// printf("cachedddddddddddd\n"); yes
|
||||
vm->setCache(cache);
|
||||
vm->cacheKey = cache->cacheKey;
|
||||
}
|
||||
|
||||
if(dataset != nullptr){
|
||||
// printf("datasetdddddddddddd\n"); no
|
||||
vm->setDataset(dataset);
|
||||
}
|
||||
|
||||
vm->allocate(); //allocate the scratchpad
|
||||
}
|
||||
catch (std::exception &ex) {
|
||||
delete vm;
|
||||
vm = nullptr;
|
||||
}
|
||||
|
||||
return vm;
|
||||
}
|
||||
|
||||
void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache) {
|
||||
assert(machine != nullptr);
|
||||
assert(cache != nullptr && cache->isInitialized());
|
||||
if (machine->cacheKey != cache->cacheKey) {
|
||||
machine->setCache(cache);
|
||||
machine->cacheKey = cache->cacheKey;
|
||||
}
|
||||
}
|
||||
|
||||
void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset) {
|
||||
assert(machine != nullptr);
|
||||
assert(dataset != nullptr);
|
||||
machine->setDataset(dataset);
|
||||
}
|
||||
|
||||
void randomx_destroy_vm(randomx_vm *machine) {
|
||||
assert(machine != nullptr);
|
||||
delete machine;
|
||||
}
|
||||
|
||||
void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output) {
|
||||
assert(machine != nullptr);
|
||||
assert(inputSize == 0 || input != nullptr);
|
||||
assert(output != nullptr);
|
||||
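// Hash pipeline as implemented below: blake2b(input) produces tempHash, which
// seeds the scratchpad; each of the RANDOMX_PROGRAM_COUNT programs is then
// generated from the current tempHash and executed, with the register file
// re-hashed between runs; the final run yields the RANDOMX_HASH_SIZE-byte output.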
alignas(16) uint64_t tempHash[8];
|
||||
int blakeResult = blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
|
||||
assert(blakeResult == 0);
|
||||
machine->initScratchpad(&tempHash);
|
||||
machine->resetRoundingMode();
|
||||
for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) { //RANDOMX_PROGRAM_COUNT =8
|
||||
machine->run(&tempHash);
|
||||
blakeResult = blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
|
||||
assert(blakeResult == 0);
|
||||
}
|
||||
machine->run(&tempHash);
|
||||
machine->getFinalResult(output, RANDOMX_HASH_SIZE);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
const uint8_t myKey[] ={ 0x67,0x0f,0x0b,0x99,0x1d,0xc3,0xfe,0x80,0x56,0x04,0xea,0xc3,0x79,0x35,0x1d,0x9a,0xb5,0x21,0xef,0xac,0x60,0x95,0xf2,0x6b,0xca,0xa3,0xa8,0x56,0x83,0x89,0x77,0x99};
|
||||
|
||||
const uint8_t myInput[] = { 0x0e,0x0e,0xe4,0xf3,0xf4,0xff,0x05,0x81,0xd4,0x1a,0x87,0xb3,0xa3,0xd7,0xb6,0x40,0x24,0x8d,0x9f,0x34,0x86,0x07,0xe9,0x67,0x55,0x5c,0xce,0x2f,0x40,0xe6,0x5e,0x5e,0x40,0x45,0x46,0x56,0x9e,0xb3,0xe6,
|
||||
//0xee,0x85,0x00,0x00,
|
||||
0xe6,0x23,0x00,0x00,
|
||||
0xcb,0x84,0x81,0x0b,0xc0,0x28,0xa5,0x76,0xec,0xd2,0x0b,0xf9,0xee,0xee,0x43,0x78,0x9c,0x3d,0x55,0xe5,0x54,0xe3,0x05,0xb3,0x46,0x02,0x09,0x64,0x43,0x21,0xd2,0x9e,0x28};
|
||||
|
||||
uint8_t hash[RANDOMX_HASH_SIZE];
|
||||
int initThreadCount =16;
|
||||
randomx_cache* cache = randomx_alloc_cache(RANDOMX_FLAG_DEFAULT);
|
||||
|
||||
randomx_init_cache(cache, myKey, sizeof myKey);
|
||||
|
||||
uint32_t datasetItemCount = randomx_dataset_item_count();
|
||||
|
||||
printf("datasetItemCount=%d\n", datasetItemCount);
|
||||
|
||||
randomx_dataset* dataset = randomx_alloc_dataset(RANDOMX_FLAG_DEFAULT);
|
||||
|
||||
std::vector<std::thread> threads;
|
||||
auto perThread = datasetItemCount / initThreadCount;
|
||||
auto remainder = datasetItemCount % initThreadCount;
|
||||
uint32_t startItem = 0;
|
||||
for (int i = 0; i < initThreadCount; ++i) {
|
||||
auto count = perThread + (i == initThreadCount - 1 ? remainder : 0);
|
||||
threads.push_back(std::thread(&randomx_init_dataset, dataset, cache, startItem, count));
|
||||
startItem += count;
|
||||
}
|
||||
for (unsigned i = 0; i < threads.size(); ++i) {
|
||||
threads[i].join();
|
||||
}
|
||||
|
||||
randomx_release_cache(cache);
|
||||
|
||||
randomx_vm* vm = randomx_create_vm(RANDOMX_FLAG_FULL_MEM,nullptr, dataset);
|
||||
|
||||
randomx_calculate_hash(vm, &myInput, sizeof myInput, hash);
|
||||
|
||||
randomx_destroy_vm(vm);
|
||||
|
||||
randomx_release_dataset(dataset);
|
||||
|
||||
for (unsigned i = 0; i < RANDOMX_HASH_SIZE; ++i)
|
||||
printf("%02x", hash[i] & 0xff);
|
||||
|
||||
return 0;
|
||||
}
|
||||
//8a48e5f9db45ab79d98574c4d81954fe6ac63842214aff73c244b26330b7c9
|
||||
*/
|
||||
|
||||
/*
|
||||
int main() {
|
||||
|
||||
const uint8_t myKey[] ={ 0x67,0x0f,0x0b,0x99,0x1d,0xc3,0xfe,0x80,0x56,0x04,0xea,0xc3,0x79,0x35,0x1d,0x9a,0xb5,0x21,0xef,0xac,0x60,0x95,0xf2,0x6b,0xca,0xa3,0xa8,0x56,0x83,0x89,0x77,0x99};
|
||||
|
||||
const uint8_t myInput[] = { 0x0e,0x0e,0xe4,0xf3,0xf4,0xff,0x05,0x81,0xd4,0x1a,0x87,0xb3,0xa3,0xd7,0xb6,0x40,0x24,0x8d,0x9f,0x34,0x86,0x07,0xe9,0x67,0x55,0x5c,0xce,0x2f,0x40,0xe6,0x5e,0x5e,0x40,0x45,0x46,0x56,0x9e,0xb3,0xe6,
|
||||
//0xee,0x85,0x00,0x00,
|
||||
0xe6,0x23,0x00,0x00,
|
||||
0xcb,0x84,0x81,0x0b,0xc0,0x28,0xa5,0x76,0xec,0xd2,0x0b,0xf9,0xee,0xee,0x43,0x78,0x9c,0x3d,0x55,0xe5,0x54,0xe3,0x05,0xb3,0x46,0x02,0x09,0x64,0x43,0x21,0xd2,0x9e,0x28};
|
||||
|
||||
char hash[RANDOMX_HASH_SIZE];
|
||||
|
||||
//randomx_flags flags = randomx_get_flags();
|
||||
randomx_flags flags = RANDOMX_FLAG_DEFAULT;
|
||||
randomx_cache *myCache = randomx_alloc_cache(flags);
|
||||
|
||||
randomx_init_cache(myCache, &myKey, sizeof myKey);
|
||||
|
||||
randomx_vm *myMachine = randomx_create_vm(flags, myCache, NULL);
|
||||
|
||||
randomx_calculate_hash(myMachine, &myInput, sizeof myInput, hash);
|
||||
|
||||
randomx_destroy_vm(myMachine);
|
||||
randomx_release_cache(myCache);
|
||||
|
||||
for (unsigned i = 0; i < RANDOMX_HASH_SIZE; ++i)
|
||||
printf("%02x", hash[i] & 0xff);
|
||||
|
||||
printf("\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
randomx_cache* cache;
|
||||
randomx_vm* vm = nullptr;
|
||||
|
||||
int main(){
|
||||
|
||||
const uint8_t myKey[] ={ 0x67,0x0f,0x0b,0x99,0x1d,0xc3,0xfe,0x80,0x56,0x04,0xea,0xc3,0x79,0x35,0x1d,0x9a,0xb5,0x21,0xef,0xac,0x60,0x95,0xf2,0x6b,0xca,0xa3,0xa8,0x56,0x83,0x89,0x77,0x99};
|
||||
|
||||
const uint8_t myInput[] = { 0x0e,0x0e,0xe4,0xf3,0xf4,0xff,0x05,0x81,0xd4,0x1a,0x87,0xb3,0xa3,0xd7,0xb6,0x40,0x24,0x8d,0x9f,0x34,0x86,0x07,0xe9,0x67,0x55,0x5c,0xce,0x2f,0x40,0xe6,0x5e,0x5e,0x40,0x45,0x46,0x56,0x9e,0xb3,0xe6,
|
||||
0xe6,0x23,0x00,0x00,
|
||||
0xcb,0x84,0x81,0x0b,0xc0,0x28,0xa5,0x76,0xec,0xd2,0x0b,0xf9,0xee,0xee,0x43,0x78,0x9c,0x3d,0x55,0xe5,0x54,0xe3,0x05,0xb3,0x46,0x02,0x09,0x64,0x43,0x21,0xd2,0x9e,0x28};
|
||||
|
||||
|
||||
// const uint8_t myKey[] ={146, 6, 71, 248, 241, 11, 139, 72, 70, 73, 173, 248, 53, 153, 197, 184, 107, 186, 19, 126, 126, 178, 46, 149, 221, 135, 57, 217, 133, 40, 246, 119};
|
||||
// const uint8_t myInput[] = {0, 0, 0, 14, 246, 237, 44, 156, 4, 131, 10, 137, 157, 56, 143, 188, 94, 194, 80, 172, 219, 123, 75, 112, 250, 36, 34, 195, 214, 232, 2, 195, 72, 210, 201, 0, 0, 0, 0, 0, 128, 7, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
uint8_t hash[RANDOMX_HASH_SIZE];
|
||||
cache = randomx_alloc_cache(RANDOMX_FLAG_DEFAULT);
|
||||
randomx_init_cache(cache, myKey, sizeof(myKey));
|
||||
|
||||
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr);
|
||||
// randomx_vm_set_cache(vm, cache);
|
||||
|
||||
randomx_calculate_hash(vm, myInput, sizeof(myInput), hash);
|
||||
for (unsigned i = 0; i < RANDOMX_HASH_SIZE; ++i)
|
||||
// printf("%02d ", hash[i] & 0xff);
|
||||
printf("%02x", hash[i] & 0xff);
|
||||
|
||||
|
||||
//assert(equalsHex(hash, "1a7151b1367507ded1e9af0b97da8ae23ec84e9f352eb731eab8f0f060710300"));
|
||||
}
|
||||
|
||||
|
||||
//g++ aes_hash.cpp allocator.cpp argon2_avx2.c argon2_core.c argon2_ref.c argon2_ssse3.c assembly_generator_x86.cpp blake2_generator.cpp bytecode_machine.cpp cpu.cpp dataset.cpp instruction.cpp instructions_portable.cpp randomx.cpp reciprocal.c soft_aes.cpp superscalar.cpp virtual_machine.cpp virtual_memory.cpp vm_compiled.cpp vm_compiled_light.cpp vm_interpreted.cpp vm_interpreted_light.cpp ./blake2/blake2b.c jit_compiler_x86.cpp jit_compiler_x86_static.S
|
||||
Some files were not shown because too many files have changed in this diff