add algo files

This commit is contained in:
lzx 2025-07-04 17:47:19 +08:00
parent 155a338571
commit e7a707ea39
122 changed files with 18845 additions and 0 deletions

View File

@ -0,0 +1,9 @@
#矿池算法文件
```
编译命令(gcc编译器ubuntu系统)
blake2b:
sha3x:
blake3:
heavyHash:
randomx:
```

237
blake2b/blake2b.c Normal file
View File

@ -0,0 +1,237 @@
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>
#include "blake2b.h"
// 64-bit right rotation. `y` must be in 1..63: y == 0 or y == 64 would make
// the `<< (64 - y)` / `>> y` operand shift by the full width, which is UB.
#ifndef ROTR64
#define ROTR64(x, y) (((x) >> (y)) ^ ((x) << (64 - (y))))
#endif

// Load a 64-bit little-endian word from an arbitrarily aligned byte pointer.
#define B2B_GET64(p)                            \
    (((uint64_t) ((uint8_t *) (p))[0]) ^        \
    (((uint64_t) ((uint8_t *) (p))[1]) << 8) ^  \
    (((uint64_t) ((uint8_t *) (p))[2]) << 16) ^ \
    (((uint64_t) ((uint8_t *) (p))[3]) << 24) ^ \
    (((uint64_t) ((uint8_t *) (p))[4]) << 32) ^ \
    (((uint64_t) ((uint8_t *) (p))[5]) << 40) ^ \
    (((uint64_t) ((uint8_t *) (p))[6]) << 48) ^ \
    (((uint64_t) ((uint8_t *) (p))[7]) << 56))

// G mixing function of BLAKE2b, operating on the working array v[16] that
// must be in scope at the expansion site.
// Wrapped in do { } while (0) so `B2B_G(...);` behaves as one statement
// (safe inside un-braced if/else), instead of a bare block + empty statement.
#define B2B_G(a, b, c, d, x, y) do {        \
    v[a] = v[a] + v[b] + x;                 \
    v[d] = ROTR64(v[d] ^ v[a], 32);         \
    v[c] = v[c] + v[d];                     \
    v[b] = ROTR64(v[b] ^ v[c], 24);         \
    v[a] = v[a] + v[b] + y;                 \
    v[d] = ROTR64(v[d] ^ v[a], 16);         \
    v[c] = v[c] + v[d];                     \
    v[b] = ROTR64(v[b] ^ v[c], 63);         \
} while (0)
// Initialization Vector.
// Initialization Vector.
// These are the eight standard BLAKE2b IV words (identical to SHA-512's
// initial hash values, per RFC 7693).  Note: blake2b_init below does NOT
// start from these values directly — see the note there.
static const uint64_t blake2b_iv[8] = {
    0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
    0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
    0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
    0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
};
/*
 * Byte-swap a 32-bit big-endian value to host order (little-endian host
 * assumed).
 * NOTE(review): this shadows the glibc be32toh() macro from <endian.h>;
 * rename if that header is ever included in this translation unit.
 */
unsigned int be32toh(unsigned int x)
{
    unsigned int swapped = 0;
    swapped |= (x >> 24) & 0x000000ffU;
    swapped |= (x >> 8)  & 0x0000ff00U;
    swapped |= (x << 8)  & 0x00ff0000U;
    swapped |= (x << 24) & 0xff000000U;
    return swapped;
}
/*
 * Compression function F: folds the 128-byte buffer ctx->b into the chained
 * state ctx->h.  `last` is nonzero for the final block, which inverts v[14]
 * per the BLAKE2b specification.
 */
static void blake2b_compress(blake2b_ctx *ctx, int last)
{
    // Message word schedule; rounds 10 and 11 repeat rounds 0 and 1.
    // static const: the table is read-only, so don't copy it onto the
    // stack on every call (the original rebuilt it each invocation).
    static const uint8_t sigma[12][16] = {
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
        { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
        { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
        { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
        { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
        { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
        { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
        { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
        { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
        { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
        { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
    };
    int i;
    uint64_t v[16], m[16];

    for (i = 0; i < 8; i++) {           // init work variables
        v[i] = ctx->h[i];
        v[i + 8] = blake2b_iv[i];
    }

    v[12] ^= ctx->t[0];                 // low 64 bits of byte offset
    v[13] ^= ctx->t[1];                 // high 64 bits
    if (last)                           // last block flag set ?
        v[14] = ~v[14];

    for (i = 0; i < 16; i++)            // get little-endian message words
        m[i] = B2B_GET64(&ctx->b[8 * i]);

    for (i = 0; i < 12; i++) {          // twelve rounds
        B2B_G( 0, 4,  8, 12, m[sigma[i][ 0]], m[sigma[i][ 1]]);
        B2B_G( 1, 5,  9, 13, m[sigma[i][ 2]], m[sigma[i][ 3]]);
        B2B_G( 2, 6, 10, 14, m[sigma[i][ 4]], m[sigma[i][ 5]]);
        B2B_G( 3, 7, 11, 15, m[sigma[i][ 6]], m[sigma[i][ 7]]);
        B2B_G( 0, 5, 10, 15, m[sigma[i][ 8]], m[sigma[i][ 9]]);
        B2B_G( 1, 6, 11, 12, m[sigma[i][10]], m[sigma[i][11]]);
        B2B_G( 2, 7,  8, 13, m[sigma[i][12]], m[sigma[i][13]]);
        B2B_G( 3, 4,  9, 14, m[sigma[i][14]], m[sigma[i][15]]);
    }

    for (i = 0; i < 8; ++i)             // fold halves into the chain value
        ctx->h[i] ^= v[i] ^ v[i + 8];
}
/*
 * Absorb `inlen` bytes from `in` into the hash state, running the
 * compression function each time the 128-byte internal buffer fills.
 */
void blake2b_update(blake2b_ctx *ctx, const void *in, size_t inlen)
{
    const uint8_t *src = (const uint8_t *) in;
    size_t off;

    for (off = 0; off < inlen; off++) {
        if (ctx->c == 128) {            // buffer full: flush it
            ctx->t[0] += ctx->c;        // advance the byte counter
            if (ctx->t[0] < ctx->c)     // low word wrapped?
                ctx->t[1]++;            // ...carry into the high word
            blake2b_compress(ctx, 0);   // compress (not the last block)
            ctx->c = 0;                 // buffer is empty again
        }
        ctx->b[ctx->c++] = src[off];
    }
}
/*
 * Initialize the hash state for an `outlen`-byte digest (keyed mode was
 * removed; this always succeeds and returns 0).
 *
 * NOTE(review): h[0], h[6] and h[7] are NOT the standard "IV ^ parameter
 * block" values of BLAKE2b — this looks like a pre-personalised state
 * specific to this pool algorithm.  Confirm against the protocol spec
 * before changing.
 */
int blake2b_init(blake2b_ctx *ctx, size_t outlen)
{
    static const uint64_t init_state[8] = {
        0x6a09e667f2bdc93aULL, 0xbb67ae8584caa73bULL,
        0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL,
        0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL,
        0x48ec89c38820de31ULL, 0x5be0cd10137e21b1ULL
    };
    size_t i;

    for (i = 0; i < 8; i++)
        ctx->h[i] = init_state[i];
    ctx->t[0] = 0;            // input byte count, low word
    ctx->t[1] = 0;            // input byte count, high word
    ctx->c = 0;               // write position within ctx->b
    ctx->outlen = outlen;     // requested digest length in bytes
    return 0;
}
/*
 * Finish the hash: account for buffered bytes, zero-pad the final block,
 * run the last compression, then write ctx->outlen little-endian bytes of
 * the chain value to `out`.
 */
void blake2b_final(blake2b_ctx *ctx, void *out)
{
    uint8_t *dst = (uint8_t *) out;
    size_t i;

    ctx->t[0] += ctx->c;             // count the buffered tail bytes
    if (ctx->t[0] < ctx->c)          // carry into the high word on wrap
        ctx->t[1]++;

    while (ctx->c < 128)             // zero-pad up to a full block
        ctx->b[ctx->c++] = 0;
    blake2b_compress(ctx, 1);        // final-block flag set

    // Serialise the state little-endian, one byte at a time.
    for (i = 0; i < ctx->outlen; i++)
        dst[i] = (uint8_t)((ctx->h[i >> 3] >> (8 * (i & 7))) & 0xFF);
}
/*
int blake2b(void *out, size_t outlen,const void *key, size_t keylen,const void *in, size_t inlen,void *msg_s1)
{
blake2b_ctx ctx;
if (blake2b_init(&ctx, outlen, key, keylen))
return -1;
blake2b_update(&ctx, in, inlen);
blake2b_final(&ctx, out);
return 0;
}
int main(int argc, char** argv) {
int i,j;
uint8_t md[50];
uint8_t msg_s1[128];
uint8_t in[140+4] = {
0x04,0x00,0x00,0x00,
0xe5,0x4c,0x27,0x54,0x40,0x50,0x66,0x8f,0x27,0x2e,0xc3,0xb4,0x60,0xe1,0xcd,0xe7,
0x45,0xc6,0xb2,0x12,0x39,0xa8,0x1d,0xae,0x63,0x7f,0xde,0x47,0x04,0x00,0x00,0x00,
0x84,0x4b,0xc0,0xc5,0x56,0x96,0xef,0x99,0x20,0xee,0xda,0x11,0xc1,0xeb,0x41,0xb0,
0xc2,0xe7,0x32,0x4b,0x46,0xcc,0x2e,0x7a,0xa0,0xc2,0xaa,0x77,0x36,0x44,0x8d,0x7a,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x68,0x24,0x1a,0x58,
0x7e,0x7e,0x06,0x1d,
0x25,0x0e,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x01,0x00,0x00,0x00
};
blake2b(md, 50, NULL, 0, in, sizeof(in), msg_s1);
printf("the hash out is \n");
for (i=0; i < sizeof(md); ++i) {
printf("%02x",md[i]);
}
printf("\n");
return 0;
}
*/

24
blake2b/blake2b.h Normal file
View File

@ -0,0 +1,24 @@
/* blake2b.h — minimal unkeyed BLAKE2b interface (see blake2b.c). */
/* Include guard added: the original header had none, so a double include
 * would redefine enum blake2b_constant and blake2b_ctx. */
#ifndef BLAKE2B_H
#define BLAKE2B_H

#include <stddef.h>
#include <stdint.h>

enum blake2b_constant
{
    BLAKE2B_BLOCKBYTES = 128,
    BLAKE2B_OUTBYTES = 64,
    BLAKE2B_KEYBYTES = 64,
    BLAKE2B_SALTBYTES = 16,
    BLAKE2B_PERSONALBYTES = 16
};

/* Streaming hash state. */
typedef struct {
    uint8_t b[128];    // input buffer (one 128-byte block)
    uint64_t h[8];     // chained state
    uint64_t t[2];     // total number of bytes hashed (128-bit counter)
    size_t c;          // write position within b[]
    size_t outlen;     // digest size in bytes
} blake2b_ctx;

void blake2b_update(blake2b_ctx *ctx, const void *in, size_t inlen);
int blake2b_init(blake2b_ctx *ctx, size_t outlen);
void blake2b_final(blake2b_ctx *ctx, void *out);

#endif /* BLAKE2B_H */

192
heavyHash/DiagonalMatrix.h Normal file
View File

@ -0,0 +1,192 @@
#ifndef _SINGULAR_DIAGONAL_MATRIX_H
#define _SINGULAR_DIAGONAL_MATRIX_H
#include "singular.h"
// Number of stored elements for an M x N block.  The original min(M, N)
// definition is kept commented out; this variant sizes the full M*N block.
//#define L(M,N) (M < N ? M : N)
// Arguments parenthesised so expressions expand correctly:
// with the old `(M*N)`, L(a+1,b) expanded to a+1*b.
#define L(M,N) ((M) * (N))
#if 1
// C "class": diagonal matrix as data plus function pointers
// (bound by DiagonalMatrix_init / DiagonalMatrix_DiagonalMatrix in obtc.c).
typedef struct class_DiagonalMatrix DiagonalMatrix_t;
struct class_DiagonalMatrix {
    double *pBlock;                                                    // heap block of diagonal elements
    double (*operator)(struct class_DiagonalMatrix *p, int i, int j);  // pBlock[i] if i == j, else 0.0
    void (*release)(struct class_DiagonalMatrix *p);                   // frees pBlock and nulls it
};
#else
#include <algorithm>
#include <cassert>
#include <cstring>
//namespace singular {
/**
* Diagonal matrix.
*/
template < int M, int N >
class DiagonalMatrix {
public:
enum {
/** Number of diagonal elements. */
L = M < N ? M : N
};
private:
/**
* Memory block for the diagonal elements.
* The ith row and ith column is given by `elements[i]`.
*/
double* pBlock;
public:
/** Initializes a diagonal matrix filled with 0. */
DiagonalMatrix() {
this->pBlock = new double[L];
std::fill(this->pBlock, this->pBlock + L, 0.0);
}
/**
* Initializes a diagonal matrix with given diagonal values.
*
* The diagonal matrix will look like,
* \f[
* \begin{bmatrix}
* \text{values[0]} & & \\
* & \ddots & \\
* & & \text{values[min(M, N)-1]}
* \end{bmatrix}
* \f]
*
* The behavior is undefined if `values` has less than `min(M, N)`
* elements.
*
* @param values
* Diagonal values of the matrix.
*/
explicit DiagonalMatrix(const double values[]) {
this->pBlock = new double[L];
memcpy(this->pBlock, values, sizeof(double) * L);
}
/**
* Steals the memory block from a given diagonal matrix.
*
* @param[in,out] copyee
* Diagonal matrix from which the memory block is to be stolen.
* No loger valid after this call.
*/
#if SINGULAR_RVALUE_REFERENCE_SUPPORTED
DiagonalMatrix(DiagonalMatrix&& copyee) : pBlock(copyee.pBlock) {
copyee.pBlock = nullptr;
}
#else
DiagonalMatrix(const DiagonalMatrix& copyee) : pBlock(copyee.pBlock) {
const_cast< DiagonalMatrix& >(copyee).pBlock = nullptr;
}
#endif
/** Releases the memory block of this diagonal matrix. */
~DiagonalMatrix() {
this->release();
}
/**
* Steals the memory block from a given diagonal matrix.
*
* @param[in,out] copyee
* Diagonal matrix from which the memory block is to be stolen.
* No longer valid after this call.
* @return
* Reference to this diagonal matrix.
*/
#if SINGULAR_RVALUE_REFERENCE_SUPPORTED
DiagonalMatrix& operator =(DiagonalMatrix&& copyee) {
#else
DiagonalMatrix& operator =(const DiagonalMatrix& copyee) {
#endif
this->release();
this->pBlock = copyee.pBlock;
#if SINGULAR_RVALUE_REFERENCE_SUPPORTED
copyee.pBlock = nullptr;
#else
const_cast< DiagonalMatrix& >(copyee).pBlock = nullptr;
#endif
return *this;
}
/**
* Returns a clone of this matrix.
*
* @return
* Clone of this matrix.
*/
inline DiagonalMatrix clone() const {
return DiagonalMatrix(this->pBlock);
}
/**
* Returns the element at a given row and column.
*
* The behavior is undefined,
* - if `i < 0` or `i >= M`,
* - or if `j < 0` or `j >= N`
*
* @param i
* Index of the row to be obtained.
* @param j
* Index of the column to be obtained.
* @return
* Element at the ith row and jth column.
* 0 if `i != j`.
*/
double operator ()(int i, int j) const {
assert(i >= 0 && i < M);
assert(j >= 0 && j < N);
if (i == j) {
return this->pBlock[i];
} else {
return 0.0;
}
}
/**
* Transposes this matrix.
*
* @return
* Transposed matrix.
*/
DiagonalMatrix< N, M > transpose() const {
return DiagonalMatrix< N, M >(this->pBlock);
}
private:
#if SINGULAR_FUNCTION_DELETION_SUPPORTED
/** Copy constructor is not allowed. */
DiagonalMatrix(const DiagonalMatrix& copyee) = delete;
/** Copy assignment is not allowed. */
DiagonalMatrix& operator =(const DiagonalMatrix& copyee) = delete;
#elif SINGULAR_RVALUE_REFERENCE_SUPPORTED
/** Copy constructor is not allowed. */
DiagonalMatrix(const DiagonalMatrix& copyee) {}
/** Copy assignment is not allowed. */
DiagonalMatrix& operator =(const DiagonalMatrix& copyee) {
return *this;
}
#endif
/**
* Releases the memory block of this matrix.
* Has no effect if the memory block has already been released.
*/
inline void release() {
delete[] this->pBlock;
this->pBlock = nullptr;
}
};
//}
#endif
#endif

14
heavyHash/Makefile Normal file
View File

@ -0,0 +1,14 @@
# Build libkas.a (heavyhash algorithm library) with gcc.
SRCS = heavyhash.c obtc.c sha3.c
OBJS = $(SRCS:.c=.o)
CC = gcc
CCFLAGS = -Wall

libkas.a: $(OBJS)
	ar -rv libkas.a $(OBJS)

%.o: %.c
	$(CC) $(CCFLAGS) -c $< -o $@

# 'clean' is not a file; mark it phony so a stray file named "clean"
# cannot mask the rule.
.PHONY: clean
clean:
	rm -rf *.o *.a

25
heavyHash/Matrix.h Normal file
View File

@ -0,0 +1,25 @@
#ifndef _SINGULAR_MATRIX_H
#define _SINGULAR_MATRIX_H
#include "singular.h"
#include "Vector.h"
//#include <algorithm>
//#include <cstring>
//#include <iostream>
// Dense row-major matrix written as a C "class": data plus function
// pointers, bound by Matrix_def() in obtc.c (which uses M = N = 64).
typedef struct class_Matrix Matrix_t;
struct class_Matrix {
    double* pBlock;                                           // heap element block, row-major; owned
    Matrix_t (*clone)(struct class_Matrix *p);                // deep copy of pBlock (NOTE: left unbound by Matrix_def)
    void (*filledwith)(struct class_Matrix *p,const double values[]);  // bulk copy into pBlock
    double (*operator)(struct class_Matrix *p, int i, int j);          // element accessor m(i, j)
    Vector_t (*row)(struct class_Matrix *p, int i);           // non-owning view of row i (stride 1)
    Vector_t (*column)(struct class_Matrix *p, int j);        // non-owning view of column j (stride N)
    void (*release)(struct class_Matrix *p);                  // frees pBlock and nulls it
};
#endif

19
heavyHash/Reflector.h Normal file
View File

@ -0,0 +1,19 @@
#ifndef _SINGULAR_REFLECTOR_H
#define _SINGULAR_REFLECTOR_H
#include "Matrix.h"
#include "singular.h"
// Householder reflector H = I - gamma * u * u^T,
// built by Reflector_init() in obtc.c.
typedef struct class_Reflector Reflector_t;
struct class_Reflector {
    Vector_t u;      // reflection vector; u.pBlock is heap-owned by the reflector
    double gamma;    // scale factor (u0 / tau; 0.0 for a zero input vector)
    size_t L;        // number of active elements in u
    double* ptr;     // NOTE(review): not referenced in the visible sources — confirm before removing
};
#endif

17
heavyHash/Rotator.h Normal file
View File

@ -0,0 +1,17 @@
#ifndef _SINGULAR_ROTATOR_H
#define _SINGULAR_ROTATOR_H
#include "Matrix.h"
#include "singular.h"
// 2x2 Givens rotation, stored row-major in elements[4] as
// [cs -sn; sn cs] (see Rotator_init in obtc.c).
typedef struct class_Rotator Rotator_t;
struct class_Rotator {
    double elements[4];                                          // row-major 2x2 matrix
    double (*operator)(struct class_Rotator *p, int i, int j);   // elements[i*2 + j]
    void (*applyFromLeftTo)(struct class_Rotator *p, Matrix_t rhs, int k);   // NOTE(review): no visible definition in these sources
    void (*applyFromRightTo)(struct class_Rotator *p, Matrix_t rhs, int k);  // NOTE(review): no visible definition in these sources
};
#endif

45
heavyHash/Svd.h Normal file
View File

@ -0,0 +1,45 @@
#ifndef _SINGULAR_SVD_H
#define _SINGULAR_SVD_H
// `bool` is used in the declarations below; include <stdbool.h> explicitly
// instead of relying on another header to provide it.
#include <stdbool.h>
#include "DiagonalMatrix.h"
#include "Matrix.h"
#include "Reflector.h"
#include "Rotator.h"
//#include "singular.h"
//#include <algorithm>
//#include <cassert>
//#include <tuple>
// SVD driver "class"; only the rank test is exposed through the vtable.
typedef struct Svd Svd_t;
struct Svd {
    //USV decomposeUSV(const Matrix< M, N >& m)
    bool (*isFullRank)(Svd_t *p, DiagonalMatrix_t singularValues, const int size);
};
// Upper-bidiagonal working matrix used by the Golub-Kahan bidiagonalisation
// and Francis QR steps (see obtc.c).
typedef struct class_BidiagonalMatrix BidiagonalMatrix_t;
struct class_BidiagonalMatrix {
    double* pBlock;
    double (*operator)(struct class_BidiagonalMatrix *p, int i, int j);
    double (*applyFirstRotatorFromRight)(struct class_BidiagonalMatrix *p, Rotator_t *r);
    double (*applyRotatorFromRight)(struct class_BidiagonalMatrix *p, Rotator_t *r, int n, double bulge);
    double (*applyRotatorFromLeft)(struct class_BidiagonalMatrix *p, Rotator_t *r, int n, double bulge);
    BidiagonalMatrix_t (*bidiagonalize)(struct class_BidiagonalMatrix *p, Matrix_t m);
    void (*doFrancis)(struct class_BidiagonalMatrix *m,int n);
    double (*calculateShift)(struct class_BidiagonalMatrix *m, int n);
    // NOTE(review): member is spelled `releases` while the free function
    // below is BidiagonalMatrix_release — confirm which call sites use.
    void (*releases)(struct class_BidiagonalMatrix *p);
};
void BidiagonalMatrix_doFrancis(BidiagonalMatrix_t *m, int n);
double BidiagonalMatrix_calculateShift(BidiagonalMatrix_t *m, int n);
double BidiagonalMatrix_applyRotatorFromLeft(BidiagonalMatrix_t *ptr, Rotator_t *r, int n, double bulge);
double BidiagonalMatrix_applyRotatorFromRight(BidiagonalMatrix_t *ptr, Rotator_t *r, int n, double bulge);
double BidiagonalMatrix_applyFirstRotatorFromRight(BidiagonalMatrix_t *p, Rotator_t *r);
double BidiagonalMatrix_operator(BidiagonalMatrix_t *p, int i, int j);
void BidiagonalMatrix_release(BidiagonalMatrix_t *p);
void BidiagonalMatrix_init(BidiagonalMatrix_t *p, Matrix_t *m);
void BidiagonalMatrix_def(BidiagonalMatrix_t *p);
BidiagonalMatrix_t BidiagonalMatrix_bidiagonalize(BidiagonalMatrix_t *p, Matrix_t m);
#endif

22
heavyHash/Vector.h Normal file
View File

@ -0,0 +1,22 @@
#ifndef _SINGULAR_VECTOR_H
#define _SINGULAR_VECTOR_H
#include <stddef.h>
#include "singular.h"
// Strided, non-owning view over a block of doubles (a matrix row or column).
typedef struct class_Vector Vector_t;
struct class_Vector {
    double* pBlock;    // first element of the view; storage owned elsewhere
    size_t len;        // number of visible elements
    ptrdiff_t delta;   // stride between elements: 1 for row views, N for column views (see obtc.c)
    double* ptr;       // cursor advanced by move()
    void (*move)(struct class_Vector *p, ptrdiff_t delta);     // ptr += delta
    double (*operator)(struct class_Vector *p, size_t idx);    // pBlock[idx * delta]
    Vector_t (*slice)(struct class_Vector *p, size_t start);   // sub-view starting at `start`
};
#endif

150
heavyHash/heavyhash.c Normal file
View File

@ -0,0 +1,150 @@
#include "sha3.h"
#include "obtc.h"
// Thin C wrappers giving CSHA3_256 a method-call style over the sha3_* API.

/* Absorb `len` bytes into the SHA3-256 state. */
void CSHA3_256_Write(CSHA3_256 *p, const unsigned char* data, size_t len) {
    sha3_update(&p->context, data, len);
    //return *this;
}
/* Squeeze the OUTPUT_SIZE-byte digest into `hash`. */
void CSHA3_256_Finalize(CSHA3_256 *p, unsigned char hash[OUTPUT_SIZE]) {
    sha3_final(hash, &p->context);
}
/*void CSHA3_256_Reset(Obtc_t *Obtc, CSHA3_256 *p) {
sha3_init(Obtc,&p->context, OUTPUT_SIZE);
//return *this;
}*/
/* "Constructor": reset the SHA3 state and bind the method pointers. */
void CSHA3_256_init(Obtc_t *Obtc, CSHA3_256 *p) {
    sha3_init(Obtc, &p->context, OUTPUT_SIZE);
    p->Write = CSHA3_256_Write;
    p->Finalize = CSHA3_256_Finalize;
    //p->Reset = CSHA3_256_Reset;
}
/* Re-initialise only the SHA3 state; method pointers are left untouched. */
void CSHA3_256_CSHA3_256(Obtc_t *Obtc,CSHA3_256 *p) {
    sha3_init(Obtc,&p->context, OUTPUT_SIZE);
}
/* Forward a write into the embedded SHA3-256 hasher (first hashing pass). */
void CHeavyHash_Write(CHeavyHash *p, const unsigned char* data, size_t len) {
    p->hasher.Write(&p->hasher,data, len);
    //sha3_update(&CSHA3_256_p.context, data, len);
    //CSHA3_256_Write(&CSHA3_256_p, data, OUTPUT_SIZE);
}
/*
 * Finish the heavyhash: finalize the inner SHA3 into Obtc->g_hash_first,
 * multiply that digest by the 64x64 matrix in 4-bit precision, XOR the
 * product with the digest, refresh Obtc->const_data, then SHA3-256 the
 * XORed value into `hash`.
 * (Unused locals `hash_first` and the dead `memcpy` into a scratch array
 * from the original were removed; behavior is unchanged.)
 */
void CHeavyHash_Finalize(Obtc_t *Obtc, CHeavyHash *p, unsigned char hash[OUTPUT_SIZE]) {
    // First pass digest goes straight into the shared state.
    p->hasher.Finalize(&p->hasher,&Obtc->g_hash_first.bb.data[0]);
    uint256 product = MultiplyUsing4bitPrecision(p->matrix, Obtc->g_hash_first);
    uint256 hash_xored;
    for (size_t i = 0; i < OUTPUT_SIZE; ++i) {
        hash_xored.bb.data[i] = Obtc->g_hash_first.bb.data[i] ^ product.bb.data[i];
    }
    // Constant table consumed by the second SHA3 pass via sha3_init.
    // static const: read-only, so don't rebuild it on the stack per call.
    static const uint8_t temp[200]={
    0x16,0x19,0x32,0x7d,0x10,0xb9,0xda,0x35,0x54,0x9a,0xe0,0x31,0x2f,0x9f,0xc6,0x15,0x92,0xbb,0x39,0x9d,
    0xb5,0x29,0x0c,0x0a,0x47,0xc3,0x9f,0x67,0x51,0x12,0xc2,0x2e,0xc7,0x76,0xc5,0x04,0x84,0x81,0xb9,0x57,
    0xb9,0x92,0xf2,0xd3,0x7b,0x34,0xca,0x58,0xea,0x8f,0xdb,0x80,0xba,0xc4,0x6d,0x39,0x7e,0x8f,0x1d,0xb1,
    0x77,0x65,0xcc,0x07,0x87,0xe9,0x61,0xb0,0x36,0xbc,0x94,0x16,0x77,0x4c,0x86,0x83,0x54,0x34,0xf2,0xb0,
    0x4e,0xf7,0x4b,0x3a,0x99,0xcd,0xb0,0x44,0x2e,0xc6,0x5b,0xd3,0x56,0x24,0x93,0xe4,0x6c,0x6b,0x7d,0x01,
    0xa7,0x69,0xcc,0x3d,0xd3,0x1f,0x4c,0xc3,0x54,0xc1,0x8c,0x3f,0xf4,0x31,0xc0,0x5d,0xd0,0xa9,0xa2,0x26,
    0xa0,0xbc,0xaa,0x9f,0x79,0x2a,0x3d,0x0c,0x80,0x39,0xf9,0xa6,0x0d,0xcf,0x6a,0x48,0x5e,0x21,0x90,0x40,
    0x25,0x0f,0xc4,0x62,0xc1,0x00,0xff,0x2a,0x93,0x89,0x35,0xba,0x72,0xc7,0xd8,0x2e,0x14,0xf3,0x40,0x69,
    0xe7,0x20,0xe0,0xdf,0x44,0xee,0xce,0xde,0x11,0xa7,0x5f,0x4c,0x80,0x05,0x64,0x98,0x7a,0x14,0xff,0x48,
    0x16,0xc7,0xf8,0xee,0x79,0x62,0x9b,0x0e,0x2f,0x9f,0x42,0x16,0x3a,0xd7,0x4c,0x52,0xb2,0x24,0x85,0x09,
    };
    for(int i = 0 ;i< 200 ;i++)Obtc->const_data[i] = temp[i];
    // Second pass: SHA3-256 over the XORed digest.
    CSHA3_256_CSHA3_256(Obtc, &p->hasher);
    CSHA3_256_Write(&p->hasher, &hash_xored.bb.data[0], OUTPUT_SIZE);
    CSHA3_256_Finalize(&p->hasher, hash) ;
}
/* Reload the 64x64 heavyhash matrix; the hash state is left untouched. */
void CHeavyHash_Reset(CHeavyHash *p, uint64_t matrix_[64*64]) {
    size_t idx = 0;
    while (idx < 64 * 64) {
        p->matrix[idx] = matrix_[idx];
        ++idx;
    }
}
/*
 * Bind CHeavyHash method pointers, reset the inner SHA3 state, and copy
 * the matrix in.
 * NOTE(review): CHeavyHash_Finalize takes an extra Obtc_t* parameter, so
 * `p->Finalize = CHeavyHash_Finalize` assigns an incompatible function
 * pointer type — confirm callers invoke CHeavyHash_Finalize directly.
 */
void CHeavyHash_init(Obtc_t *Obtc, CHeavyHash *p, uint64_t matrix_[64*64]){
    p->Write = CHeavyHash_Write;
    p->Finalize = CHeavyHash_Finalize;
    p->Reset = CHeavyHash_Reset;
    p->hasher.Write = CSHA3_256_Write;
    p->hasher.Finalize = CSHA3_256_Finalize;
    //p->hasher.Reset = CSHA3_256_Reset;
    sha3_init(Obtc, &p->hasher.context, OUTPUT_SIZE);
    for (int i = 0; i < 64*64; ++i)
        p->matrix[i] = matrix_[i];
}
/*
 * product += matrix * vector for a dense row-major 64x64 uint64 matrix.
 * Note: ACCUMULATES into `product` — callers pass a zeroed array.
 * Arithmetic wraps modulo 2^64 per C unsigned semantics.
 */
void MultiplyMatrices(uint64_t matrix[64*64], uint64_t vector[64], uint64_t product[64]){
    for (int row = 0; row < 64; ++row) {
        const uint64_t *m_row = &matrix[64 * row];
        uint64_t acc = 0;
        for (int col = 0; col < 64; ++col) {
            acc += m_row[col] * vector[col];
        }
        product[row] += acc;
    }
}
/*
 * heavyhash core step: interpret the hash as 64 4-bit values, multiply by
 * the 64x64 matrix, then keep only the high bits of each product (>> 10;
 * assuming a 4-bit matrix — see Is4BitPrecision — each product fits in 14
 * bits, so the result is again a 4-bit value) and repack into a uint256.
 */
uint256 MultiplyUsing4bitPrecision(uint64_t matrix[64*64], const uint256 hash) {
    // conversion to matrix with 4 bit values
    uint64_t vector[64] = {0};
    ConvertTo4BitPrecisionVector(hash, vector);
    // perform matrix multiplication
    uint64_t product[64] = {0};
    MultiplyMatrices(matrix, vector, product);
    for (int i = 0; i < 64; ++i) {
        product[i] >>= 10;
    }
    return Convert4bitVectorToUint(product);
}
/*
 * Split each of the WIDTH hash bytes into two 4-bit values, high nibble
 * first, filling vector[0 .. 2*WIDTH-1].
 * Assumes 2*WIDTH == 64 to fill the caller's vector exactly — presumably
 * WIDTH == 32 in obtc.h; TODO confirm.
 */
void ConvertTo4BitPrecisionVector(uint256 bit_sequence, uint64_t vector[64]) {
    int index = 0;
    int i;
    for (i = 0; i < WIDTH; i++) {
        vector[index] = bit_sequence.bb.data[i] >> 4;     // high nibble
        vector[index+1] = bit_sequence.bb.data[i] & 0xF;  // low nibble
        index += 2;
    }
}
/*
 * Inverse of ConvertTo4BitPrecisionVector: repack nibble pairs into bytes,
 * byte i = (x[2i] << 4) | x[2i+1].
 * Only meaningful when each x[] entry fits in 4 bits (the >> 10 in
 * MultiplyUsing4bitPrecision guarantees this for heavyhash inputs);
 * larger values would bleed into higher bits of the byte.
 * Any uint256 fields beyond bb.data[0..WIDTH-1] are left uninitialized.
 */
uint256 Convert4bitVectorToUint(const uint64_t x[64]) {
    uint256 bit_sequence;
    int index = 0;
    int i;
    for (i = 0; i < WIDTH; i++) {
        bit_sequence.bb.data[i] = ( x[index] << 4) | x[index+1];
        index += 2;
    }
    return bit_sequence;
}

98
heavyHash/heavyhash.h Normal file
View File

@ -0,0 +1,98 @@
#ifndef OPOW_CRYPTO_HEAVYHASH_H
#define OPOW_CRYPTO_HEAVYHASH_H
#include <stdint.h>
#include <stdlib.h>
#include "sha3.h"
//#include <memory>
//#include "obtc.h"
#define OUTPUT_SIZE 32
typedef struct class_CSHA3_256 CSHA3_256;
struct class_CSHA3_256
{
sha3_ctx_t context;
// static const size_t OUTPUT_SIZE = 32;
//CSHA3_256& Write(const unsigned char* data, size_t len);
void (*Write)(struct class_CSHA3_256 *p, const unsigned char* data, size_t len);
void (*Finalize)(struct class_CSHA3_256 *p, unsigned char hash[OUTPUT_SIZE]);
//CSHA3_256& Reset();
};
typedef struct class_CHeavyHash CHeavyHash;
struct class_CHeavyHash
{
uint64_t matrix[64*64];
CSHA3_256 hasher;
//static const size_t OUTPUT_SIZE = 32;
//explicit CHeavyHash(uint64_t matrix_[64*64]);
//CHeavyHash& Reset(uint64_t matrix_[64*64]);
//CHeavyHash& Write(const unsigned char* data, size_t len);
//void Finalize(unsigned char hash[OUTPUT_SIZE]);
void (*Reset)(struct class_CHeavyHash *p, uint64_t matrix_[64*64]);
void (*Write)(struct class_CHeavyHash *p, const unsigned char* data, size_t len);
void (*Finalize)(struct class_CHeavyHash *p, unsigned char hash[OUTPUT_SIZE]);
};
#if 0
/** A hasher class for SHA3-256. */
class CSHA3_256
{
private:
sha3_ctx_t context;
public:
static const size_t OUTPUT_SIZE = 32;
CSHA3_256();
CSHA3_256& Write(const unsigned char* data, size_t len);
void Finalize(unsigned char hash[OUTPUT_SIZE]);
CSHA3_256& Reset();
};
class CHeavyHash
{
private:
uint64_t matrix[64*64];
CSHA3_256 hasher;
public:
static const size_t OUTPUT_SIZE = 32;
explicit CHeavyHash(uint64_t matrix_[64*64]);
CHeavyHash& Reset(uint64_t matrix_[64*64]);
CHeavyHash& Write(const unsigned char* data, size_t len);
void Finalize(unsigned char hash[OUTPUT_SIZE]);
};
#endif
uint256 MultiplyUsing4bitPrecision(uint64_t matrix[64*64], const uint256 hash);
void ConvertTo4BitPrecisionVector(uint256 bit_sequence, uint64_t vector[64]);
uint256 Convert4bitVectorToUint(const uint64_t x[64]);
//zzj add
/*extern void CSHA3_256_init(struct Obtc_opt *Obtc, CSHA3_256 *p);
void CSHA3_256_CSHA3_256(struct Obtc_opt *Obtc, CSHA3_256 *p);
void CSHA3_256_Write(CSHA3_256 *p, const unsigned char* data, size_t len);
void CSHA3_256_Finalize(CSHA3_256 *p, unsigned char hash[OUTPUT_SIZE]);
//
void CHeavyHash_init(struct Obtc_opt *Obtc, CHeavyHash *p, uint64_t matrix_[64*64]);
void CHeavyHash_Write(CHeavyHash *p, const unsigned char* data, size_t len);
void CHeavyHash_Finalize(struct Obtc_opt *Obtc, CHeavyHash *p, unsigned char hash[OUTPUT_SIZE]);
*/
#endif // OPOW_CRYPTO_HEAVYHASH_H

BIN
heavyHash/heavyhash.o Normal file

Binary file not shown.

BIN
heavyHash/libkas.a Normal file

Binary file not shown.

907
heavyHash/obtc.c Normal file
View File

@ -0,0 +1,907 @@
//! heavyhash extracted from optical bitcoin
//! 2022 barrystyle
#include <stdint.h>
#include <stdlib.h>
#include <math.h>
#include <search.h>//qsort
#include<time.h>
#include "obtc.h"
#define M 64
#define N 64
/* Return true iff every entry of the 64x64 matrix fits in 4 bits (<= 0xF). */
bool Is4BitPrecision(const uint64_t matrix[64*64])
{
    for (int idx = 0; idx < 64 * 64; ++idx) {
        if (matrix[idx] > 0xF) {
            return false;
        }
    }
    return true;
}
/*
 * Element accessor: diagonal matrices are zero everywhere off the diagonal.
 * NOTE(review): the bounds asserts hard-code 64 instead of the M/N macros
 * defined above — fine while M == N == 64, but brittle.
 */
double DiagonalMatrix_operator(DiagonalMatrix_t *p, int i, int j)
{
    assert(i >= 0 && i < 64);
    assert(j >= 0 && j < 64);
    if (i == j) {
        return p->pBlock[i];
    } else {
        return 0.0;
    }
}
/*
 * Free the diagonal-element block.  The pointer is nulled, so calling
 * release twice is safe (free(NULL) is a no-op, no guard needed).
 */
void DiagonalMatrix_release(DiagonalMatrix_t *p)
{
    free(p->pBlock);
    p->pBlock = NULL;
}
/*
 * Allocate the diagonal block, copy M (= 64) values in, and bind methods.
 * NOTE(review): the malloc result is unchecked — an OOM crashes in memcpy.
 */
void DiagonalMatrix_init(DiagonalMatrix_t *p, const double values[])
{
    p->pBlock = (double *)malloc(sizeof(double)*M);
    //memset(pBlock, 0.0, sizeof(double)*L(64,64));
    memcpy(p->pBlock, values, sizeof(double) * M);
    p->operator = DiagonalMatrix_operator;
    p->release = DiagonalMatrix_release;
}
/* Bind the method pointers only; pBlock is left untouched. */
void DiagonalMatrix_DiagonalMatrix(DiagonalMatrix_t *p)
{
    p->operator = DiagonalMatrix_operator;
    p->release = DiagonalMatrix_release;
}
//-----------------------------vector-------------------------------//
/* Advance the cursor by `delta` elements. */
void vector_move(Vector_t *p, ptrdiff_t delta) {
    p->ptr += delta;
}
/* Sub-view of v starting at element `start`; shares v's storage. */
Vector_t vector_slice(Vector_t v, size_t start) {
    //assert(start >= 0 && start <= p->len);
    Vector_t v_tmp;
    v_tmp.pBlock = v.pBlock + start * v.delta;  // skip `start` strided elements
    v_tmp.len = v.len - start;
    v_tmp.delta = v.delta;
    return v_tmp;
}
/* Strided element read (column views have delta == N). */
double Vector_column_operator(Vector_t *p, size_t idx){
    return p->pBlock[idx * p->delta];
}
/* Identical to Vector_column_operator; kept for call-site symmetry. */
double Vector_row_operator(Vector_t *p, size_t idx){
    return p->pBlock[idx * p->delta];
}
/* Scatter vec back into column `idx` of the matrix, starting at row `offset`. */
void Vector_sync(Matrix_t *p, size_t idx, Vector_t vec, int offset){
    for(int i = 0; i < vec.len; i++){
        p->pBlock[idx+(offset+i)*N] = vec.pBlock[i];
    }
}
/* Scatter vec back into row `idx` of the matrix, starting at column `offset`. */
void Vector_row_sync(Matrix_t *p, size_t idx, Vector_t vec, int offset){
    for(int i = 0; i < vec.len; i++){
        p->pBlock[offset+idx*N+i] = vec.pBlock[i];
    }
}
//-----------------------------Martrix-------------------------------//
/*
 * Deep copy of the 64x64 element block.
 * NOTE(review): only pBlock is set on the returned value — the caller must
 * run Matrix_def() on it before using method pointers (see Svd_decomposeUSV).
 * The malloc result is unchecked.
 */
Matrix_t Matrix_clone(Matrix_t *p)
{
    Matrix_t m;
    m.pBlock = (double *)malloc(sizeof(double)*L(64,64));
    memcpy(m.pBlock, p->pBlock, sizeof(double)*L(64,64));
    return m;
}
/* Overwrite all elements from a flat row-major array. */
void Matrix_filledwith(Matrix_t *p, const double values[])
{
    //p->pBlock = (double *)malloc(sizeof(double)*L(64,64));
    //memset(pBlock, 0.0, sizeof(double)*L(64,64));
    memcpy(p->pBlock, values, sizeof(double) * L(64,64));
}
/* Element accessor m(i, j) with bounds asserts (M == N, so N works for both). */
double Matrix_operator(Matrix_t *p, int i, int j)
{
    assert(i >= 0 && i < N);
    assert(j >= 0 && j < N);
    return p->pBlock[i*N+j];
}
/* Non-owning view of row i: contiguous, stride 1. */
Vector_t Matrix_row(Matrix_t *p, int i)
{
    Vector_t vec_tmp;
    vec_tmp.len = N;
    vec_tmp.delta = 1;
    vec_tmp.pBlock = p->pBlock + i*N;
    //return Vector< const double >(this->pBlock + i * N, N, 1);
    return vec_tmp;
}
/* Non-owning view of column j: strided by the row length N. */
Vector_t Matrix_column(Matrix_t *p, int j)
{
    Vector_t vec_tmp;
    vec_tmp.len = M;
    vec_tmp.delta = N;
    vec_tmp.pBlock = p->pBlock + j;
    return vec_tmp;
    //return Vector< double >(this->pBlock + j, M, N);
}
/* Free the element block; the pointer is nulled so double release is safe. */
void Matrix_release(Matrix_t *p)
{
    if (p->pBlock != NULL){
        free(p->pBlock);
        p->pBlock = NULL;
    }
}
/*
 * Allocate and zero the 64x64 element block (all-zero bytes are +0.0 for
 * IEEE-754 doubles, so memset is a valid way to clear the matrix).
 * NOTE(review): the malloc result is unchecked; OOM crashes in memset.
 */
void Matrix_init(Matrix_t *p)
{
    p->pBlock = (double *)malloc(sizeof(double)*L(64,64));
    // memset's fill argument is an int: pass 0, not the double literal 0.0
    // that the original relied on being implicitly converted.
    memset(p->pBlock, 0, sizeof(double)*L(64,64));
    //memcpy(p->pBlock, values, sizeof(double) * L(64,64));
}
/*
 * Bind the Matrix "methods".
 * NOTE(review): `clone` is deliberately left unbound (commented out);
 * call sites use Matrix_clone() directly.
 */
void Matrix_def(Matrix_t *p)
{
    //p->clone = Matrix_clone;
    p->filledwith = Matrix_filledwith;
    p->operator = Matrix_operator;
    p->row = Matrix_row;
    p->column = Matrix_column;
    p->release = Matrix_release;
}
//-----------------------------Rotator-------------------------------//
/* Return the larger of a and b (returns b on ties, like the original). */
double max(double a, double b)
{
    if (a > b) {
        return a;
    }
    return b;
}
/* Element accessor for the 2x2 rotation matrix (row-major elements[4]). */
double Rotator_operator(Rotator_t *p, int i, int j){
    assert(0 <= i && i < 2);
    assert(0 <= j && j < 2);
    return p->elements[i * 2 + j];
}
/*
 * Build a Givens rotation from (x1, x2): cs = x1/r, sn = x2/r with
 * r = sqrt(x1^2 + x2^2), stored row-major as [cs -sn; sn cs].
 * NOTE(review): if x1 == x2 == 0 then mx == 0 and the divisions below
 * produce NaNs — unlike Reflector_init there is no zero-input guard;
 * confirm callers never pass a zero pair.
 */
void Rotator_init(Rotator_t *p, double x1, double x2)
{
    // normalizes by the maximum magnitude
    // to avoid harmful underflow and overflow
    double mx = max(fabs(x1), fabs(x2));
    x1 /= mx;
    x2 /= mx;
    double norm = sqrt(x1 * x1 + x2 * x2);
    double cs = x1 / norm;   // cosine component
    double sn = x2 / norm;   // sine component
    p->elements[0] = cs;
    p->elements[1] = -sn;
    p->elements[2] = sn;
    p->elements[3] = cs;
    p->operator = Rotator_operator;
}
//-----------------------------Reflector-------------------------------//
/* Divide the first `len` elements of u by u0 (final scaling in Reflector_init). */
void Reflector_transform(Reflector_t *p, double u0, size_t len){
    int i;
    for (i = 0; i < len; i++){
        p->u.pBlock[i] = p->u.pBlock[i] /u0;
    }
}
/* Column update for the left application: dst = src2 - u * gUM. */
void Reflector_transform_left(Reflector_t *src1, Vector_t src2, Vector_t dst, double gUM, size_t len){
    int i;
    for (i = 0; i < len; i++){
        dst.pBlock[i] = src2.pBlock[i] - src1->u.pBlock[i] * gUM;
    }
}
/* Row update for the right application: dst = src2 - gMU * u. */
void Reflector_transform_right(Reflector_t *src1, Vector_t src2, Vector_t dst, double gMU, size_t len){
    int i;
    for (i = 0; i < len; i++){
        dst.pBlock[i] = src2.pBlock[i] - gMU * src1->u.pBlock[i];
    }
}
/*
 * Build the Householder reflector for v: computes u and gamma such that
 * H = I - gamma * u * u^T annihilates all but the first element of v.
 * Copies v, normalises by the maximum magnitude to avoid under/overflow,
 * accumulates tau = ||v|| (sign-matched to u[0]), then scales u by
 * u0 = u[0] + tau.  A zero v yields gamma = 0 (H = identity).
 * Owns the allocated u.pBlock; release with Reflector_release.
 */
void Reflector_init(Reflector_t *p, Vector_t v) {
    p->L = v.len;
    p->u.pBlock = (double *)malloc(sizeof(double)*v.len);   // unchecked malloc (matches file style)
    memcpy(p->u.pBlock, v.pBlock, sizeof(double)*v.len);
    // normalizes elements by the maximum amplitude
    // to avoid harmful underflow and overflow
    double mx = 0.0;
    for (size_t i = 0; i < p->L; ++i) {
        mx = max(fabs(p->u.pBlock[i]), mx);
    }
    if (mx > 0.0) {
        // calculates the normalized norm
        double tau = 0.0;
        for (size_t i = 0; i < p->L; ++i) {
            double x = p->u.pBlock[i] / mx;
            p->u.pBlock[i] = x;
            tau += x * x;
        }
        tau = sqrt(tau);
        // tau's sign should be the same as the first element in `u`
        if (p->u.pBlock[0] < 0.0) {
            tau = -tau;
        }
        double u0 = p->u.pBlock[0] + tau;
        p->u.pBlock[0] = u0;
        Reflector_transform(p, u0, p->L);
        p->gamma = u0 / tau;
    } else {
        // v is a zero vector
        p->gamma = 0.0;
        // BUG FIX: memset takes a BYTE count; the original passed p->L
        // (the element count) and a double fill value, so only L of the
        // 8*L bytes were zeroed.  Latent in practice — this branch only
        // runs when u was just copied from an all-zero v — but wrong.
        memset(p->u.pBlock, 0, sizeof(double) * p->L);
    }
}
/*
 * Free the reflector's u vector.  The pointer is nulled, so a second
 * release is safe (free(NULL) is a no-op, no guard needed).
 */
void Reflector_release(Reflector_t *p){
    free(p->u.pBlock);
    p->u.pBlock = NULL;
}
/* Dot product of the first n elements of a and b. */
double inner_product(double *a,double *b,int n){
    double total = 0.0;
    for (int k = 0; k < n; k++) {
        total += a[k] * b[k];   // same accumulation order as before: identical FP result
    }
    return total;
}
/*
 * Apply the Householder reflector from the left: H * m = m - gamma*u*(u^T m).
 * Consumes `m` (its block is released) and returns the transformed clone.
 * Fix: the inner gather loops reused `i`, shadowing the outer column index;
 * they now use `k` (behavior unchanged, but -Wshadow clean and readable).
 */
Matrix_t Reflector_applyFromLeftTo(Reflector_t *p, Matrix_t m){
    // H * m = m - gamma * u * u^T * m
    Matrix_t m2 = Matrix_clone(&m);
    Vector_t vec_m;
    Vector_t vec_m2;
    int offset = N - p->L;   // the reflector acts on the trailing L rows
    for (int i = 0; i < N; ++i) {
        // caches gamma * u^T * m for column i
        vec_m = Matrix_column(&m, i);
        Vector_t srcColumn = vector_slice(vec_m, offset);
        // gather the strided column view into a dense scratch array
        double v_src_column[srcColumn.len];
        for (size_t k = 0; k < srcColumn.len; k++) {
            v_src_column[k] = Vector_column_operator(&srcColumn, k);
        }
        srcColumn.pBlock = v_src_column;
        double gUM = inner_product(p->u.pBlock, srcColumn.pBlock, p->L);
        gUM *= p->gamma;
        // H * m = m - u * gUM
        vec_m2 = Matrix_column(&m2, i);
        Vector_t dstColumn = vector_slice(vec_m2, offset);
        double v_dstcolumn[dstColumn.len];
        for (size_t k = 0; k < dstColumn.len; k++) {
            v_dstcolumn[k] = Vector_column_operator(&dstColumn, k);
        }
        dstColumn.pBlock = v_dstcolumn;
        Reflector_transform_left(p, srcColumn, dstColumn, gUM, p->L);
        Vector_sync(&m2, i, dstColumn, offset);   // scatter the result back into m2
    }
    Matrix_release(&m);
    return m2;
}
/*
 * Apply the Householder reflector from the right: m*H = m - gamma*(m u)*u^T.
 * Mirrors Reflector_applyFromLeftTo but walks rows instead of columns.
 * Consumes `m` (its block is released) and returns the transformed clone.
 */
Matrix_t Reflector_applyFromRightTo(Reflector_t *p, Matrix_t m){
    // m * H = m - m * gamma * u * u^T
    Matrix_t m2 = Matrix_clone(&m);
    Vector_t vec_m;
    Vector_t vec_m2;
    int offset = 64 - p->L;   // NOTE(review): literal 64 where the left variant uses N
    for (int i = 0; i < M; ++i) {
        // caches gamma * m * u
        vec_m = Matrix_row(&m, i);
        Vector_t srcRow = vector_slice(vec_m, offset);
        double v_src_row[srcRow.len];   // dense scratch copy of the row view
        for(size_t j = 0; j< srcRow.len; j++){
            v_src_row[j] = Vector_row_operator(&srcRow, j);
        }
        srcRow.pBlock = v_src_row;
        double gMU = inner_product(p->u.pBlock, srcRow.pBlock, p->L);
        gMU *= p->gamma;
        // m * H = m - gMU * u^T
        vec_m2 = Matrix_row(&m2, i);
        Vector_t dstRow = vector_slice(vec_m2, offset);
        double v_dstrow[dstRow.len];
        for(size_t j = 0; j < dstRow.len; j++){
            v_dstrow[j] = Vector_row_operator(&dstRow, j);
        }
        dstRow.pBlock = v_dstrow;
        Reflector_transform_right(p ,srcRow, dstRow, gMU, p->L);
        Vector_row_sync(&m2, i, dstRow, offset);   // scatter the updated row into m2
    }
    Matrix_release(&m);
    return m2;
}
//-----------------------------Svd-------------------------------//
/*
 * qsort comparator producing DESCENDING order of doubles.
 * Returns 1 when *e2 > *e1, 0 on equality, else -1; NaN operands fall
 * through to the -1 branch exactly as in the subtraction-based original.
 */
int cmp_double(const void* e1, const void* e2)
{
    double lhs = *(const double *) e1;
    double rhs = *(const double *) e2;
    if (rhs > lhs)
        return 1;
    if (rhs == lhs)
        return 0;
    return -1;
}
/*
 * Compute the singular values of m: bidiagonalise, run Francis QR
 * iterations until the superdiagonal is negligible (or the iteration cap
 * is hit), force the values positive, and return them descending in a
 * DiagonalMatrix.
 * Ownership: clones `m` internally and releases both the clone and the
 * bidiagonal working matrix; the caller owns the returned matrix's block
 * (Svd_isFullRank releases it).
 */
DiagonalMatrix_t Svd_decomposeUSV(BidiagonalMatrix_t *p, Matrix_t *m) {
    const int MAX_ITERATIONS = N * 10;
    // allocates matrices
    Matrix_t m1 = Matrix_clone(m);
    Matrix_def(&m1);   // bind method pointers on the fresh clone (clone() does not)
    // bidiagonalizes a given matrix
    BidiagonalMatrix_t m2 = p->bidiagonalize(p, m1);
    // repeats Francis iteration
    int iteration = 0;
    int n = N;
    while (n >= 2) {
        // processes the n-1 x n-1 submatrix
        // if the current n x n submatrix has converged
        double bn = m2.operator(&m2, n - 1, n - 1);
        if (bn == 0.0 || fabs(m2.operator(&m2, n - 2, n - 1) / bn) < 1.0e-15) {
            --n;   // superdiagonal entry negligible: deflate
        } else {
            // aborts if too many iterations
            ++iteration;
            if (iteration > MAX_ITERATIONS) {
                break;
            }
            m2.doFrancis(&m2, n);
        }
    }
    // copies the diagonal elements
    // and makes all singular values positive
    double ss[N];
    for (int i = 0; i < N; ++i) {
        if (m2.operator(&m2, i, i) < 0) {
            ss[i] = -m2.operator(&m2, i, i);
            // inverts the sign of the right singular vector
            // (U/V updates from the original C++ are not ported to this C version)
            //Vector< double > vi = v.column(i);
            //std::transform(
            //  vi.begin(), vi.end(), vi.begin(),
            //  [](double x) {
            //      return -x;
            //  });
        } else {
            ss[i] = m2.operator(&m2, i, i);
        }
    }
    // sorts singular values in descending order if necessary
    // NOTE(review): shuffle[] is written but never read afterwards.
    int shuffle[M]; // M >= N
    bool sortNeeded = false;
    for (int i = 0; i < M; ++i) {
        shuffle[i] = i;
        sortNeeded = sortNeeded || (i < N - 1 && ss[i] < ss[i + 1]);
    }
    m1.release(&m1);
    BidiagonalMatrix_release(p);
    DiagonalMatrix_t dm;
    if (sortNeeded) {
        // shuffles the N (<= M) singular values (cmp_double sorts descending)
        qsort(ss, N,sizeof(double), cmp_double);
        // NOTE(review): ss has N elements but M*sizeof(double) bytes are
        // copied — safe only because M == N == 64 here.
        double ss2[M];
        memcpy(ss2, ss, M*sizeof(double));
        DiagonalMatrix_init(&dm, ss2);
        return dm;
    } else {
        DiagonalMatrix_init(&dm, ss);
        return dm;
    }
}
/*
 * Returns true when every diagonal entry of p exceeds the round-off
 * threshold in magnitude. Releases p exactly once before returning,
 * in both outcomes.
 */
bool Svd_isFullRank(DiagonalMatrix_t *p, const int size) {
    const double round_off = 1.000009e-12;
    bool fullRank = true;
    for (int i = 0; i < size; ++i) {
        if (fabs(p->operator(p, i, i)) < round_off) {
            fullRank = false;
            break;
        }
    }
    p->release(p);
    return fullRank;
}
//-----------------------------BidiagonalMatrix_t-------------------------------//
// Golub-Kahan bidiagonalization: for each column i, applies a Householder
// reflector from the left (zeroing below the diagonal of column i), then —
// except for the last column — a reflector from the right (zeroing right of
// the superdiagonal in row i), and finally packs the bidiagonal into *p.
// U/V accumulation is commented out, so only the bidiagonal survives.
// NOTE(review): the slice values are copied into stack VLAs whose address is
// stored in the slice's pBlock before Reflector_init — this assumes
// Reflector_init copies the data rather than retaining the pointer; confirm.
BidiagonalMatrix_t BidiagonalMatrix_bidiagonalize(BidiagonalMatrix_t *p, Matrix_t m)
{
    assert(M >= N);
    Vector_t vec_m;
    Vector_t vec_m2;
    for (int i = 0; i < N; ++i) {
        Reflector_t rU;
        vec_m = Matrix_column(&m, i);
        Vector_t column_slice = vector_slice(vec_m, i);
        // applies a householder transform to the column vector i
        // (the inner size_t `i` shadows the outer loop variable)
        double v_column[column_slice.len];
        for(size_t i = 0; i < column_slice.len; i++){
            v_column[i] = Vector_column_operator(&column_slice, i);
        }
        column_slice.pBlock = v_column;
        Reflector_init(&rU, column_slice);
        m = Reflector_applyFromLeftTo(&rU, m);
        Reflector_release(&rU);
        //u = rU.applyFromRightTo(u); // U1^T*U0^T = U0*U1
        if (i < N - 1) {
            // applies a householder transform to the row vector i + 1
            //Reflector< N > rV(m.row(i).slice(i + 1));
            Reflector_t rV;
            vec_m2 = Matrix_row(&m, i);
            Vector_t row_slice = vector_slice(vec_m2, i+1);
            double v_row[row_slice.len];
            for(size_t i = 0; i < row_slice.len; i++){
                v_row[i] = Vector_row_operator(&row_slice, i);
            }
            row_slice.pBlock = v_row;
            Reflector_init(&rV, row_slice);
            m = Reflector_applyFromRightTo(&rV, m);
            //m = rV.applyFromRightTo(m);
            //v = rV.applyFromRightTo(v);
            Reflector_release(&rV);
        }
    }
    // pack the (now bidiagonal) matrix into the compact 2N-1 representation
    BidiagonalMatrix_init(p, &m);
    return *p;
}
/* Frees the packed bidiagonal storage. Safe to call repeatedly:
 * free(NULL) is a no-op and the pointer is nulled after release. */
void BidiagonalMatrix_release(BidiagonalMatrix_t *p)
{
    free(p->pBlock);
    p->pBlock = NULL;
}
/*
 * Element accessor for the packed bidiagonal.
 * Layout: pBlock[2k] = diagonal b_k, pBlock[2k+1] = superdiagonal g_k.
 * Entry (i,i) is the diagonal, (i,i+1) the superdiagonal, all else 0.
 */
double BidiagonalMatrix_operator(BidiagonalMatrix_t *p, int i, int j)
{
    assert(i >= 0 && i < M);
    assert(j >= 0 && j < N);
    if (j == i)
        return p->pBlock[2 * i];
    if (j == i + 1)
        return p->pBlock[2 * i + 1];
    return 0.0;
}
/*
 * Applies the leading 2x2 Givens rotator R from the right to the top-left
 * block of the bidiagonal (b1 g1 / 0 b2) and returns the bulge element
 * created below the diagonal.
 */
double BidiagonalMatrix_applyFirstRotatorFromRight(BidiagonalMatrix_t *p, Rotator_t *r)
{
    const double b1 = p->pBlock[0];
    const double g1 = p->pBlock[1];
    const double b2 = p->pBlock[2];
    const double r11 = Rotator_operator(r, 0, 0);
    const double r12 = Rotator_operator(r, 0, 1);
    const double r21 = Rotator_operator(r, 1, 0);
    const double r22 = Rotator_operator(r, 1, 1);
    p->pBlock[0] = b1 * r11 + g1 * r21;
    p->pBlock[1] = b1 * r12 + g1 * r22;
    p->pBlock[2] = b2 * r22;
    return b2 * r21;   /* the bulge */
}
/*
 * Applies a 2x2 Givens rotator from the right at diagonal position n,
 * absorbing the incoming bulge (sitting next to g0) and returning the
 * new bulge created below b2.
 */
double BidiagonalMatrix_applyRotatorFromRight(BidiagonalMatrix_t *ptr, Rotator_t *r, int n, double bulge)
{
    double *blk = ptr->pBlock + 2 * n;
    const double g0 = blk[-1];
    const double b1 = blk[0];
    const double g1 = blk[1];
    const double b2 = blk[2];
    const double r11 = r->operator(r, 0, 0);
    const double r12 = r->operator(r, 0, 1);
    const double r21 = r->operator(r, 1, 0);
    const double r22 = r->operator(r, 1, 1);
    blk[-1] = g0 * r11 + bulge * r21;
    blk[0]  = b1 * r11 + g1 * r21;
    blk[1]  = b1 * r12 + g1 * r22;
    blk[2]  = b2 * r22;
    return b2 * r21;
}
/*
 * Applies a 2x2 Givens rotator from the left at diagonal position n,
 * absorbing the incoming bulge. Returns the new bulge, or 0.0 once the
 * sweep has reached the bottom-right corner (no g2 element remains).
 */
double BidiagonalMatrix_applyRotatorFromLeft(BidiagonalMatrix_t *ptr, Rotator_t *r, int n, double bulge)
{
    double *blk = ptr->pBlock + 2 * n;
    const double b1 = blk[0];
    const double g1 = blk[1];
    const double b2 = blk[2];
    const double r11 = r->operator(r, 0, 0);
    const double r12 = r->operator(r, 0, 1);
    const double r21 = r->operator(r, 1, 0);
    const double r22 = r->operator(r, 1, 1);
    blk[0] = r11 * b1 + r21 * bulge;
    blk[1] = r11 * g1 + r21 * b2;
    blk[2] = r12 * g1 + r22 * b2;
    if (n < N - 2) {
        const double g2 = blk[3];
        blk[3] = r22 * g2;
        return r21 * g2;
    }
    return 0.0;
}
// Computes the shift rho for the Francis iteration from the trailing
// 2x2 block of the bidiagonal (entries b1, g1, b2 at rows n-2..n-1),
// choosing the eigenvalue-derived root closest to b2. Falls back to b2
// itself when the quartic has no real solution.
double BidiagonalMatrix_calculateShift(BidiagonalMatrix_t *m, int n)
{
    assert(M >= N);
    assert(n >= 2);
    double b1 = m->operator(m, n - 2, n - 2);
    double b2 = m->operator(m, n - 1, n - 1);
    double g1 = m->operator(m, n - 2, n - 1);
    // solves lambda^4 - d*lambda^2 + e = 0
    // where
    //  d = b1^2 + b2^2 + g1^2
    //  e = b1^2 * b2^2
    // chooses lambda (rho) closest to b2
    double rho;
    double d = b1 * b1 + b2 * b2 + g1 * g1;
    double e = b1 * b1 * b2 * b2;
    // lambda^2 = (d +- sqrt(d^2 - 4e)) / 2
    // so, f = d^2 - 4e must be positive for a real solution
    double f = d * d - 4 * e;
    if (f >= 0) {
        f = sqrt(f);
        // lambda = +-sqrt(d +- f) (d >= 0, f >= 0)
        // if d > f, both d+f and d-f have real square roots
        // otherwise considers only d+f
        if (d > f) {
            // lets l1 > l2
            double l1 = sqrt((d + f) * 0.5);
            double l2 = sqrt((d - f) * 0.5);
            // if b2 >= 0, chooses a positive shift
            // otherwise chooses a negative shift
            if (b2 >= 0) {
                if (fabs(b2 - l1) < fabs(b2 - l2)) {
                    rho = l1;
                } else {
                    rho = l2;
                }
            } else {
                if (fabs(b2 + l1) < fabs(b2 + l2)) {
                    rho = -l1;
                } else {
                    rho = -l2;
                }
            }
        } else {
            // only d+f yields a real root; pick its sign nearest b2
            double l1 = sqrt((d + f) * 0.5);
            if (fabs(b2 - l1) <= fabs(b2 + l1)) {
                rho = l1;
            } else {
                rho = -l1;
            }
        }
    } else {
        // no solution. chooses b2 as the shift
        rho = b2;
    }
    return rho;
}
// One implicit-shift (Francis) QR sweep on the leading n x n block of the
// bidiagonal: the first right rotator creates a bulge, which is then chased
// down the band with alternating left/right rotators until it falls off.
// NOTE(review): `max` is not a C standard function — presumably a project
// macro over doubles (an fmax equivalent); confirm its definition.
void BidiagonalMatrix_doFrancis(BidiagonalMatrix_t *m, int n)
{
    assert(M >= N);
    assert(n >= 2);
    // calculates the shift
    double rho = m->calculateShift(m, n);
    // applies the first right rotator
    double b1 = m->operator(m, 0, 0);
    double g1 = m->operator(m, 0, 1);
    // scale by the largest magnitude so the squares below cannot overflow
    double mx = max(fabs(rho), max(fabs(b1), fabs(g1)));
    rho /= mx;
    b1 /= mx;
    g1 /= mx;
    //Rotator_t r0(b1 * b1 - rho * rho, b1 * g1);
    Rotator_t r0;
    Rotator_init(&r0, b1 * b1 - rho * rho, b1 * g1);
    double bulge = m->applyFirstRotatorFromRight(m, &r0);
    //v = r0.applyFromRightTo(&r0, v, 0);
    // applies the first left rotator
    Rotator_t r1;
    Rotator_init(&r1, m->operator(m, 0, 0), bulge);
    //Rotator_t r1(m(0, 0), bulge);
    bulge = m->applyRotatorFromLeft(m, &r1, 0, bulge);
    //u = r1.applyFromRightTo(&r1, u, 0); // U1^T*U0^T = U0*U1
    for (int i = 1; i + 1 < n; ++i) {
        // calculates (i+1)-th right rotator
        //Rotator rV(m(i - 1, i), bulge);
        Rotator_t rV;
        Rotator_init(&rV, m->operator(m, i - 1, i), bulge);
        bulge = m->applyRotatorFromRight(m, &rV, i, bulge);
        //v = rV.applyFromRightTo(&rV, v, i);
        // calculates (i+1)-th left rotator
        //Rotator rU(m(i, i), bulge);
        Rotator_t rU;
        Rotator_init(&rU, m->operator(m, i, i), bulge);
        bulge = m->applyRotatorFromLeft(m, &rU, i, bulge);
        //u = rU.applyFromRightTo(rU, u, i); // U1^T*U0^T = U0*U1
    }
}
// Wires up the BidiagonalMatrix "vtable" function pointers.
// NOTE(review): the destructor is assigned to a field named `releases`
// (plural), while the sibling types (Matrix_t, DiagonalMatrix_t) expose
// `release` — confirm the struct really declares `releases`; a typo here
// would leave a `release` pointer uninitialized.
void BidiagonalMatrix_def(BidiagonalMatrix_t *p)
{
    p->applyFirstRotatorFromRight = BidiagonalMatrix_applyFirstRotatorFromRight;
    p->applyRotatorFromLeft = BidiagonalMatrix_applyRotatorFromLeft;
    p->applyRotatorFromRight = BidiagonalMatrix_applyRotatorFromRight;
    p->bidiagonalize = BidiagonalMatrix_bidiagonalize;
    p->calculateShift = BidiagonalMatrix_calculateShift;
    p->doFrancis = BidiagonalMatrix_doFrancis;
    p->operator = BidiagonalMatrix_operator;
    p->releases = BidiagonalMatrix_release;
}
/*
 * Packs the bidiagonal part of *m into p->pBlock:
 * pBlock[2i] = m(i,i), pBlock[2i+1] = m(i,i+1); total length 2N-1.
 *
 * BUGFIX: the original zeroed the buffer with memset(ptr, 0.0, len) —
 * passing a double where memset expects an int — and never checked the
 * allocation. calloc zero-fills (all-bits-zero == 0.0 for IEEE doubles)
 * and the NULL check keeps release() safe on failure.
 */
void BidiagonalMatrix_init(BidiagonalMatrix_t *p, Matrix_t *m)
{
    assert(M >= N);
    const int len = 2 * N - 1;
    p->pBlock = calloc((size_t)len, sizeof(double));
    if (p->pBlock == NULL) {
        return;   /* out of memory: leave the matrix empty */
    }
    for (int i = 0; i < N; ++i) {
        p->pBlock[i * 2] = Matrix_operator(m, i, i);
        if (i < N - 1) {
            p->pBlock[i * 2 + 1] = Matrix_operator(m, i, i + 1);
        }
    }
}
// Returns true when the 64x64 heavyhash matrix has full rank, judged by
// the singular values of its double-precision copy.
// NOTE(review): `dm` is initialized from the raw matrix and then only
// released — it takes no part in the computation; presumably leftover
// from an earlier design. Confirm before removing.
bool IsFullRank(const uint64_t matrix_[64*64])
{
    double matrix__ [64*64];
    // Matrix<64, 64> matrix;
    // widen the 4-bit integer entries to doubles for the SVD
    for (int i = 0; i < 64; ++i) {
        for (int j = 0; j < 64; ++j) {
            matrix__[64*i + j] = (double) matrix_[64*i + j];
        }
    }
    DiagonalMatrix_t dm;
    Matrix_t mt;
    BidiagonalMatrix_t bt;
    DiagonalMatrix_init(&dm, matrix__);
    //matrix.fill(matrix__);
    Matrix_init(&mt);
    Matrix_def(&mt);
    mt.filledwith(&mt, matrix__);
    BidiagonalMatrix_def(&bt);
    DiagonalMatrix_t usv = Svd_decomposeUSV(&bt, &mt);
    DiagonalMatrix_t singularValues = usv;
    mt.release(&mt);
    dm.release(&dm);
    //DiagonalMatrix_release(&dm);
    // Svd_isFullRank releases usv internally
    return Svd_isFullRank(&usv,64);
}
/*
 * Reads the pos-th little-endian 64-bit word from a byte buffer.
 * Assembled byte-by-byte, so the result is independent of host endianness.
 */
uint64_t GetUint64_t(uint8_t *data, int pos)
{
    const uint8_t *src = data + pos * 8;
    uint64_t value = 0;
    for (int b = 7; b >= 0; --b) {
        value = (value << 8) | (uint64_t)src[b];
    }
    return value;
}
/*
 * Seeds the xoshiro256++ state `s` from the 32 bytes in Obtc->data_r
 * (four little-endian 64-bit words).
 * NOTE(review): the `seed` parameter is intentionally unused — the
 * original uint256-based seeding (commented line) was replaced by
 * reading Obtc->data_r. It is kept only for interface compatibility;
 * the (void) cast silences -Wunused-parameter.
 */
void XoShiRo256PlusPlus_init(Obtc_t *Obtc, uint64_t *s, uint256 seed) {
    (void)seed;
    for (int i = 0; i < 4; ++i) {
        //p->s[i] = seed.GetUint64(i);
        s[i] = GetUint64_t(Obtc->data_r, i);
    }
}
/*
 * Rotates x left by k bits.
 *
 * BUGFIX: the original computed x >> (64 - k), which is undefined
 * behavior for k == 0 (shift by the full word width). Masking k to
 * 0..63 and short-circuiting k == 0 makes every k safe while leaving
 * the existing call sites (k = 23, 45) bit-identical.
 */
uint64_t RotateLeft64(const uint64_t x, int k) {
    k &= 63;
    if (k == 0)
        return x;
    return (x << k) | (x >> (64 - k));
}
/*
 * xoshiro256++ next(): returns rotl(s0 + s3, 23) + s0 and advances the
 * 256-bit state in place. The two rotations are written inline so the
 * function is self-contained.
 */
uint64_t XoShiRo256PlusPlus_operator(uint64_t *s){
    const uint64_t sum = s[0] + s[3];
    const uint64_t result = ((sum << 23) | (sum >> 41)) + s[0];
    const uint64_t t = s[1] << 17;
    s[2] ^= s[0];
    s[3] ^= s[1];
    s[1] ^= s[2];
    s[0] ^= s[3];
    s[2] ^= t;
    s[3] = (s[3] << 45) | (s[3] >> 19);   /* rotl(s3, 45) */
    return result;
}
// Fills `matrix` with 64x64 4-bit values drawn from xoshiro256++ (seeded
// off Obtc->data_r; matrix_seed itself is ignored by the init — see
// XoShiRo256PlusPlus_init), retrying until Is4BitPrecision accepts it.
// NOTE(review): the full-rank requirement of upstream heavyhash is
// commented out here — confirm this matches the pool's consensus rules.
void GenerateHeavyHashMatrix_t(Obtc_t *Obtc, uint256 matrix_seed, uint64_t matrix[64*64])
{
    XoShiRo256PlusPlus_init(Obtc, Obtc->ss, matrix_seed);
    do {
        for (int i = 0; i < 64; ++i) {
            for (int j = 0; j < 64; j += 16) {
                // each 64-bit PRNG word yields 16 nibbles
                uint64_t value = XoShiRo256PlusPlus_operator(Obtc->ss);//generator();
                for (int shift = 0; shift < 16; ++shift) {
                    matrix[64*i + j + shift] = (value >> (4 * shift)) & 0xF;
                }
            }
        }
    //} while (!Is4BitPrecision(matrix) || !IsFullRank(matrix));
    }while(!Is4BitPrecision(matrix));
}
/*
 * Runs the heavyhash of `in` (len bytes) into `out`, after loading the
 * fixed 200-byte initial sponge state into Obtc->const_data (consumed in
 * reversed order by sha3_init).
 *
 * Improvement: the constant table is now `static const` and copied with a
 * single memcpy instead of being rebuilt on the stack and copied
 * byte-by-byte on every call. The bytes are unchanged.
 */
void serialize_heavyhash(Obtc_t *Obtc, uint64_t matrix[64*64], const char* in, char* out, int len)
{
    static const uint8_t kInitState[200] = {
    0x02,0xb9,0x7c,0x78,0x6f,0x82,0x43,0x83,0x5d,0x11,0x29,0xcf,0x82,0xaf,0xa5,0xbc,0xb1,0xfc,0xce,0x9c,
    0xe7,0x8b,0x52,0x72,0x48,0xb0,0x94,0x27,0xa8,0x74,0x2e,0xdb,0x89,0xca,0x4e,0x84,0x9b,0xce,0xcf,0x4a,
    0xd1,0x02,0x57,0x41,0x05,0x09,0x5f,0x8d,0xba,0x1d,0xe5,0xe4,0x45,0x16,0x68,0xe4,0xc1,0xa2,0x02,0x1d,
    0x56,0x3b,0xb1,0x42,0x8f,0x06,0xdd,0x1c,0x7a,0x2f,0x85,0x1a,0x34,0x85,0x54,0x90,0x64,0xa3,0x6a,0x46,
    0xb2,0x1a,0x60,0x1f,0x85,0xb4,0xb2,0x23,0xe6,0xc8,0x5d,0x8f,0x82,0xe9,0xda,0x89,0xec,0x70,0xf1,0xa4,
    0x25,0xb1,0x37,0x15,0x44,0xe3,0x67,0x87,0x5b,0x29,0x91,0x52,0x0f,0x96,0x07,0x05,0x40,0xf1,0x4a,0x0e,
    0x2e,0x65,0x1c,0x3c,0x43,0x28,0x5f,0xf0,0xf8,0xeb,0xf1,0x33,0x88,0x66,0x31,0x40,0x77,0x6b,0xf6,0x0c,
    0x78,0x9b,0xc2,0x9c,0x18,0x3a,0x98,0x1e,0xad,0x41,0x5b,0x10,0x4a,0xef,0x61,0xd6,0x29,0xdc,0xe2,0x46,
    0x7b,0x2f,0xaf,0xca,0x87,0x5e,0x2d,0x65,0x1b,0xa5,0xa4,0xa3,0xf5,0x98,0x69,0xa0,0x1e,0x5f,0x2e,0x72,
    0x0e,0xfb,0x44,0xd2,0x29,0xbf,0x88,0x55,0xb7,0x02,0x7e,0x3c,0x11,0x3c,0xff,0x0d,0xa1,0xf6,0xd8,0x3d
    };
    memcpy(Obtc->const_data, kInitState, sizeof kInitState);
    CHeavyHash_init(Obtc, &Obtc->CHeavyHash_p, matrix);
    CHeavyHash_Write(&Obtc->CHeavyHash_p, (const unsigned char*)in, len);
    CHeavyHash_Finalize(Obtc, &Obtc->CHeavyHash_p, (unsigned char*)out);
}
/*
 * Top-level OPOW hash: copies the first 32 bytes of `in` into the PRNG
 * seed area (data_r), generates the heavyhash matrix, then runs the
 * serialized heavyhash over the whole `len`-byte input into `out`.
 *
 * Fixes: `seed` was previously passed to GenerateHeavyHashMatrix_t
 * uninitialized (reading an indeterminate value — the callee happens to
 * ignore it, but passing it by value still reads it), and the unused
 * local `hashprev` is removed.
 */
void opticalbtc_hash(const char* in, char* out, int len)
{
    const uint8_t *ptr = (const uint8_t *)in;
    uint256 seed;
    memset(&seed, 0, sizeof seed);   /* value is ignored downstream; zero for determinism */
    uint64_t matrix[64*64];
    Obtc_t Obtc;
    CSHA3_256_init(&Obtc, &Obtc.CSHA3_256_p);
    memcpy(Obtc.data_r, ptr, 32);
    GenerateHeavyHashMatrix_t(&Obtc, seed, matrix);
    serialize_heavyhash(&Obtc, matrix, in, out, len);
}

52
heavyHash/obtc.h Normal file
View File

@ -0,0 +1,52 @@
#ifndef OBTC_H
#define OBTC_H
#include "uint256.h"
#include "xoshiro256pp.h"
#include "Svd.h"
#include "DiagonalMatrix.h"
#include "Matrix.h"
#include "Rotator.h"
#include "heavyhash.h"
typedef struct Obtc_opt Obtc_t;
// Aggregate working state for one opticalbtc_hash invocation.
struct Obtc_opt{
uint8_t data_r[32];            // first 32 bytes of the input; seeds the PRNG (see XoShiRo256PlusPlus_init)
uint64_t ss[4];                // xoshiro256++ state
uint8_t const_data[200];       // fixed initial sponge state (read reversed by sha3_init)
CSHA3_256 CSHA3_256_p;         // SHA3-256 context
CHeavyHash CHeavyHash_p;       // heavyhash context
uint256 g_hash_first;          // NOTE(review): not used in this chunk — confirm purpose
XoShiRo256PlusPlus_t *xo;      // NOTE(review): appears unused; ss[] is used instead
DiagonalMatrix_t g_DiagonalMatrix;  // NOTE(review): usage not visible here
};
//struct Obtc_opt;
bool Is4BitPrecision(const uint64_t matrix[64*64]);
bool IsFullRank(const uint64_t matrix_[64*64]);
void GenerateHeavyHashMatrix(uint256 matrix_seed, uint64_t matrix[64*64]);
void serialize_heavyhash(Obtc_t *Obtc, uint64_t matrix[64*64], const char* in, char* out, int len);
void opticalbtc_hash(const char* in, char* out, int len);
extern void CSHA3_256_init(Obtc_t *Obtc, CSHA3_256 *p);
extern void CSHA3_256_CSHA3_256(Obtc_t *Obtc, CSHA3_256 *p);
extern void CSHA3_256_Write(CSHA3_256 *p, const unsigned char* data, size_t len);
extern void CSHA3_256_Finalize(CSHA3_256 *p, unsigned char hash[OUTPUT_SIZE]);
//extern void CSHA3_256_Reset(Obtc_t *Obtc, CSHA3_256 *p);
extern void CHeavyHash_init(Obtc_t *Obtc, CHeavyHash *p, uint64_t matrix_[64*64]);
extern void CHeavyHash_Write(CHeavyHash *p, const unsigned char* data, size_t len);
extern void CHeavyHash_Finalize(Obtc_t *Obtc, CHeavyHash *p, unsigned char hash[OUTPUT_SIZE]);
extern int sha3_init(Obtc_t *Obtc,sha3_ctx_t *c, int mdlen); // mdlen = hash output in bytes
#endif // OBTC_H

BIN
heavyHash/obtc.o Normal file

Binary file not shown.

199
heavyHash/sha3.c Normal file
View File

@ -0,0 +1,199 @@
// sha3.c
// 19-Nov-11 Markku-Juhani O. Saarinen <mjos@iki.fi>
// Revised 07-Aug-15 to match with official release of FIPS PUB 202 "SHA3"
// Revised 03-Sep-15 for portability + OpenSSL - style API
#include <stdio.h>
#include "sha3.h"
#include "obtc.h"
// update the state with given number of rounds
// Keccak-f[1600] permutation over the 25-lane state, KECCAKF_ROUNDS (24)
// rounds of Theta / Rho-Pi / Chi / Iota, with byte-swapping wrappers for
// big-endian hosts. This follows the well-known Saarinen reference
// implementation (see the file header).
void sha3_keccakf(uint64_t st[25])
{
    // round constants (Iota step)
    const uint64_t keccakf_rndc[24] = {
        0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
        0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
        0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
        0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
        0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
        0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
        0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
        0x8000000000008080, 0x0000000080000001, 0x8000000080008008
    };
    // per-lane rotation offsets (Rho step)
    const int keccakf_rotc[24] = {
        1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
        27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44
    };
    // lane permutation order (Pi step)
    const int keccakf_piln[24] = {
        10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
        15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1
    };
    // variables
    int i, j, r;
    uint64_t t, bc[5];
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
    uint8_t *v;
    // endianness conversion. this is redundant on little-endian targets
    for (i = 0; i < 25; i++) {
        v = (uint8_t *) &st[i];
        st[i] = ((uint64_t) v[0]) | (((uint64_t) v[1]) << 8) |
            (((uint64_t) v[2]) << 16) | (((uint64_t) v[3]) << 24) |
            (((uint64_t) v[4]) << 32) | (((uint64_t) v[5]) << 40) |
            (((uint64_t) v[6]) << 48) | (((uint64_t) v[7]) << 56);
    }
#endif
    // actual iteration
    for (r = 0; r < KECCAKF_ROUNDS; r++) {
        // Theta: column parities mixed back into every lane
        for (i = 0; i < 5; i++)
            bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20];
        for (i = 0; i < 5; i++) {
            t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
            for (j = 0; j < 25; j += 5)
                st[j + i] ^= t;
        }
        // Rho Pi: rotate each lane and move it to its permuted position
        t = st[1];
        for (i = 0; i < 24; i++) {
            j = keccakf_piln[i];
            bc[0] = st[j];
            st[j] = ROTL64(t, keccakf_rotc[i]);
            t = bc[0];
        }
        // Chi: nonlinear row mixing
        for (j = 0; j < 25; j += 5) {
            for (i = 0; i < 5; i++)
                bc[i] = st[j + i];
            for (i = 0; i < 5; i++)
                st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5];
        }
        // Iota: inject the round constant
        st[0] ^= keccakf_rndc[r];
    }
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
    // endianness conversion. this is redundant on little-endian targets
    for (i = 0; i < 25; i++) {
        v = (uint8_t *) &st[i];
        t = st[i];
        v[0] = t & 0xFF;
        v[1] = (t >> 8) & 0xFF;
        v[2] = (t >> 16) & 0xFF;
        v[3] = (t >> 24) & 0xFF;
        v[4] = (t >> 32) & 0xFF;
        v[5] = (t >> 40) & 0xFF;
        v[6] = (t >> 48) & 0xFF;
        v[7] = (t >> 56) & 0xFF;
    }
#endif
}
// Initialize the context for SHA3
/*
 * Non-standard SHA-3 init: instead of an all-zero sponge, preloads the
 * 200-byte state with Obtc->const_data in REVERSED byte order.
 * mdlen is the digest size in bytes; rsiz is the resulting sponge rate.
 * Returns 1 (OpenSSL-style success).
 */
int sha3_init(Obtc_t *Obtc, sha3_ctx_t *c, int mdlen)
{
    const uint8_t *src = Obtc->const_data + 199;
    for (int i = 0; i < 200; ++i) {
        c->st.b[i] = *src--;
    }
    c->mdlen = mdlen;
    c->rsiz = 200 - 2 * mdlen;
    c->pt = 0;
    return 1;
}
// update state with more data
/*
 * Absorbs `len` bytes into the sponge, permuting with sha3_keccakf
 * whenever the rate-sized block fills. Returns 1 (success).
 */
int sha3_update(sha3_ctx_t *c, const void *data, size_t len)
{
    const uint8_t *bytes = (const uint8_t *)data;
    int pos = c->pt;
    for (size_t i = 0; i < len; i++) {
        c->st.b[pos++] ^= bytes[i];
        if (pos >= c->rsiz) {
            sha3_keccakf(c->st.q);
            pos = 0;
        }
    }
    c->pt = pos;
    return 1;
}
// finalize and output a hash
// Pads the sponge, permutes once more, and copies the first mdlen state
// bytes out as the digest.
// NOTE(review): the domain/padding byte is 0x04 here instead of SHA-3's
// standard 0x06 (kept commented above) — this is a deliberate heavyhash
// variant; confirm against the pool's reference implementation.
int sha3_final(void *md, sha3_ctx_t *c)
{
    int i;
    // c->st.b[c->pt] ^= 0x06;
    c->st.b[c->pt] ^= 0x04;
    c->st.b[c->rsiz - 1] ^= 0x80;
    sha3_keccakf(c->st.q);
    // emit the first mdlen bytes of the state as the digest
    for (i = 0; i < c->mdlen; i++) {
        ((uint8_t *) md)[i] = c->st.b[i];
    }
    return 1;
}
// compute a SHA-3 hash (md) of given byte length from "in"
/*void *sha3(const void *in, size_t inlen, void *md, int mdlen)
{
sha3_ctx_t sha3;
sha3_init(&sha3, mdlen);
sha3_update(&sha3, in, inlen);
sha3_final(md, &sha3);
return md;
}*/
// SHAKE128 and SHAKE256 extensible-output functionality
// Switches the sponge from absorbing to squeezing: applies the SHAKE
// domain byte 0x1F plus the final 0x80 pad, permutes once, and resets
// the read position for shake_out.
void shake_xof(sha3_ctx_t *c)
{
    c->st.b[c->pt] ^= 0x1F;
    c->st.b[c->rsiz - 1] ^= 0x80;
    sha3_keccakf(c->st.q);
    c->pt = 0;
}
/*
 * Squeezes `len` output bytes from the sponge, permuting again each time
 * the rate-sized window is exhausted. Resumable: the read position is
 * kept in c->pt between calls.
 */
void shake_out(sha3_ctx_t *c, void *out, size_t len)
{
    uint8_t *dst = (uint8_t *)out;
    int pos = c->pt;
    for (size_t i = 0; i < len; i++) {
        if (pos >= c->rsiz) {
            sha3_keccakf(c->st.q);
            pos = 0;
        }
        dst[i] = c->st.b[pos++];
    }
    c->pt = pos;
}

51
heavyHash/sha3.h Normal file
View File

@ -0,0 +1,51 @@
// sha3.h
// 19-Nov-11 Markku-Juhani O. Saarinen <mjos@iki.fi>
#ifndef SHA3_H
#define SHA3_H
#include <stddef.h>
#include <stdint.h>
#ifndef KECCAKF_ROUNDS
#define KECCAKF_ROUNDS 24
#endif
#ifndef ROTL64
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
#endif
// state context
// SHA-3 / Keccak sponge context: the 1600-bit state viewed either as
// bytes (for absorbing/squeezing) or as 64-bit lanes (for the permutation).
typedef struct {
    union {                       // state:
        uint8_t b[200];           // 8-bit bytes
        uint64_t q[25];           // 64-bit words
    } st;
    int pt, rsiz, mdlen;          // byte position, sponge rate, digest length; these don't overflow
} sha3_ctx_t;
// Compression function.
void sha3_keccakf(uint64_t st[25]);
// OpenSSL-like interface
int sha3_update(sha3_ctx_t *c, const void *data, size_t len);
int sha3_final(void *md, sha3_ctx_t *c); // digest goes to md
// compute a sha3 hash (md) of given byte length from "in"
void *sha3(const void *in, size_t inlen, void *md, int mdlen);
// SHAKE128 and SHAKE256 extensible-output functions
//#define shake128_init(c) sha3_init(c, 16)
//#define shake256_init(c) sha3_init(c, 32)
//#define shake_update sha3_update
void shake_xof(sha3_ctx_t *c);
void shake_out(sha3_ctx_t *c, void *out, size_t len);
#endif

BIN
heavyHash/sha3.o Normal file

Binary file not shown.

42
heavyHash/singular.h Normal file
View File

@ -0,0 +1,42 @@
#ifndef _SINGULAR_SINGULAR_H
#define _SINGULAR_SINGULAR_H
/** The version of the singular library. */
#define SINGULAR_VERSION "@PROJECT_VERSION@"
/**
* Whether rvalue references are supported.
*
* Visual Studio 2010 and lower do not have rvalue references so far.
*/
#if defined(_MSC_VER) && _MSC_VER < 1700
#define SINGULAR_RVALUE_REFERENCE_SUPPORTED 0
#else
#define SINGULAR_RVALUE_REFERENCE_SUPPORTED 1
#endif
/**
* Whether function deletions are supported.
*
* Visual Studio 2012 and lower do not like "delete" stuff so far.
*/
#if defined(_MSC_VER) && _MSC_VER < 1800
#define SINGULAR_FUNCTION_DELETION_SUPPORTED 0
#else
#define SINGULAR_FUNCTION_DELETION_SUPPORTED 1
#endif
/**
* Whether template friend operator overloads are supported.
*
* Visual Studio 2012 and lower do not like overloading a template friend
* operators.
* Neither does GCC.
*/
#if (defined(_MSC_VER) && _MSC_VER < 1800) || (defined(__GNUC__) && !defined(__clang__))
#define SINGULAR_TEMPLATE_FRIEND_OPERATOR_OVERLOADING_SUPPORTED 0
#else
#define SINGULAR_TEMPLATE_FRIEND_OPERATOR_OVERLOADING_SUPPORTED 1
#endif
#endif

183
heavyHash/test.c Normal file
View File

@ -0,0 +1,183 @@
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include "obtc.h"
#include "singular.h"
#include<time.h>
//uint8_t const_data[200];
static const int hex2bin_tbl[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
/* Maps an ASCII hex digit ('0'-'9', 'a'-'f', 'A'-'F') to its value,
 * or -1 for any other byte — the same mapping as the 256-entry
 * hex2bin_tbl, but transparent and self-contained. */
static int hex2bin_nibble(unsigned char c)
{
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
}

/*
 * Decodes up to `len` bytes of hex from `hexstr` into `p`.
 * Returns true only when exactly len bytes were decoded AND the string
 * ended; false on truncation (odd-length input), a non-hex character,
 * or too few/too many digits.
 */
bool hex2bin(unsigned char *p, const char *hexstr, size_t len)
{
    while (*hexstr && len) {
        if (!hexstr[1]) {
            printf("hex2bin str truncated");
            return false;
        }
        int hi = hex2bin_nibble((unsigned char)*hexstr++);
        int lo = hex2bin_nibble((unsigned char)*hexstr++);
        if (hi < 0 || lo < 0) {
            printf("hex2bin scan failed");
            return false;
        }
        *p++ = (unsigned char)((hi << 4) | lo);
        --len;
    }
    return len == 0 && *hexstr == 0;
}
// Stand-alone test harness: builds an 80-byte genesis block from a
// hard-coded prehash + nonce (hex), runs opticalbtc_hash once, times it,
// and prints the resulting hash in reversed byte order.
// NOTE(review): the locals `prehash` and `nonce` defined in the #else
// branch (and `hash`-family buffers for the commented retries) are unused
// by the active code path — only prehash_tab/nonce_tab feed the block.
int main(int argc, char **argv)
{
    uint8_t genesis_block[80];
    uint8_t hash[32];
    uint8_t last_prehash[32];
    uint8_t last_prehash2[32];
    uint8_t prehash_tab[32];
    uint8_t nonce_tab[8];
    char *prehash_str = "d76ffb1d8e31ec04579b0452b52bde7dbd088e912ab1b11ba924ff309ab44a43";//argv[1];
    char *nonce_str = "80aa59a7901f2502";//argv[2];
    //char *last_prehash_str = argv[3];
    //char *last_prehash_str2 = argv[4];
    hex2bin(prehash_tab, prehash_str, strlen(prehash_str)/2);
    hex2bin(nonce_tab, nonce_str, strlen(nonce_str)/2);
    //hex2bin(last_prehash, last_prehash_str, strlen(last_prehash_str)/2);
    //hex2bin(last_prehash2, last_prehash_str2, strlen(last_prehash_str2)/2);
    /*for (uint8_t i = 0; i<32;i++){
    printf("0x%x, ",prehash_tab[i]);
    }
    printf("\n");
    for (uint8_t i = 0; i<8;i++){
    printf("0x%x, ",nonce_tab[i]);
    }
    printf("\n");*/
    //uint8_t prehash[32] = {0x81,0x55,0x3a,0x69,0x5a,0x05,0x88,0x99,0x8c,0x41,0x37,0x92,0xe7,0x4c,0xe8,0xb8,0xf8,0xa0,0x96,0xd6,0x4b,0x3e,0xe4,0x73,0x87,0x37,0x24,0x34,0x48,0x5c,0x0b,0x6f};
    //uint8_t utime[8] = {0x00,0x00,0x01,0x84,0x8c,0xa8,0x7c,0x49};
    uint8_t pad[32] = {0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
    //uint8_t nonce[8] = {0x2f,0x84,0x00,0x00,0x0e,0xba,0x16,0x7c};
#if 0
    //uint8_t prehash[32] = {0xa4,0x8f,0xae,0x69,0xeb,0x28,0xc7,0xe0,0x14,0x11,0x4f,0x01,0xae,0x60,0xc8,0xc3,0x82,0x73,0xc4,0x60,0x66,0xcf,0x95,0xd6,0x77,0x1a,0x55,0xd6,0x16,0xd7,0xa1,0x9a};// big-endian
    //uint8_t utime[8] = {0x00,0x00,0x01,0x87,0x22,0x1e,0xad,0x44};
    //uint8_t nonce[8] = {0x8e,0xd4,0x00,0x10,0x6b,0xe7,0xe4,0x00};
    //uint8_t nonce[8] = {0x8e,0xd4,0x00,0x12,0x27,0xc6,0x90,0xa0};
    //uint8_t nonce[8] = {0x8e,0xd4,0x00,0x32,0x0b,0x6b,0xd6,0xd1};
    //3f 9a aa c6 32 af 1a 4e 0e 1f ea 8a f8 e3 d5 32 b7 5a a4 71 b2 e4 ef fe a5 bd cc fa 3b dd b6 61
    uint8_t prehash[32] = {0x3f,0x9a,0xaa,0xc6,0x32,0xaf,0x1a,0x4e,0x0e,0x1f,0xea,0x8a,0xf8,0xe3,0xd5,0x32,0xb7,0x5a,0xa4,0x71,0xb2,0xe4,0xef,0xfe,0xa5,0xbd,0xcc,0xfa,0x3b,0xdd,0xb6,0x61};// big-endian
    uint8_t utime[8] = {0x00,0x00,0x01,0x87,0x21,0xeb,0x73,0x79};
    uint8_t nonce[8] = {0xa3,0xdd,0x02,0x10,0x1a,0x87,0xb4,0x70};
#else
    /*443e01000000ffff00000000
    e0af2a3ba173157d3f70c94aad742fdf16d9930fdfc9d6301e869bcef04ced6c
    e0af2a3ba173157d3f70c94aad742fdf16d9930fdfc9d6301e869bcef04ced6c
    dbee84288701000000000000901f25020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
    [2023-03-28 22:00:46.549] 00 cc 01 11 70 83 85 16 90 1f 25 02
    kas_pow_hash: in:e0af2a3ba173157d3f70c94aad742fdf16d9930fdfc9d6301e869bcef04ced6cdbee842887010000000000000000000000000000000000000000000000000000000000000000000070838516901f2502
    kas_pow_hash: out:dae78f5008d3b66f
    01a740ce33c812ba
    772b3f5763da7bc6
    da24cb6c00000000*/
    uint8_t prehash[32] = {0xe0,0xaf,0x2a,0x3b,0xa1,0x73,0x15,0x7d,0x3f,0x70,0xc9,0x4a,0xad,0x74,0x2f,0xdf,0x16,0xd9,0x93,0x0f,0xdf,0xc9,0xd6,0x30,0x1e,0x86,0x9b,0xce,0xf0,0x4c,0xed,0x6c};
    //uint8_t utime[8] = {0x00,0x00,0x01,0x87,0x28,0x84,0xee,0xdb};
    uint8_t nonce[8] = {0x02,0x25,0x1f,0x90,0x16,0x85,0x83,0x70};
#endif
    /*for (int i = 0; i < 32; ++i) genesis_block[i] = prehash[i];
    for (int i = 0; i < 8; ++i) genesis_block[i+32] = utime[7-i];
    for (int i = 0; i < 32; ++i) genesis_block[i+40] = pad[31-i];
    for (int i = 0; i < 8; ++i) genesis_block[i+72] = nonce[7-i];*/
    //uint8_t utime[8] = {0x00,0x00,0x01,0x87,0x21,0xeb,0x73,0x79};
    //dbee8428870100000
    uint8_t utime[8] = {0x00,0x00,0x01,0x87,0x28,0x84,0xee,0xdb};
    // assemble the 80-byte header: prehash | utime (reversed) | zero pad | nonce
    for (int i = 0; i < 32; ++i) genesis_block[i] = prehash_tab[i];
    for (int i = 0; i < 8; ++i) genesis_block[i+32] = utime[7-i];
    for (int i = 0; i < 32; ++i) genesis_block[i+40] = pad[31-i];
    for (int i = 0; i < 8; ++i) genesis_block[i+72] = nonce_tab[i];
    clock_t start, finish;
    double Total_time;
    uint32_t cnt = 0;;
    //while(1)
    {
        start = clock();
        opticalbtc_hash((const char*)&genesis_block, (char*)&hash, sizeof(genesis_block));
        finish = clock();
        Total_time = (double)(finish-start) / CLOCKS_PER_SEC;
        printf( "\n cnt = %d, opticalbtc_hash run times %f seconds\n", cnt++, Total_time);
        // print the hash most-significant byte first
        for (int i=31; i>-1; i--) {
            printf("%02hhx", hash[i]);
        }
        printf("\n");
    }
    //if (hash[31] != 0 || hash[30] != 0){
    // for (int i = 0; i < 32; ++i) genesis_block[i] = last_prehash[i];
    // opticalbtc_hash((const char*)&genesis_block, (char*)&hash, sizeof(genesis_block));
    //}
    //if (hash[31] != 0 || hash[30] != 0){
    // for (int i = 0; i < 32; ++i) genesis_block[i] = last_prehash2[i];
    // opticalbtc_hash((const char*)&genesis_block, (char*)&hash, sizeof(genesis_block));
    //}
    // crude difficulty check on the two most-significant bytes
    if (hash[31] != 0 && hash[30] != 0){
        printf("reject\n");
    }
    return 0;
}
//g++ -std=c++11 *.cpp

44
heavyHash/uint256.h Normal file
View File

@ -0,0 +1,44 @@
// Copyright (c) 2009-2010 Satoshi Nakamoto
// Copyright (c) 2009-2016 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#ifndef BITCOIN_UINT256_H
#define BITCOIN_UINT256_H
#include <assert.h>
//#include <cstring>
//#include <stdexcept>
#include <stdint.h>
#include <string.h>
//#include <vector>
#include <stdbool.h>
/** 256-bit opaque blob.
* @note This type is called uint256 for historical reasons only. It is an
* opaque blob of 256 bits and has no integer operations. Use arith_uint256 if
* those are required.
*/
#define UPPER_P(x) x->elements[0]
#define LOWER_P(x) x->elements[1]
#define UPPER(x) x.elements[0]
#define LOWER(x) x.elements[1]
#define WIDTH 32
typedef struct class_base_blob base_blob_t;
// Raw 32-byte blob backing a uint256.
struct class_base_blob{
    uint8_t data[WIDTH];
};
// 128-bit value stored as two 64-bit words.
typedef struct uint128_t { uint64_t elements[2]; } uint128_t;
// 256-bit opaque blob (no integer operations), kept for interface
// compatibility with the Bitcoin-derived code.
// NOTE(review): holding both elements[2] (32 bytes) and bb (32 bytes)
// makes this struct 64 bytes — confirm the duplication is intentional.
typedef struct uint256_t {
    uint128_t elements[2];
    base_blob_t bb;
} uint256;
#endif // BITCOIN_UINT256_H

15
heavyHash/xoshiro256pp.h Normal file
View File

@ -0,0 +1,15 @@
#ifndef OPOW_CRYPTO_XOSHIRO256PP_H
#define OPOW_CRYPTO_XOSHIRO256PP_H
#include <stdint.h>
#include "uint256.h"
typedef struct class_XoShiRo256PlusPlus XoShiRo256PlusPlus_t;
// xoshiro256++ PRNG state: four 64-bit words.
struct class_XoShiRo256PlusPlus{
    uint64_t s[4];
};
#endif //OPOW_CRYPTO_XOSHIRO256PP_H

BIN
randomx/a.exe Normal file

Binary file not shown.

241
randomx/aes_hash.cpp Normal file
View File

@ -0,0 +1,241 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "soft_aes.h"
#include <cassert>
//NOTE: The functions below were tuned for maximum performance
//and are not cryptographically secure outside of the scope of RandomX.
//It's not recommended to use them as general hash functions and PRNGs.
//AesHash1R:
//state0, state1, state2, state3 = Blake2b-512("RandomX AesHash1R state")
//xkey0, xkey1 = Blake2b-256("RandomX AesHash1R xkeys")
#define AES_HASH_1R_STATE0 0xd7983aad, 0xcc82db47, 0x9fa856de, 0x92b52c0d
#define AES_HASH_1R_STATE1 0xace78057, 0xf59e125a, 0x15c7b798, 0x338d996e
#define AES_HASH_1R_STATE2 0xe8a07ce4, 0x5079506b, 0xae62c7d0, 0x6a770017
#define AES_HASH_1R_STATE3 0x7e994948, 0x79a10005, 0x07ad828d, 0x630a240c
#define AES_HASH_1R_XKEY0 0x06890201, 0x90dc56bf, 0x8b24949f, 0xf6fa8389
#define AES_HASH_1R_XKEY1 0xed18f99b, 0xee1043c6, 0x51f4e03c, 0x61b263d1
/*
Calculate a 512-bit hash of 'input' using 4 lanes of AES.
The input is treated as a set of round keys for the encryption
of the initial state.
'inputSize' must be a multiple of 64.
For a 2 MiB input, this has the same security as 32768-round
AES encryption.
Hashing throughput: >20 GiB/s per CPU core with hardware AES
*/
// Computes the RandomX AesHash1R 512-bit fingerprint of `input`:
// four 128-bit lanes are initialized from fixed Blake2b-derived constants,
// each 64-byte input block is used as AES round keys (lanes 0/2 encrypt,
// lanes 1/3 decrypt), and two extra keyed rounds provide final diffusion.
// `inputSize` must be a multiple of 64. The softAes template parameter
// selects software vs hardware AES in aesenc/aesdec.
template<bool softAes>
void hashAes1Rx4(const void *input, size_t inputSize, void *hash) {
    assert(inputSize % 64 == 0);
    const uint8_t* inptr = (uint8_t*)input;
    const uint8_t* inputEnd = inptr + inputSize;
    rx_vec_i128 state0, state1, state2, state3;
    rx_vec_i128 in0, in1, in2, in3;
    //intial state
    state0 = rx_set_int_vec_i128(AES_HASH_1R_STATE0);
    state1 = rx_set_int_vec_i128(AES_HASH_1R_STATE1);
    state2 = rx_set_int_vec_i128(AES_HASH_1R_STATE2);
    state3 = rx_set_int_vec_i128(AES_HASH_1R_STATE3);
    //process 64 bytes at a time in 4 lanes
    while (inptr < inputEnd) {
        in0 = rx_load_vec_i128((rx_vec_i128*)inptr + 0);
        in1 = rx_load_vec_i128((rx_vec_i128*)inptr + 1);
        in2 = rx_load_vec_i128((rx_vec_i128*)inptr + 2);
        in3 = rx_load_vec_i128((rx_vec_i128*)inptr + 3);
        state0 = aesenc<softAes>(state0, in0);
        state1 = aesdec<softAes>(state1, in1);
        state2 = aesenc<softAes>(state2, in2);
        state3 = aesdec<softAes>(state3, in3);
        inptr += 64;
    }
    //two extra rounds to achieve full diffusion
    rx_vec_i128 xkey0 = rx_set_int_vec_i128(AES_HASH_1R_XKEY0);
    rx_vec_i128 xkey1 = rx_set_int_vec_i128(AES_HASH_1R_XKEY1);
    state0 = aesenc<softAes>(state0, xkey0);
    state1 = aesdec<softAes>(state1, xkey0);
    state2 = aesenc<softAes>(state2, xkey0);
    state3 = aesdec<softAes>(state3, xkey0);
    state0 = aesenc<softAes>(state0, xkey1);
    state1 = aesdec<softAes>(state1, xkey1);
    state2 = aesenc<softAes>(state2, xkey1);
    state3 = aesdec<softAes>(state3, xkey1);
    //output hash
    rx_store_vec_i128((rx_vec_i128*)hash + 0, state0);
    rx_store_vec_i128((rx_vec_i128*)hash + 1, state1);
    rx_store_vec_i128((rx_vec_i128*)hash + 2, state2);
    rx_store_vec_i128((rx_vec_i128*)hash + 3, state3);
}
// explicit instantiations for both the software and hardware AES paths
template void hashAes1Rx4<false>(const void *input, size_t inputSize, void *hash);
template void hashAes1Rx4<true>(const void *input, size_t inputSize, void *hash);
//AesGenerator1R:
//key0, key1, key2, key3 = Blake2b-512("RandomX AesGenerator1R keys")
#define AES_GEN_1R_KEY0 0xb4f44917, 0xdbb5552b, 0x62716609, 0x6daca553
#define AES_GEN_1R_KEY1 0x0da1dc4e, 0x1725d378, 0x846a710d, 0x6d7caf07
#define AES_GEN_1R_KEY2 0x3e20e345, 0xf4c0794f, 0x9f947ec6, 0x3f1262f1
#define AES_GEN_1R_KEY3 0x49169154, 0x16314c88, 0xb1ba317c, 0x6aef8135
/*
Fill 'buffer' with pseudorandom data based on 512-bit 'state'.
The state is encrypted using a single AES round per 16 bytes of output
in 4 lanes.
'outputSize' must be a multiple of 64.
The modified state is written back to 'state' to allow multiple
calls to this function.
*/
template<bool softAes>
void fillAes1Rx4(void *state, size_t outputSize, void *buffer) {
assert(outputSize % 64 == 0);
const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize;
rx_vec_i128 state0, state1, state2, state3;
rx_vec_i128 key0, key1, key2, key3;
key0 = rx_set_int_vec_i128(AES_GEN_1R_KEY0);
key1 = rx_set_int_vec_i128(AES_GEN_1R_KEY1);
key2 = rx_set_int_vec_i128(AES_GEN_1R_KEY2);
key3 = rx_set_int_vec_i128(AES_GEN_1R_KEY3);
state0 = rx_load_vec_i128((rx_vec_i128*)state + 0);
state1 = rx_load_vec_i128((rx_vec_i128*)state + 1);
state2 = rx_load_vec_i128((rx_vec_i128*)state + 2);
state3 = rx_load_vec_i128((rx_vec_i128*)state + 3);
while (outptr < outputEnd) {
state0 = aesdec<softAes>(state0, key0);
state1 = aesenc<softAes>(state1, key1);
state2 = aesdec<softAes>(state2, key2);
state3 = aesenc<softAes>(state3, key3);
rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2);
rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3);
outptr += 64;
}
rx_store_vec_i128((rx_vec_i128*)state + 0, state0);
rx_store_vec_i128((rx_vec_i128*)state + 1, state1);
rx_store_vec_i128((rx_vec_i128*)state + 2, state2);
rx_store_vec_i128((rx_vec_i128*)state + 3, state3);
}
template void fillAes1Rx4<true>(void *state, size_t outputSize, void *buffer);
template void fillAes1Rx4<false>(void *state, size_t outputSize, void *buffer);
//AesGenerator4R:
//key0, key1, key2, key3 = Blake2b-512("RandomX AesGenerator4R keys 0-3")
//key4, key5, key6, key7 = Blake2b-512("RandomX AesGenerator4R keys 4-7")
#define AES_GEN_4R_KEY0 0x99e5d23f, 0x2f546d2b, 0xd1833ddb, 0x6421aadd
#define AES_GEN_4R_KEY1 0xa5dfcde5, 0x06f79d53, 0xb6913f55, 0xb20e3450
#define AES_GEN_4R_KEY2 0x171c02bf, 0x0aa4679f, 0x515e7baf, 0x5c3ed904
#define AES_GEN_4R_KEY3 0xd8ded291, 0xcd673785, 0xe78f5d08, 0x85623763
#define AES_GEN_4R_KEY4 0x229effb4, 0x3d518b6d, 0xe3d6a7a6, 0xb5826f73
#define AES_GEN_4R_KEY5 0xb272b7d2, 0xe9024d4e, 0x9c10b3d9, 0xc7566bf3
#define AES_GEN_4R_KEY6 0xf63befa7, 0x2ba9660a, 0xf765a38b, 0xf273c9e7
#define AES_GEN_4R_KEY7 0xc0b0762d, 0x0c06d1fd, 0x915839de, 0x7a7cd609
template<bool softAes>
void fillAes4Rx4(void *state, size_t outputSize, void *buffer) {
assert(outputSize % 64 == 0);
const uint8_t* outptr = (uint8_t*)buffer;
const uint8_t* outputEnd = outptr + outputSize;
//printf("outputSize= %zu\n",outputSize); //outputSize =2176 这里填充program 空间总计128+256x8 byte
rx_vec_i128 state0, state1, state2, state3;
rx_vec_i128 key0, key1, key2, key3, key4, key5, key6, key7;
key0 = rx_set_int_vec_i128(AES_GEN_4R_KEY0);
key1 = rx_set_int_vec_i128(AES_GEN_4R_KEY1);
key2 = rx_set_int_vec_i128(AES_GEN_4R_KEY2);
key3 = rx_set_int_vec_i128(AES_GEN_4R_KEY3);
key4 = rx_set_int_vec_i128(AES_GEN_4R_KEY4);
key5 = rx_set_int_vec_i128(AES_GEN_4R_KEY5);
key6 = rx_set_int_vec_i128(AES_GEN_4R_KEY6);
key7 = rx_set_int_vec_i128(AES_GEN_4R_KEY7);
state0 = rx_load_vec_i128((rx_vec_i128*)state + 0);
state1 = rx_load_vec_i128((rx_vec_i128*)state + 1);
state2 = rx_load_vec_i128((rx_vec_i128*)state + 2);
state3 = rx_load_vec_i128((rx_vec_i128*)state + 3);
while (outptr < outputEnd) {
state0 = aesdec<softAes>(state0, key0);
state1 = aesenc<softAes>(state1, key0);
state2 = aesdec<softAes>(state2, key4);
state3 = aesenc<softAes>(state3, key4);
state0 = aesdec<softAes>(state0, key1);
state1 = aesenc<softAes>(state1, key1);
state2 = aesdec<softAes>(state2, key5);
state3 = aesenc<softAes>(state3, key5);
state0 = aesdec<softAes>(state0, key2);
state1 = aesenc<softAes>(state1, key2);
state2 = aesdec<softAes>(state2, key6);
state3 = aesenc<softAes>(state3, key6);
state0 = aesdec<softAes>(state0, key3);
state1 = aesenc<softAes>(state1, key3);
state2 = aesdec<softAes>(state2, key7);
state3 = aesenc<softAes>(state3, key7);
rx_store_vec_i128((rx_vec_i128*)outptr + 0, state0);
rx_store_vec_i128((rx_vec_i128*)outptr + 1, state1);
rx_store_vec_i128((rx_vec_i128*)outptr + 2, state2);
rx_store_vec_i128((rx_vec_i128*)outptr + 3, state3);
outptr += 64;
}
}
template void fillAes4Rx4<true>(void *state, size_t outputSize, void *buffer);
template void fillAes4Rx4<false>(void *state, size_t outputSize, void *buffer);

40
randomx/aes_hash.hpp Normal file
View File

@ -0,0 +1,40 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cstddef>
template<bool softAes>
void hashAes1Rx4(const void *input, size_t inputSize, void *hash);
template<bool softAes>
void fillAes1Rx4(void *state, size_t outputSize, void *buffer);
template<bool softAes>
void fillAes4Rx4(void *state, size_t outputSize, void *buffer);

60
randomx/allocator.cpp Normal file
View File

@ -0,0 +1,60 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <new>
#include "allocator.hpp"
#include "intrin_portable.h"
#include "virtual_memory.hpp"
#include "common.hpp"
namespace randomx {

	//Allocates 'count' bytes aligned to the 'alignment' template parameter.
	//Throws std::bad_alloc on failure instead of returning nullptr.
	template<size_t alignment>
	void* AlignedAllocator<alignment>::allocMemory(size_t count) {
		void *mem = rx_aligned_alloc(count, alignment);
		if (mem == nullptr)
			throw std::bad_alloc();
		return mem;
	}

	//Releases memory obtained from allocMemory.
	template<size_t alignment>
	void AlignedAllocator<alignment>::freeMemory(void* ptr, size_t count) {
		(void)count; //size is not needed by rx_aligned_free; kept for interface symmetry
		rx_aligned_free(ptr);
	}

	template class AlignedAllocator<CacheLineSize>;

	//Allocator backed by OS large/huge pages.
	void* LargePageAllocator::allocMemory(size_t count) {
		return allocLargePagesMemory(count);
	}

	//NOTE(review): freePagedMemory takes the size — presumably required for
	//munmap-style release; confirm against virtual_memory.hpp.
	void LargePageAllocator::freeMemory(void* ptr, size_t count) {
		freePagedMemory(ptr, count);
	} //fixed: removed stray ';' after this brace (empty declaration, -Wpedantic warning)
}

46
randomx/allocator.hpp Normal file
View File

@ -0,0 +1,46 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cstddef>
namespace randomx {
template<size_t alignment>
struct AlignedAllocator {
static void* allocMemory(size_t);
static void freeMemory(void*, size_t);
};
struct LargePageAllocator {
static void* allocMemory(size_t);
static void freeMemory(void*, size_t);
};
}

261
randomx/argon2.h Normal file
View File

@ -0,0 +1,261 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Original code from Argon2 reference source code package used under CC0 Licence
* https://github.com/P-H-C/phc-winner-argon2
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*/
#pragma once
#include <stdint.h>
#include <stddef.h>
#include <limits.h>
/*
* Argon2 input parameter restrictions
*/
/* Minimum and maximum number of lanes (degree of parallelism) */
#define ARGON2_MIN_LANES UINT32_C(1)
#define ARGON2_MAX_LANES UINT32_C(0xFFFFFF)
/* Minimum and maximum number of threads */
#define ARGON2_MIN_THREADS UINT32_C(1)
#define ARGON2_MAX_THREADS UINT32_C(0xFFFFFF)
/* Number of synchronization points between lanes per pass */
#define ARGON2_SYNC_POINTS UINT32_C(4)
/* Minimum and maximum digest size in bytes */
#define ARGON2_MIN_OUTLEN UINT32_C(4)
#define ARGON2_MAX_OUTLEN UINT32_C(0xFFFFFFFF)
/* Minimum and maximum number of memory blocks (each of BLOCK_SIZE bytes) */
#define ARGON2_MIN_MEMORY (2 * ARGON2_SYNC_POINTS) /* 2 blocks per slice */
#define ARGON2_MIN(a, b) ((a) < (b) ? (a) : (b))
/* Max memory size is addressing-space/2, topping at 2^32 blocks (4 TB) */
#define ARGON2_MAX_MEMORY_BITS \
ARGON2_MIN(UINT32_C(32), (sizeof(void *) * CHAR_BIT - 10 - 1))
#define ARGON2_MAX_MEMORY \
ARGON2_MIN(UINT32_C(0xFFFFFFFF), UINT64_C(1) << ARGON2_MAX_MEMORY_BITS)
/* Minimum and maximum number of passes */
#define ARGON2_MIN_TIME UINT32_C(1)
#define ARGON2_MAX_TIME UINT32_C(0xFFFFFFFF)
/* Minimum and maximum password length in bytes */
#define ARGON2_MIN_PWD_LENGTH UINT32_C(0)
#define ARGON2_MAX_PWD_LENGTH UINT32_C(0xFFFFFFFF)
/* Minimum and maximum associated data length in bytes */
#define ARGON2_MIN_AD_LENGTH UINT32_C(0)
#define ARGON2_MAX_AD_LENGTH UINT32_C(0xFFFFFFFF)
/* Minimum and maximum salt length in bytes */
#define ARGON2_MIN_SALT_LENGTH UINT32_C(8)
#define ARGON2_MAX_SALT_LENGTH UINT32_C(0xFFFFFFFF)
/* Minimum and maximum key length in bytes */
#define ARGON2_MIN_SECRET UINT32_C(0)
#define ARGON2_MAX_SECRET UINT32_C(0xFFFFFFFF)
/* Flags to determine which fields are securely wiped (default = no wipe). */
#define ARGON2_DEFAULT_FLAGS UINT32_C(0)
#define ARGON2_FLAG_CLEAR_PASSWORD (UINT32_C(1) << 0)
#define ARGON2_FLAG_CLEAR_SECRET (UINT32_C(1) << 1)
/* Error codes */
typedef enum Argon2_ErrorCodes {
ARGON2_OK = 0,
ARGON2_OUTPUT_PTR_NULL = -1,
ARGON2_OUTPUT_TOO_SHORT = -2,
ARGON2_OUTPUT_TOO_LONG = -3,
ARGON2_PWD_TOO_SHORT = -4,
ARGON2_PWD_TOO_LONG = -5,
ARGON2_SALT_TOO_SHORT = -6,
ARGON2_SALT_TOO_LONG = -7,
ARGON2_AD_TOO_SHORT = -8,
ARGON2_AD_TOO_LONG = -9,
ARGON2_SECRET_TOO_SHORT = -10,
ARGON2_SECRET_TOO_LONG = -11,
ARGON2_TIME_TOO_SMALL = -12,
ARGON2_TIME_TOO_LARGE = -13,
ARGON2_MEMORY_TOO_LITTLE = -14,
ARGON2_MEMORY_TOO_MUCH = -15,
ARGON2_LANES_TOO_FEW = -16,
ARGON2_LANES_TOO_MANY = -17,
ARGON2_PWD_PTR_MISMATCH = -18, /* NULL ptr with non-zero length */
ARGON2_SALT_PTR_MISMATCH = -19, /* NULL ptr with non-zero length */
ARGON2_SECRET_PTR_MISMATCH = -20, /* NULL ptr with non-zero length */
ARGON2_AD_PTR_MISMATCH = -21, /* NULL ptr with non-zero length */
ARGON2_MEMORY_ALLOCATION_ERROR = -22,
ARGON2_FREE_MEMORY_CBK_NULL = -23,
ARGON2_ALLOCATE_MEMORY_CBK_NULL = -24,
ARGON2_INCORRECT_PARAMETER = -25,
ARGON2_INCORRECT_TYPE = -26,
ARGON2_OUT_PTR_MISMATCH = -27,
ARGON2_THREADS_TOO_FEW = -28,
ARGON2_THREADS_TOO_MANY = -29,
ARGON2_MISSING_ARGS = -30,
ARGON2_ENCODING_FAIL = -31,
ARGON2_DECODING_FAIL = -32,
ARGON2_THREAD_FAIL = -33,
ARGON2_DECODING_LENGTH_FAIL = -34,
ARGON2_VERIFY_MISMATCH = -35
} argon2_error_codes;
/* Memory allocator types --- for external allocation */
typedef int(*allocate_fptr)(uint8_t **memory, size_t bytes_to_allocate);
typedef void(*deallocate_fptr)(uint8_t *memory, size_t bytes_to_allocate);
/* Argon2 external data structures */
/*
*****
* Context: structure to hold Argon2 inputs:
* output array and its length,
* password and its length,
* salt and its length,
* secret and its length,
* associated data and its length,
* number of passes, amount of used memory (in KBytes, can be rounded up a bit)
* number of parallel threads that will be run.
* All the parameters above affect the output hash value.
* Additionally, two function pointers can be provided to allocate and
* deallocate the memory (if NULL, memory will be allocated internally).
* Also, three flags indicate whether to erase password, secret as soon as they
* are pre-hashed (and thus not needed anymore), and the entire memory
*****
* Simplest situation: you have output array out[8], password is stored in
* pwd[32], salt is stored in salt[16], you do not have keys nor associated
* data. You need to spend 1 GB of RAM and you run 5 passes of Argon2d with
* 4 parallel lanes.
* You want to erase the password, but you're OK with last pass not being
* erased. You want to use the default memory allocator.
* Then you initialize:
Argon2_Context(out,8,pwd,32,salt,16,NULL,0,NULL,0,5,1<<20,4,4,NULL,NULL,true,false,false,false)
*/
/* Bundle of all Argon2 inputs. Per the comment above, every parameter up
   to and including 'version' affects the resulting hash value; the
   allocator callbacks and 'flags' only control memory handling/wiping. */
typedef struct Argon2_Context {
uint8_t *out; /* output array */
uint32_t outlen; /* digest length */
uint8_t *pwd; /* password array */
uint32_t pwdlen; /* password length */
uint8_t *salt; /* salt array */
uint32_t saltlen; /* salt length */
uint8_t *secret; /* key array */
uint32_t secretlen; /* key length */
uint8_t *ad; /* associated data array */
uint32_t adlen; /* associated data length */
uint32_t t_cost; /* number of passes */
uint32_t m_cost; /* amount of memory requested (KB) */
uint32_t lanes; /* number of lanes */
uint32_t threads; /* maximum number of threads */
uint32_t version; /* version number */
allocate_fptr allocate_cbk; /* pointer to memory allocator */
deallocate_fptr free_cbk; /* pointer to memory deallocator */
uint32_t flags; /* array of bool options */
} argon2_context;
/* Argon2 primitive type */
typedef enum Argon2_type {
Argon2_d = 0,
Argon2_i = 1,
Argon2_id = 2
} argon2_type;
/* Version of the algorithm */
typedef enum Argon2_version {
ARGON2_VERSION_10 = 0x10,
ARGON2_VERSION_13 = 0x13,
ARGON2_VERSION_NUMBER = ARGON2_VERSION_13
} argon2_version;
//Argon2 instance - forward declaration
typedef struct Argon2_instance_t argon2_instance_t;
//Argon2 position - forward declaration
typedef struct Argon2_position_t argon2_position_t;
//Argon2 implementation function
typedef void randomx_argon2_impl(const argon2_instance_t* instance,
argon2_position_t position);
#if defined(__cplusplus)
extern "C" {
#endif
/*
* Function that fills the segment using previous segments also from other
* threads
* @param context current context
* @param instance Pointer to the current instance
* @param position Current position
* @pre all block pointers must be valid
*/
void randomx_argon2_fill_segment_ref(const argon2_instance_t* instance,
argon2_position_t position);
randomx_argon2_impl *randomx_argon2_impl_ssse3();
randomx_argon2_impl *randomx_argon2_impl_avx2();
#if defined(__cplusplus)
}
#endif

175
randomx/argon2_avx2.c Normal file
View File

@ -0,0 +1,175 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Original code from Argon2 reference source code package used under CC0 Licence
* https://github.com/P-H-C/phc-winner-argon2
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*/
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include "argon2.h"
void randomx_argon2_fill_segment_avx2(const argon2_instance_t* instance,
argon2_position_t position);
/* Returns the AVX2 fill-segment implementation when this translation unit
   was built with AVX2 support, or NULL when it was not. */
randomx_argon2_impl* randomx_argon2_impl_avx2() {
#if defined(__AVX2__)
	return &randomx_argon2_fill_segment_avx2;
#else
	return NULL;
#endif
}
#if defined(__AVX2__)
#include "argon2_core.h"
#include "blake2/blamka-round-avx2.h"
#include "blake2/blake2-impl.h"
#include "blake2/blake2.h"
/* Argon2 block compression (AVX2): one 1 KiB block held in 32 x 256-bit
   registers. Computes R = state ^ ref_block, applies two sweeps of BLAKE2
   rounds to R in place, then stores P(R) ^ R (additionally ^ next_block's
   old contents when 'with_xor' is set) into next_block. 'state' is left
   holding the stored result. */
static void fill_block(__m256i* state, const block* ref_block,
block* next_block, int with_xor) {
__m256i block_XY[ARGON2_HWORDS_IN_BLOCK]; /* saves R (or R ^ old next) for the final feed-forward XOR */
unsigned int i;
if (with_xor) {
/* later passes: result is additionally XORed with next_block's old contents */
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
state[i] = _mm256_xor_si256(
state[i], _mm256_loadu_si256((const __m256i*)ref_block->v + i));
block_XY[i] = _mm256_xor_si256(
state[i], _mm256_loadu_si256((const __m256i*)next_block->v + i));
}
}
else {
/* first pass / v1.0: feed-forward value is simply state ^ ref_block */
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
block_XY[i] = state[i] = _mm256_xor_si256(
state[i], _mm256_loadu_si256((const __m256i*)ref_block->v + i));
}
}
/* first sweep: BLAKE2 round over each group of 8 consecutive registers */
for (i = 0; i < 4; ++i) {
BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5],
state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]);
}
/* second sweep: BLAKE2 round over registers strided by 4 */
for (i = 0; i < 4; ++i) {
BLAKE2_ROUND_2(state[0 + i], state[4 + i], state[8 + i], state[12 + i],
state[16 + i], state[20 + i], state[24 + i], state[28 + i]);
}
/* feed-forward XOR and store the finished block */
for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
state[i] = _mm256_xor_si256(state[i], block_XY[i]);
_mm256_storeu_si256((__m256i*)next_block->v + i, state[i]);
}
}
/* Fills one segment (lane 'position.lane', slice 'position.slice') of the
   Argon2 memory matrix using the AVX2 fill_block kernel. This is the
   data-dependent (Argon2d-style) path: the reference index comes straight
   from the previous block's first word. */
void randomx_argon2_fill_segment_avx2(const argon2_instance_t* instance,
argon2_position_t position) {
block* ref_block = NULL, * curr_block = NULL;
block address_block, input_block; /* NOTE(review): declared but unused in this implementation */
uint64_t pseudo_rand, ref_index, ref_lane;
uint32_t prev_offset, curr_offset;
uint32_t starting_index, i;
__m256i state[ARGON2_HWORDS_IN_BLOCK]; /* running copy of the previous block */
if (instance == NULL) {
return;
}
starting_index = 0;
if ((0 == position.pass) && (0 == position.slice)) {
starting_index = 2; /* we have already generated the first two blocks */
}
/* Offset of the current block */
curr_offset = position.lane * instance->lane_length +
position.slice * instance->segment_length + starting_index;
if (0 == curr_offset % instance->lane_length) {
/* Last block in this lane */
prev_offset = curr_offset + instance->lane_length - 1;
}
else {
/* Previous block */
prev_offset = curr_offset - 1;
}
/* prime the register-resident state from the previous block */
memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
for (i = starting_index; i < instance->segment_length;
++i, ++curr_offset, ++prev_offset) {
/*1.1 Rotating prev_offset if needed */
if (curr_offset % instance->lane_length == 1) {
prev_offset = curr_offset - 1;
}
/* 1.2 Computing the index of the reference block */
/* 1.2.1 Taking pseudo-random value from the previous block */
pseudo_rand = instance->memory[prev_offset].v[0];
/* 1.2.2 Computing the lane of the reference block */
ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
if ((position.pass == 0) && (position.slice == 0)) {
/* Can not reference other lanes yet */
ref_lane = position.lane;
}
/* 1.2.3 Computing the number of possible reference block within the
* lane.
*/
position.index = i;
ref_index = randomx_argon2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
ref_lane == position.lane);
/* 2 Creating a new block */
ref_block =
instance->memory + instance->lane_length * ref_lane + ref_index;
curr_block = instance->memory + curr_offset;
if (ARGON2_VERSION_10 == instance->version) {
/* version 1.2.1 and earlier: overwrite, not XOR */
fill_block(state, ref_block, curr_block, 0);
}
else {
if (0 == position.pass) {
fill_block(state, ref_block, curr_block, 0);
}
else {
/* pass 1+: XOR the new block over the existing one */
fill_block(state, ref_block, curr_block, 1);
}
}
}
}
#endif

396
randomx/argon2_core.c Normal file
View File

@ -0,0 +1,396 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Original code from Argon2 reference source code package used under CC0 Licence
* https://github.com/P-H-C/phc-winner-argon2
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*/
/*For memory wiping*/
#ifdef _MSC_VER
#include <windows.h>
#include <winbase.h> /* For SecureZeroMemory */
#endif
#if defined __STDC_LIB_EXT1__
#define __STDC_WANT_LIB_EXT1__ 1
#endif
#define VC_GE_2005(version) (version >= 1400)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "argon2_core.h"
#include "blake2/blake2.h"
#include "blake2/blake2-impl.h"
#ifdef GENKAT
#include "genkat.h"
#endif
#if defined(__clang__)
#if __has_attribute(optnone)
#define NOT_OPTIMIZED __attribute__((optnone))
#endif
#elif defined(__GNUC__)
#define GCC_VERSION \
(__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#if GCC_VERSION >= 40400
#define NOT_OPTIMIZED __attribute__((optimize("O0")))
#endif
#endif
#ifndef NOT_OPTIMIZED
#define NOT_OPTIMIZED
#endif
/***************Instance and Position constructors**********/
/* Deserializes a 1 KiB memory block from a little-endian byte buffer. */
static void load_block(block *dst, const void *input) {
	const uint8_t *bytes = (const uint8_t *)input;
	for (unsigned i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
		dst->v[i] = load64(bytes + i * sizeof(dst->v[i]));
	}
}
/* Serializes a 1 KiB memory block into a little-endian byte buffer. */
static void store_block(void *output, const block *src) {
	uint8_t *bytes = (uint8_t *)output;
	for (unsigned i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) {
		store64(bytes + i * sizeof(src->v[i]), src->v[i]);
	}
}
/* Maps the 32-bit pseudo-random value onto an absolute block index within
   the reference lane, per the Argon2 indexing rule: first the size of the
   referenceable window is computed, then pseudo_rand is squashed into it
   with the squaring trick, then the result is offset by the window's start
   and wrapped to the lane length. */
uint32_t randomx_argon2_index_alpha(const argon2_instance_t *instance,const argon2_position_t *position, uint32_t pseudo_rand,int same_lane) {
/*
* Pass 0:
* This lane : all already finished segments plus already constructed
* blocks in this segment
* Other lanes : all already finished segments
* Pass 1+:
* This lane : (SYNC_POINTS - 1) last segments plus already constructed
* blocks in this segment
* Other lanes : (SYNC_POINTS - 1) last segments
*/
uint32_t reference_area_size;
uint64_t relative_position;
uint32_t start_position, absolute_position;
if (0 == position->pass) {
/* First pass */
if (0 == position->slice) {
/* First slice */
reference_area_size = position->index - 1; /* all but the previous */
}
else {
if (same_lane) {
/* The same lane => add current segment */
reference_area_size =position->slice * instance->segment_length +position->index - 1;
}
else {
reference_area_size =position->slice * instance->segment_length +((position->index == 0) ? (-1) : 0);
}
}
}
else {
/* Second pass */
if (same_lane) {
reference_area_size = instance->lane_length -
instance->segment_length + position->index -
1;
}
else {
reference_area_size = instance->lane_length -
instance->segment_length +
((position->index == 0) ? (-1) : 0);
}
}
/* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
* relative position */
relative_position = pseudo_rand;
/* phi(x) = x^2 / 2^32 — the squaring skews the distribution toward
   larger relative positions, i.e. toward more recent blocks */
relative_position = relative_position * relative_position >> 32;
relative_position = reference_area_size - 1 - (reference_area_size * relative_position >> 32);
/* 1.2.5 Computing starting position */
start_position = 0;
if (0 != position->pass) {
/* later passes reference from the start of the *next* slice (mod lane) */
start_position = (position->slice == ARGON2_SYNC_POINTS - 1)? 0 : (position->slice + 1) * instance->segment_length;
}
/* 1.2.6. Computing absolute position */
absolute_position = (start_position + relative_position) % instance->lane_length; /* absolute position */
return absolute_position;
}
/* Single-threaded fill of the whole memory matrix: iterates every pass,
   every sync slice, and every lane, delegating each segment to the
   implementation selected in instance->impl. */
static int fill_memory_blocks_st(argon2_instance_t *instance) {
	for (uint32_t pass = 0; pass < instance->passes; ++pass) {
		for (uint32_t slice = 0; slice < ARGON2_SYNC_POINTS; ++slice) {
			for (uint32_t lane = 0; lane < instance->lanes; ++lane) {
				argon2_position_t pos = { pass, lane, (uint8_t)slice, 0 };
				instance->impl(instance, pos);
			}
		}
	}
	return ARGON2_OK;
}
/* Validates the instance and runs the single-threaded memory fill.
   Returns ARGON2_OK or ARGON2_INCORRECT_PARAMETER. */
int randomx_argon2_fill_memory_blocks(argon2_instance_t *instance) {
	if (NULL == instance || 0 == instance->lanes)
		return ARGON2_INCORRECT_PARAMETER;
	return fill_memory_blocks_st(instance);
}
/* Checks every field of the context against the ARGON2_MIN_/MAX_ limits.
   Returns ARGON2_OK, or the first applicable error code; the check order
   (and therefore which error wins when several apply) is significant and
   is preserved. */
int randomx_argon2_validate_inputs(const argon2_context *context) {
	if (!context)
		return ARGON2_INCORRECT_PARAMETER;

	/* password (required): pointer may be NULL only with zero length */
	if (!context->pwd && context->pwdlen != 0)
		return ARGON2_PWD_PTR_MISMATCH;
	if (context->pwdlen < ARGON2_MIN_PWD_LENGTH)
		return ARGON2_PWD_TOO_SHORT;
	if (context->pwdlen > ARGON2_MAX_PWD_LENGTH)
		return ARGON2_PWD_TOO_LONG;

	/* salt (required): same pointer rule, plus a real minimum length */
	if (!context->salt && context->saltlen != 0)
		return ARGON2_SALT_PTR_MISMATCH;
	if (context->saltlen < ARGON2_MIN_SALT_LENGTH)
		return ARGON2_SALT_TOO_SHORT;
	if (context->saltlen > ARGON2_MAX_SALT_LENGTH)
		return ARGON2_SALT_TOO_LONG;

	/* secret (optional): length limits apply only when a pointer is given */
	if (!context->secret) {
		if (context->secretlen != 0)
			return ARGON2_SECRET_PTR_MISMATCH;
	}
	else {
		if (context->secretlen < ARGON2_MIN_SECRET)
			return ARGON2_SECRET_TOO_SHORT;
		if (context->secretlen > ARGON2_MAX_SECRET)
			return ARGON2_SECRET_TOO_LONG;
	}

	/* associated data (optional): same pattern as the secret */
	if (!context->ad) {
		if (context->adlen != 0)
			return ARGON2_AD_PTR_MISMATCH;
	}
	else {
		if (context->adlen < ARGON2_MIN_AD_LENGTH)
			return ARGON2_AD_TOO_SHORT;
		if (context->adlen > ARGON2_MAX_AD_LENGTH)
			return ARGON2_AD_TOO_LONG;
	}

	/* memory cost: global limits, plus at least 8 blocks per lane */
	if (context->m_cost < ARGON2_MIN_MEMORY)
		return ARGON2_MEMORY_TOO_LITTLE;
	if (context->m_cost > ARGON2_MAX_MEMORY)
		return ARGON2_MEMORY_TOO_MUCH;
	if (context->m_cost < 8 * context->lanes)
		return ARGON2_MEMORY_TOO_LITTLE;

	/* time cost */
	if (context->t_cost < ARGON2_MIN_TIME)
		return ARGON2_TIME_TOO_SMALL;
	if (context->t_cost > ARGON2_MAX_TIME)
		return ARGON2_TIME_TOO_LARGE;

	/* lanes and threads */
	if (context->lanes < ARGON2_MIN_LANES)
		return ARGON2_LANES_TOO_FEW;
	if (context->lanes > ARGON2_MAX_LANES)
		return ARGON2_LANES_TOO_MANY;
	if (context->threads < ARGON2_MIN_THREADS)
		return ARGON2_THREADS_TOO_FEW;
	if (context->threads > ARGON2_MAX_THREADS)
		return ARGON2_THREADS_TOO_MANY;

	/* allocator callbacks must be provided as a matched pair */
	if (context->allocate_cbk != NULL && context->free_cbk == NULL)
		return ARGON2_FREE_MEMORY_CBK_NULL;
	if (context->allocate_cbk == NULL && context->free_cbk != NULL)
		return ARGON2_ALLOCATE_MEMORY_CBK_NULL;

	return ARGON2_OK;
}
/*
 * Derives the first two blocks of every lane from the 72-byte prehash seed:
 * lane block 0 = H'(H0 || 0 || lane), lane block 1 = H'(H0 || 1 || lane),
 * where the counter and lane index occupy the 8 bytes after the 64-byte H0.
 *
 * @param blockhash 72-byte seed buffer; bytes 64..71 are overwritten here
 * @param instance  instance whose memory receives the expanded blocks
 */
void rxa2_fill_first_blocks(uint8_t *blockhash, const argon2_instance_t *instance) {
    uint8_t expanded[ARGON2_BLOCK_SIZE]; /* 1 KiB scratch for blake2b_long output */
    uint32_t lane = 0;
    while (lane < instance->lanes) {
        /* Block 0 of this lane: counter = 0, then the lane index */
        store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 0);
        store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, lane);
        blake2b_long(expanded, ARGON2_BLOCK_SIZE, blockhash, ARGON2_PREHASH_SEED_LENGTH);
        load_block(&instance->memory[lane * instance->lane_length], expanded);
        /* Block 1: only the counter changes; the lane index is already in place */
        store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, 1);
        blake2b_long(expanded, ARGON2_BLOCK_SIZE, blockhash, ARGON2_PREHASH_SEED_LENGTH);
        load_block(&instance->memory[lane * instance->lane_length + 1], expanded);
        ++lane;
    }
}
void rxa2_initial_hash(uint8_t *blockhash, argon2_context *context, argon2_type type) {
blake2b_state BlakeHash;
uint8_t value[sizeof(uint32_t)];
if (NULL == context || NULL == blockhash) {
return;
}
blake2b_init(&BlakeHash, ARGON2_PREHASH_DIGEST_LENGTH);
store32(&value, context->lanes);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, context->outlen);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, context->m_cost);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, context->t_cost);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, context->version);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, (uint32_t)type);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
store32(&value, context->pwdlen);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
if (context->pwd != NULL) {
blake2b_update(&BlakeHash, (const uint8_t *)context->pwd,
context->pwdlen);
}
store32(&value, context->saltlen);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
if (context->salt != NULL) {
blake2b_update(&BlakeHash, (const uint8_t *)context->salt, context->saltlen);
}
store32(&value, context->secretlen);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
if (context->secret != NULL) {
blake2b_update(&BlakeHash, (const uint8_t *)context->secret,
context->secretlen);
}
store32(&value, context->adlen);
blake2b_update(&BlakeHash, (const uint8_t *)&value, sizeof(value));
if (context->ad != NULL) {
blake2b_update(&BlakeHash, (const uint8_t *)context->ad,
context->adlen);
}
blake2b_final(&BlakeHash, blockhash, ARGON2_PREHASH_DIGEST_LENGTH); //ARGON2_PREHASH_DIGEST_LENGTH=64
}
/*
 * Initializes an Argon2 instance: hashes all context inputs into the 72-byte
 * prehash seed and creates the first two blocks of every lane. Unlike
 * upstream Argon2, no memory is allocated here -- RandomX preallocates the
 * block memory itself.
 *
 * @param instance instance with preallocated memory and geometry set
 * @param context  Argon2 parameters (password, salt, costs, ...)
 * @return ARGON2_OK on success, ARGON2_INCORRECT_PARAMETER on NULL arguments
 */
int randomx_argon2_initialize(argon2_instance_t *instance, argon2_context *context) {
    /* H0 (64 bytes) + 8 extra bytes for the block counter and lane index */
    uint8_t blockhash[ARGON2_PREHASH_SEED_LENGTH];
    if (instance == NULL || context == NULL) {
        return ARGON2_INCORRECT_PARAMETER;
    }
    instance->context_ptr = context;
    /* Hash all inputs into H0 */
    rxa2_initial_hash(blockhash, context, instance->type);
    /* No explicit zeroing of the 8 trailing bytes is needed: both 4-byte
       fields are overwritten by store32 in rxa2_fill_first_blocks. */
    rxa2_fill_first_blocks(blockhash, instance);
    return ARGON2_OK;
}

163
randomx/argon2_core.h Normal file
View File

@ -0,0 +1,163 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Original code from Argon2 reference source code package used under CC0 Licence
* https://github.com/P-H-C/phc-winner-argon2
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*/
#ifndef ARGON2_CORE_H
#define ARGON2_CORE_H
#include <stdint.h>
#include "argon2.h"
#if defined(__cplusplus)
extern "C" {
#endif
#define CONST_CAST(x) (x)(uintptr_t)
/**********************Argon2 internal constants*******************************/
enum argon2_core_constants {
    /* Memory block size in bytes */
    ARGON2_BLOCK_SIZE = 1024,
    ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8,        /* 64-bit words per block (128) */
    ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16,       /* 128-bit words per block (SSE paths) */
    ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32,       /* 256-bit words per block */
    ARGON2_512BIT_WORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 64, /* 512-bit words per block */
    /* Number of pseudo-random values generated by one call to Blake in Argon2i
       to generate reference block positions */
    ARGON2_ADDRESSES_IN_BLOCK = 128,
    /* Pre-hashing digest length and its extension */
    ARGON2_PREHASH_DIGEST_LENGTH = 64,
    /* digest + 4-byte block counter + 4-byte lane index */
    ARGON2_PREHASH_SEED_LENGTH = 72
};
/*************************Argon2 internal data types***********************/
/*
* Structure for the (1KB) memory block implemented as 128 64-bit words.
* Memory blocks can be copied, XORed. Internal words can be accessed by [] (no
* bounds checking).
*/
typedef struct block_ { uint64_t v[ARGON2_QWORDS_IN_BLOCK]; } block; /* 128 x 64-bit = 1 KiB */
/*
* Argon2 instance: memory pointer, number of passes, amount of memory, type,
* and derived values.
* Used to evaluate the number and location of blocks to construct in each
* thread
*/
typedef struct Argon2_instance_t {
    block *memory;               /* Memory pointer */
    uint32_t version;            /* Argon2 version number (selects overwrite vs XOR fill) */
    uint32_t passes;             /* Number of passes */
    uint32_t memory_blocks;      /* Number of blocks in memory */
    uint32_t segment_length;     /* blocks per segment (lane_length / number of slices -- TODO confirm) */
    uint32_t lane_length;        /* blocks per lane */
    uint32_t lanes;              /* number of lanes */
    uint32_t threads;            /* number of worker threads */
    argon2_type type;            /* Argon2 variant (d/i/id) */
    int print_internals;         /* whether to print the memory blocks */
    argon2_context *context_ptr; /* points back to original context */
    randomx_argon2_impl *impl;   /* fill-segment implementation (ref/SSSE3/... -- see argon2_*.c) */
} argon2_instance_t;
/*
* Argon2 position: where we construct the block right now. Used to distribute
* work between threads.
*/
typedef struct Argon2_position_t {
    uint32_t pass;  /* current pass (0-based) */
    uint32_t lane;  /* lane being filled */
    uint8_t slice;  /* slice (segment) within the lane */
    uint32_t index; /* block index within the segment */
} argon2_position_t;
/*Struct that holds the inputs for thread handling FillSegment*/
typedef struct Argon2_thread_data {
    argon2_instance_t *instance_ptr; /* shared instance all threads operate on */
    argon2_position_t pos;           /* segment assigned to this thread */
} argon2_thread_data;
/*************************Argon2 core functions********************************/
/*
* Computes absolute position of reference block in the lane following a skewed
* distribution and using a pseudo-random value as input
* @param instance Pointer to the current instance
* @param position Pointer to the current position
* @param pseudo_rand 32-bit pseudo-random value used to determine the position
* @param same_lane Indicates if the block will be taken from the current lane.
* If so we can reference the current segment
* @pre All pointers must be valid
*/
uint32_t randomx_argon2_index_alpha(const argon2_instance_t *instance,
const argon2_position_t *position, uint32_t pseudo_rand,
int same_lane);
/*
* Function that validates all inputs against predefined restrictions and return
* an error code
* @param context Pointer to current Argon2 context
* @return ARGON2_OK if everything is all right, otherwise one of error codes
* (all defined in <argon2.h>
*/
int randomx_argon2_validate_inputs(const argon2_context *context);
/*
* Function allocates memory, hashes the inputs with Blake, and creates first
* two blocks. Returns the pointer to the main memory with 2 blocks per lane
* initialized
* @param context Pointer to the Argon2 internal structure containing memory
* pointer, and parameters for time and space requirements.
* @param instance Current Argon2 instance
* @return Zero if successful, -1 if memory failed to allocate. @context->state
* will be modified if successful.
*/
int randomx_argon2_initialize(argon2_instance_t *instance, argon2_context *context);
/*
* Function that fills the entire memory t_cost times based on the first two
* blocks in each lane
* @param instance Pointer to the current instance
* @return ARGON2_OK if successful, @context->state
*/
int randomx_argon2_fill_memory_blocks(argon2_instance_t* instance);
#if defined(__cplusplus)
}
#endif
#endif

181
randomx/argon2_ref.c Normal file
View File

@ -0,0 +1,181 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Original code from Argon2 reference source code package used under CC0 Licence
* https://github.com/P-H-C/phc-winner-argon2
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*/
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <cstdio>
#include "argon2.h"
#include "argon2_core.h"
#include "blake2/blamka-round-ref.h"
#include "blake2/blake2-impl.h"
#include "blake2/blake2.h"
/* Copies the full 1 KiB payload of src into dst. */
static void copy_block(block* dst, const block* src) {
    memcpy(dst->v, src->v, sizeof(dst->v));
}
/* XORs src into dst, one 64-bit word at a time. */
static void xor_block(block* dst, const block* src) {
    size_t w;
    for (w = 0; w < ARGON2_QWORDS_IN_BLOCK; w++) {
        dst->v[w] ^= src->v[w];
    }
}
/*
* Function fills a new memory block and optionally XORs the old block over the new one.
* @next_block must be initialized.
* @param prev_block Pointer to the previous block
* @param ref_block Pointer to the reference block
* @param next_block Pointer to the block to be constructed
* @param with_xor Whether to XOR into the new block (1) or just overwrite (0)
* @pre all block pointers must be valid
*/
static void fill_block(const block *prev_block, const block *ref_block, block *next_block, int with_xor) {
    block blockR, block_tmp;
    unsigned i;
    /* blockR = ref_block XOR prev_block; block_tmp keeps a copy for the final mix */
    copy_block(&blockR, ref_block);
    xor_block(&blockR, prev_block);
    copy_block(&block_tmp, &blockR);
    /* Now blockR = ref_block + prev_block and block_tmp = ref_block + prev_block */
    if (with_xor) {
        /* Saving the next block contents for XOR over: */
        xor_block(&block_tmp, next_block);
        /* Now blockR = ref_block + prev_block and
           block_tmp = ref_block + prev_block + next_block */
    }
    /* Apply Blake2 on columns of 64-bit words: (0,1,...,15) , then
       (16,17,..31)... finally (112,113,...127) */
    for (i = 0; i < 8; ++i) {
        BLAKE2_ROUND_NOMSG(
            blockR.v[16 * i], blockR.v[16 * i + 1], blockR.v[16 * i + 2],
            blockR.v[16 * i + 3], blockR.v[16 * i + 4], blockR.v[16 * i + 5],
            blockR.v[16 * i + 6], blockR.v[16 * i + 7], blockR.v[16 * i + 8],
            blockR.v[16 * i + 9], blockR.v[16 * i + 10], blockR.v[16 * i + 11],
            blockR.v[16 * i + 12], blockR.v[16 * i + 13], blockR.v[16 * i + 14],
            blockR.v[16 * i + 15]);
    }
    /* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then
       (2,3,18,19,...,114,115).. finally (14,15,30,31,...,126,127) */
    for (i = 0; i < 8; i++) {
        BLAKE2_ROUND_NOMSG(
            blockR.v[2 * i], blockR.v[2 * i + 1], blockR.v[2 * i + 16],
            blockR.v[2 * i + 17], blockR.v[2 * i + 32], blockR.v[2 * i + 33],
            blockR.v[2 * i + 48], blockR.v[2 * i + 49], blockR.v[2 * i + 64],
            blockR.v[2 * i + 65], blockR.v[2 * i + 80], blockR.v[2 * i + 81],
            blockR.v[2 * i + 96], blockR.v[2 * i + 97], blockR.v[2 * i + 112],
            blockR.v[2 * i + 113]);
    }
    /* Final feed-forward: next_block = block_tmp XOR P(blockR) */
    copy_block(next_block, &block_tmp);
    xor_block(next_block, &blockR);
}
/*
 * Reference (portable C) implementation of Argon2 segment filling: walks one
 * segment of one lane, picking a pseudo-random reference block for each new
 * block and mixing it with the previous block via fill_block.
 *
 * Fixes vs. the previous revision: the leftover debug printf calls were
 * removed (the second one passed uint64_t arguments to "%d", which is
 * undefined behavior; the SSSE3 twin already had its print commented out),
 * along with the unused locals address_block, input_block and zero_block.
 *
 * @param instance current Argon2 instance (memory, geometry, version)
 * @param position pass/lane/slice being filled; taken by value, .index is
 *                 updated locally per block
 */
void randomx_argon2_fill_segment_ref(const argon2_instance_t *instance, argon2_position_t position) {
    block *ref_block = NULL, *curr_block = NULL;
    uint64_t pseudo_rand, ref_index, ref_lane;
    uint32_t prev_offset, curr_offset;
    uint32_t starting_index;
    uint32_t i;
    if (instance == NULL) {
        return;
    }
    starting_index = 0;
    if ((0 == position.pass) && (0 == position.slice)) {
        starting_index = 2; /* the first two blocks were generated during init */
    }
    /* Offset of the current block */
    curr_offset = position.lane * instance->lane_length +
        position.slice * instance->segment_length + starting_index;
    if (0 == curr_offset % instance->lane_length) {
        /* Last block in this lane */
        prev_offset = curr_offset + instance->lane_length - 1;
    }
    else {
        /* Previous block */
        prev_offset = curr_offset - 1;
    }
    for (i = starting_index; i < instance->segment_length; ++i, ++curr_offset, ++prev_offset) {
        /* 1.1 Rotating prev_offset if needed (wrap at the lane boundary) */
        if (curr_offset % instance->lane_length == 1) {
            prev_offset = curr_offset - 1;
        }
        /* 1.2.1 Taking pseudo-random value from the previous block */
        pseudo_rand = instance->memory[prev_offset].v[0];
        /* 1.2.2 Computing the lane of the reference block */
        ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
        if ((position.pass == 0) && (position.slice == 0)) {
            /* Can not reference other lanes yet */
            ref_lane = position.lane;
        }
        /* 1.2.3 Computing the index of the reference block within the lane */
        position.index = i;
        ref_index = randomx_argon2_index_alpha(instance, &position,
            pseudo_rand & 0xFFFFFFFF, ref_lane == position.lane);
        /* 2 Creating a new block */
        ref_block = instance->memory + instance->lane_length * ref_lane + ref_index;
        curr_block = instance->memory + curr_offset;
        if (ARGON2_VERSION_10 == instance->version) {
            /* version 1.2.1 and earlier: overwrite, not XOR */
            fill_block(instance->memory + prev_offset, ref_block, curr_block, 0);
        }
        else {
            if (0 == position.pass) {
                fill_block(instance->memory + prev_offset, ref_block, curr_block, 0);
            }
            else {
                fill_block(instance->memory + prev_offset, ref_block, curr_block, 1);
            }
        }
    }
}

183
randomx/argon2_ssse3.c Normal file
View File

@ -0,0 +1,183 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Original code from Argon2 reference source code package used under CC0 Licence
* https://github.com/P-H-C/phc-winner-argon2
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*/
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include "argon2.h"
#if defined(_MSC_VER) //MSVC doesn't define SSSE3
#define __SSSE3__
#endif
void randomx_argon2_fill_segment_ssse3(const argon2_instance_t* instance,
argon2_position_t position);
/*
 * Returns the SSSE3 fill-segment implementation, or NULL when this
 * translation unit was compiled without SSSE3 support.
 * (Declared with (void): an empty parameter list is an obsolescent
 * non-prototype declaration in C and is removed in C23.)
 */
randomx_argon2_impl* randomx_argon2_impl_ssse3(void) {
#if defined(__SSSE3__)
    return &randomx_argon2_fill_segment_ssse3;
#else
    return NULL;
#endif
}
#if defined(__SSSE3__)
#include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
#include "argon2_core.h"
#include "blake2/blamka-round-ssse3.h"
#include "blake2/blake2-impl.h"
#include "blake2/blake2.h"
/* SSE variant of fill_block: "state" holds the previous block in 64 XMM
   registers-worth of data and is updated in place to become the new block. */
static void fill_block(__m128i* state, const block* ref_block,
    block* next_block, int with_xor) {
    __m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
    unsigned int i;
    if (with_xor) {
        /* state = prev XOR ref; block_XY additionally folds in the old
           next_block so the feed-forward XOR below includes it */
        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
            state[i] = _mm_xor_si128(
                state[i], _mm_loadu_si128((const __m128i*)ref_block->v + i));
            block_XY[i] = _mm_xor_si128(
                state[i], _mm_loadu_si128((const __m128i*)next_block->v + i));
        }
    }
    else {
        /* state = block_XY = prev XOR ref */
        for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
            block_XY[i] = state[i] = _mm_xor_si128(
                state[i], _mm_loadu_si128((const __m128i*)ref_block->v + i));
        }
    }
    /* Blake2 rounds over the 8 "row" groups of 8 XMM words each */
    for (i = 0; i < 8; ++i) {
        BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
            state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
            state[8 * i + 6], state[8 * i + 7]);
    }
    /* Blake2 rounds over the 8 "column" groups (stride 8) */
    for (i = 0; i < 8; ++i) {
        BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
            state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
            state[8 * 6 + i], state[8 * 7 + i]);
    }
    /* Feed-forward and store the finished block */
    for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
        state[i] = _mm_xor_si128(state[i], block_XY[i]);
        _mm_storeu_si128((__m128i*)next_block->v + i, state[i]);
    }
}
/*
 * SSSE3-accelerated segment filling. Mirrors the reference implementation,
 * but keeps the running previous block in the XMM array "state" across
 * iterations so fill_block never reloads it from memory.
 *
 * Fixes vs. the previous revision: removed the unused locals address_block
 * and input_block and a dead commented-out debug printf.
 *
 * @param instance current Argon2 instance (memory, geometry, version)
 * @param position pass/lane/slice being filled; taken by value, .index is
 *                 updated locally per block
 */
void randomx_argon2_fill_segment_ssse3(const argon2_instance_t* instance,
    argon2_position_t position) {
    block* ref_block = NULL, * curr_block = NULL;
    uint64_t pseudo_rand, ref_index, ref_lane;
    uint32_t prev_offset, curr_offset;
    uint32_t starting_index, i;
    __m128i state[ARGON2_OWORDS_IN_BLOCK];
    if (instance == NULL) {
        return;
    }
    starting_index = 0;
    if ((0 == position.pass) && (0 == position.slice)) {
        starting_index = 2; /* we have already generated the first two blocks */
    }
    /* Offset of the current block */
    curr_offset = position.lane * instance->lane_length +
        position.slice * instance->segment_length + starting_index;
    if (0 == curr_offset % instance->lane_length) {
        /* Last block in this lane */
        prev_offset = curr_offset + instance->lane_length - 1;
    }
    else {
        /* Previous block */
        prev_offset = curr_offset - 1;
    }
    /* Prime the register-resident copy of the previous block */
    memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
    for (i = starting_index; i < instance->segment_length;
        ++i, ++curr_offset, ++prev_offset) {
        /* 1.1 Rotating prev_offset if needed */
        if (curr_offset % instance->lane_length == 1) {
            prev_offset = curr_offset - 1;
        }
        /* 1.2 Computing the index of the reference block */
        /* 1.2.1 Taking pseudo-random value from the previous block */
        pseudo_rand = instance->memory[prev_offset].v[0];
        /* 1.2.2 Computing the lane of the reference block */
        ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
        if ((position.pass == 0) && (position.slice == 0)) {
            /* Can not reference other lanes yet */
            ref_lane = position.lane;
        }
        /* 1.2.3 Computing the index of the reference block within the lane */
        position.index = i;
        ref_index = randomx_argon2_index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
            ref_lane == position.lane);
        /* 2 Creating a new block */
        ref_block =
            instance->memory + instance->lane_length * ref_lane + ref_index;
        curr_block = instance->memory + curr_offset;
        if (ARGON2_VERSION_10 == instance->version) {
            /* version 1.2.1 and earlier: overwrite, not XOR */
            fill_block(state, ref_block, curr_block, 0);
        }
        else {
            if (0 == position.pass) {
                fill_block(state, ref_block, curr_block, 0);
            }
            else {
                fill_block(state, ref_block, curr_block, 1);
            }
        }
    }
}
#endif

View File

@ -0,0 +1,48 @@
; File start: ..\src\configuration.h
RANDOMX_ARGON_MEMORY EQU 262144t
RANDOMX_ARGON_ITERATIONS EQU 3t
RANDOMX_ARGON_LANES EQU 1t
RANDOMX_ARGON_SALT TEXTEQU <"RandomX\x03">
RANDOMX_CACHE_ACCESSES EQU 8t
RANDOMX_SUPERSCALAR_LATENCY EQU 170t
RANDOMX_DATASET_BASE_SIZE EQU 2147483648t
RANDOMX_DATASET_EXTRA_SIZE EQU 33554368t
RANDOMX_PROGRAM_SIZE EQU 256t
RANDOMX_PROGRAM_ITERATIONS EQU 2048t
RANDOMX_PROGRAM_COUNT EQU 8t
RANDOMX_SCRATCHPAD_L3 EQU 2097152t
RANDOMX_SCRATCHPAD_L2 EQU 262144t
RANDOMX_SCRATCHPAD_L1 EQU 16384t
RANDOMX_JUMP_BITS EQU 8t
RANDOMX_JUMP_OFFSET EQU 8t
RANDOMX_FREQ_IADD_RS EQU 16t
RANDOMX_FREQ_IADD_M EQU 7t
RANDOMX_FREQ_ISUB_R EQU 16t
RANDOMX_FREQ_ISUB_M EQU 7t
RANDOMX_FREQ_IMUL_R EQU 16t
RANDOMX_FREQ_IMUL_M EQU 4t
RANDOMX_FREQ_IMULH_R EQU 4t
RANDOMX_FREQ_IMULH_M EQU 1t
RANDOMX_FREQ_ISMULH_R EQU 4t
RANDOMX_FREQ_ISMULH_M EQU 1t
RANDOMX_FREQ_IMUL_RCP EQU 8t
RANDOMX_FREQ_INEG_R EQU 2t
RANDOMX_FREQ_IXOR_R EQU 15t
RANDOMX_FREQ_IXOR_M EQU 5t
RANDOMX_FREQ_IROR_R EQU 8t
RANDOMX_FREQ_IROL_R EQU 2t
RANDOMX_FREQ_ISWAP_R EQU 4t
RANDOMX_FREQ_FSWAP_R EQU 4t
RANDOMX_FREQ_FADD_R EQU 16t
RANDOMX_FREQ_FADD_M EQU 5t
RANDOMX_FREQ_FSUB_R EQU 16t
RANDOMX_FREQ_FSUB_M EQU 5t
RANDOMX_FREQ_FSCAL_R EQU 6t
RANDOMX_FREQ_FMUL_R EQU 32t
RANDOMX_FREQ_FDIV_M EQU 4t
RANDOMX_FREQ_FSQRT_R EQU 6t
RANDOMX_FREQ_CBRANCH EQU 25t
RANDOMX_FREQ_CFROUND EQU 1t
RANDOMX_FREQ_ISTORE EQU 16t
RANDOMX_FREQ_NOP EQU 0t
; File end: ..\src\configuration.h

View File

@ -0,0 +1,10 @@
;# Program epilogue (System V AMD64): pops the callee-saved registers in the
;# reverse order of the matching prologue's pushes, then returns.
;# restore callee-saved registers - System V AMD64 ABI
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
;# program finished
ret 0

View File

@ -0,0 +1,19 @@
;# Spills the VM's integer registers (r8-r15) and float registers
;# (xmm0-xmm7) into the RegisterFile whose address was pushed earlier
;# (popped into rcx here).
;# save VM register values
pop rcx
mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9
mov qword ptr [rcx+16], r10
mov qword ptr [rcx+24], r11
mov qword ptr [rcx+32], r12
mov qword ptr [rcx+40], r13
mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15
movdqa xmmword ptr [rcx+64], xmm0
movdqa xmmword ptr [rcx+80], xmm1
movdqa xmmword ptr [rcx+96], xmm2
movdqa xmmword ptr [rcx+112], xmm3
;# advance the base so the remaining stores keep short displacements
lea rcx, [rcx+64]
movdqa xmmword ptr [rcx+64], xmm4
movdqa xmmword ptr [rcx+80], xmm5
movdqa xmmword ptr [rcx+96], xmm6
movdqa xmmword ptr [rcx+112], xmm7

View File

@ -0,0 +1,24 @@
;# Program epilogue (Microsoft x64): restores xmm6-xmm15 (callee-saved on
;# Windows) from the two 80-byte stack areas reserved by the prologue, then
;# pops the integer callee-saved registers and returns.
;# restore callee-saved registers - Microsoft x64 calling convention
movdqu xmm15, xmmword ptr [rsp]
movdqu xmm14, xmmword ptr [rsp+16]
movdqu xmm13, xmmword ptr [rsp+32]
movdqu xmm12, xmmword ptr [rsp+48]
movdqu xmm11, xmmword ptr [rsp+64]
add rsp, 80
movdqu xmm10, xmmword ptr [rsp]
movdqu xmm9, xmmword ptr [rsp+16]
movdqu xmm8, xmmword ptr [rsp+32]
movdqu xmm7, xmmword ptr [rsp+48]
movdqu xmm6, xmmword ptr [rsp+64]
add rsp, 80
pop r15
pop r14
pop r13
pop r12
pop rsi
pop rdi
pop rbp
pop rbx
;# program finished
ret

View File

@ -0,0 +1,28 @@
;# Loop load: XOR the scratchpad line at rsi+rax into r8-r15 and load the
;# line at rsi+rdx into xmm0-xmm7 (both addresses are pushed for the
;# matching loop-store stub). xmm4-xmm7 are normalized via the xmm13
;# mantissa mask and xmm14 exponent constant.
lea rcx, [rsi+rax]
push rcx
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
xor r11, qword ptr [rcx+24]
xor r12, qword ptr [rcx+32]
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]
lea rcx, [rsi+rdx]
push rcx
cvtdq2pd xmm0, qword ptr [rcx+0]
cvtdq2pd xmm1, qword ptr [rcx+8]
cvtdq2pd xmm2, qword ptr [rcx+16]
cvtdq2pd xmm3, qword ptr [rcx+24]
cvtdq2pd xmm4, qword ptr [rcx+32]
cvtdq2pd xmm5, qword ptr [rcx+40]
cvtdq2pd xmm6, qword ptr [rcx+48]
cvtdq2pd xmm7, qword ptr [rcx+56]
andps xmm4, xmm13
andps xmm5, xmm13
andps xmm6, xmm13
andps xmm7, xmm13
orps xmm4, xmm14
orps xmm5, xmm14
orps xmm6, xmm14
orps xmm7, xmm14

View File

@ -0,0 +1,18 @@
;# Loop store: pops the two addresses pushed by the loop-load stub; writes
;# r8-r15 to the first, then stores xmm0-xmm3 XOR xmm4-xmm7 to the second.
pop rcx
mov qword ptr [rcx+0], r8
mov qword ptr [rcx+8], r9
mov qword ptr [rcx+16], r10
mov qword ptr [rcx+24], r11
mov qword ptr [rcx+32], r12
mov qword ptr [rcx+40], r13
mov qword ptr [rcx+48], r14
mov qword ptr [rcx+56], r15
pop rcx
xorpd xmm0, xmm4
xorpd xmm1, xmm5
xorpd xmm2, xmm6
xorpd xmm3, xmm7
movapd xmmword ptr [rcx+0], xmm0
movapd xmmword ptr [rcx+16], xmm1
movapd xmmword ptr [rcx+32], xmm2
movapd xmmword ptr [rcx+48], xmm3

View File

@ -0,0 +1,34 @@
;# Program prologue (System V AMD64). Incoming arguments:
;#   rdi = RegisterFile&, rsi = {mx/ma, dataset ptr}, rdx = scratchpad,
;#   rcx = loop counter. Saves callee-saved registers, zeroes the VM
;# integer registers and loads the constant xmm registers (a-group).
;# callee-saved registers - System V AMD64 ABI
push rbx
push rbp
push r12
push r13
push r14
push r15
;# function arguments
mov rbx, rcx ;# loop counter
push rdi ;# RegisterFile& registerFile
mov rcx, rdi
mov rbp, qword ptr [rsi] ;# "mx", "ma"
mov rdi, qword ptr [rsi+8] ;# uint8_t* dataset
mov rsi, rdx ;# uint8_t* scratchpad
mov rax, rbp
;# zero integer registers
xor r8, r8
xor r9, r9
xor r10, r10
xor r11, r11
xor r12, r12
xor r13, r13
xor r14, r14
xor r15, r15
;# load constant registers
lea rcx, [rcx+120]
movapd xmm8, xmmword ptr [rcx+72]
movapd xmm9, xmmword ptr [rcx+88]
movapd xmm10, xmmword ptr [rcx+104]
movapd xmm11, xmmword ptr [rcx+120]

View File

@ -0,0 +1,47 @@
;# Program prologue (Microsoft x64). Incoming arguments:
;#   rcx = RegisterFile&, rdx = {mx/ma, dataset ptr}, r8 = scratchpad,
;#   r9 = loop counter. Saves integer callee-saved registers plus
;# xmm6-xmm15 (callee-saved on Windows), zeroes the VM integer registers
;# and loads the constant xmm registers.
;# callee-saved registers - Microsoft x64 calling convention
push rbx
push rbp
push rdi
push rsi
push r12
push r13
push r14
push r15
sub rsp, 80
movdqu xmmword ptr [rsp+64], xmm6
movdqu xmmword ptr [rsp+48], xmm7
movdqu xmmword ptr [rsp+32], xmm8
movdqu xmmword ptr [rsp+16], xmm9
movdqu xmmword ptr [rsp+0], xmm10
sub rsp, 80
movdqu xmmword ptr [rsp+64], xmm11
movdqu xmmword ptr [rsp+48], xmm12
movdqu xmmword ptr [rsp+32], xmm13
movdqu xmmword ptr [rsp+16], xmm14
movdqu xmmword ptr [rsp+0], xmm15
;# function arguments
push rcx ;# RegisterFile& registerFile
mov rbp, qword ptr [rdx] ;# "mx", "ma"
mov rdi, qword ptr [rdx+8] ;# uint8_t* dataset
mov rsi, r8 ;# uint8_t* scratchpad
mov rbx, r9 ;# loop counter
mov rax, rbp
;# zero integer registers
xor r8, r8
xor r9, r9
xor r10, r10
xor r11, r11
xor r12, r12
xor r13, r13
xor r14, r14
xor r15, r15
;# load constant registers
lea rcx, [rcx+120]
movapd xmm8, xmmword ptr [rcx+72]
movapd xmm9, xmmword ptr [rcx+88]
movapd xmm10, xmmword ptr [rcx+104]
movapd xmm11, xmmword ptr [rcx+120]

View File

@ -0,0 +1,17 @@
;# Dataset read: update "mx", prefetch the next dataset line, swap ma/mx,
;# then XOR the current dataset cache line into the VM integer registers.
xor rbp, rax ;# modify "mx"
mov edx, ebp ;# edx = mx
and edx, RANDOMX_DATASET_BASE_MASK
prefetchnta byte ptr [rdi+rdx]
ror rbp, 32 ;# swap "ma" and "mx"
mov edx, ebp ;# edx = ma
and edx, RANDOMX_DATASET_BASE_MASK
lea rcx, [rdi+rdx] ;# dataset cache line
xor r8, qword ptr [rcx+0]
xor r9, qword ptr [rcx+8]
xor r10, qword ptr [rcx+16]
xor r11, qword ptr [rcx+24]
xor r12, qword ptr [rcx+32]
xor r13, qword ptr [rcx+40]
xor r14, qword ptr [rcx+48]
xor r15, qword ptr [rcx+56]

View File

@ -0,0 +1,10 @@
;# Restores rbx and XORs the spilled r8-r15 values (presumably saved by the
;# matching init stub's 72-byte spill -- confirm pairing) back into the
;# registers, then releases the spill area.
mov rbx, qword ptr [rsp+64]
xor r8, qword ptr [rsp+56]
xor r9, qword ptr [rsp+48]
xor r10, qword ptr [rsp+40]
xor r11, qword ptr [rsp+32]
xor r12, qword ptr [rsp+24]
xor r13, qword ptr [rsp+16]
xor r14, qword ptr [rsp+8]
xor r15, qword ptr [rsp+0]
add rsp, 72

View File

@ -0,0 +1,17 @@
;# Spills rbx and r8-r15 into a 72-byte stack area, updates mx/ma and
;# computes the dataset block number in ebx before the superscalar-hash
;# call (call target patched in by the JIT; see the commented lines).
sub rsp, 72
mov qword ptr [rsp+64], rbx
mov qword ptr [rsp+56], r8
mov qword ptr [rsp+48], r9
mov qword ptr [rsp+40], r10
mov qword ptr [rsp+32], r11
mov qword ptr [rsp+24], r12
mov qword ptr [rsp+16], r13
mov qword ptr [rsp+8], r14
mov qword ptr [rsp+0], r15
xor rbp, rax ;# modify "mx"
ror rbp, 32 ;# swap "ma" and "mx"
mov ebx, ebp ;# ecx = ma
and ebx, RANDOMX_DATASET_BASE_MASK
shr ebx, 6 ;# ebx = Dataset block number
;# add ebx, datasetOffset / 64
;# call 32768

View File

@ -0,0 +1,24 @@
;# Little-endian byte dumps of the 64-bit constants used by the
;# superscalar hash (the decimal value is given above each row).
r0_mul:
;#/ 6364136223846793005
db 45, 127, 149, 76, 45, 244, 81, 88
r1_add:
;#/ 9298411001130361340
db 252, 161, 245, 89, 138, 151, 10, 129
r2_add:
;#/ 12065312585734608966
db 70, 216, 194, 56, 223, 153, 112, 167
r3_add:
;#/ 9306329213124626780
db 92, 73, 34, 191, 28, 185, 38, 129
r4_add:
;#/ 5281919268842080866
db 98, 138, 159, 23, 151, 37, 77, 73
r5_add:
;#/ 10536153434571861004
db 12, 236, 170, 206, 185, 239, 55, 146
r6_add:
;#/ 3398623926847679864
db 120, 45, 230, 108, 116, 86, 42, 47
r7_add:
;#/ 9549104520008361294
db 78, 229, 44, 182, 247, 59, 133, 132

View File

@ -0,0 +1,8 @@
;# XORs the 64-byte cache line addressed by rbx into r8-r15.
xor r8, qword ptr [rbx+0]
xor r9, qword ptr [rbx+8]
xor r10, qword ptr [rbx+16]
xor r11, qword ptr [rbx+24]
xor r12, qword ptr [rbx+32]
xor r13, qword ptr [rbx+40]
xor r14, qword ptr [rbx+48]
xor r15, qword ptr [rbx+56]

View File

@ -0,0 +1,4 @@
;# Wraps the item index into the cache, converts it to a byte offset
;# (x64 per cache line) and prefetches that line.
and rbx, RANDOMX_CACHE_MASK
shl rbx, 6
add rbx, rdi
prefetchnta byte ptr [rbx]

View File

@ -0,0 +1,6 @@
;# 16-byte constants (two 64-bit lanes each) loaded into the reserved
;# constant xmm registers; names match mantissaMaskReg/exponentMaskReg/
;# scaleMaskReg in assembly_generator_x86.cpp.
mantissaMask:
;# keeps the low 7 bytes of each 64-bit lane
db 255, 255, 255, 255, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 0
exp240:
;# NOTE(review): all-zero here; presumably patched/combined elsewhere -- confirm
db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
scaleMask:
;# 0x80F0000000000000 per lane (sign + high exponent bits)
db 0, 0, 0, 0, 0, 0, 240, 128, 0, 0, 0, 0, 0, 0, 240, 128

View File

@ -0,0 +1,7 @@
;# Computes floor(2^(64 + floor(log2(d))) / d) for an unsigned divisor d
;# passed in rcx; quotient returned in rax. bsr finds the index of the
;# highest set bit, which becomes the shift for the 128-bit dividend rdx:rax.
mov edx, 1
mov r8, rcx
xor eax, eax
bsr rcx, rcx
shl rdx, cl
div r8
ret

View File

@ -0,0 +1,612 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <climits>
#include "assembly_generator_x86.hpp"
#include "common.hpp"
#include "reciprocal.h"
#include "program.hpp"
#include "superscalar.hpp"
namespace randomx {
// Textual x86-64 register names used when rendering instructions as assembly.
static const char* regR[] = { "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" };      // integer registers r0-r7
static const char* regR32[] = { "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" }; // 32-bit views of regR
static const char* regFE[] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" }; // combined F+E groups
static const char* regF[] = { "xmm0", "xmm1", "xmm2", "xmm3" }; // F group
static const char* regE[] = { "xmm4", "xmm5", "xmm6", "xmm7" }; // E group
static const char* regA[] = { "xmm8", "xmm9", "xmm10", "xmm11" }; // A group (loaded as constants in the prologue)
static const char* tempRegx = "xmm12";          // scratch xmm register
static const char* mantissaMaskReg = "xmm13";   // mantissa mask constant
static const char* exponentMaskReg = "xmm14";   // exponent constant
static const char* scaleMaskReg = "xmm15";      // scale mask constant
static const char* regIc = "rbx";               // iteration counter (loop counter in the prologue)
static const char* regIc32 = "ebx";             // 32-bit view of regIc
static const char* regIc8 = "bl";               // 8-bit view of regIc
static const char* regScratchpadAddr = "rsi";   // scratchpad base pointer
//Emit x86-64 assembly text for a RandomX program into asmCode.
//Each instruction gets a "randomx_isn_<i>:" label so CBRANCH can jump back.
void AssemblyGeneratorX86::generateProgram(Program& prog) {
	//no register has been written yet; CBRANCH uses this to pick jump targets
	for (unsigned reg = 0; reg < RegistersCount; ++reg) {
		registerUsage[reg] = -1;
	}
	//reset the output buffer before emitting a fresh program
	asmCode.str(std::string());
	const unsigned programSize = prog.getSize();
	for (unsigned pc = 0; pc < programSize; ++pc) {
		asmCode << "randomx_isn_" << pc << ":" << std::endl;
		Instruction& instr = prog(pc);
		//fold the raw 8-bit operand fields into the valid register range
		instr.src %= RegistersCount;
		instr.dst %= RegistersCount;
		generateCode(instr, pc);
	}
}
//Translate a superscalar (cache/dataset generation) program into x86-64
//assembly text. Registers r8-r15 hold the eight superscalar registers;
//rax:rdx serves as scratch for the widening multiplies.
void AssemblyGeneratorX86::generateAsm(SuperscalarProgram& prog) {
asmCode.str(std::string()); //clear
#ifdef RANDOMX_ALIGN
asmCode << "ALIGN 16" << std::endl;
#endif
for (unsigned i = 0; i < prog.getSize(); ++i) {
Instruction& instr = prog(i);
switch ((SuperscalarInstructionType)instr.opcode)
{
case SuperscalarInstructionType::ISUB_R:
asmCode << "sub " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
break;
case SuperscalarInstructionType::IXOR_R:
asmCode << "xor " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
break;
//dst += src << shift, encoded as lea so CPU flags stay intact
case SuperscalarInstructionType::IADD_RS:
asmCode << "lea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << "]" << std::endl;
break;
case SuperscalarInstructionType::IMUL_R:
asmCode << "imul " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
break;
case SuperscalarInstructionType::IROR_C:
asmCode << "ror " << regR[instr.dst] << ", " << instr.getImm32() << std::endl;
break;
//the C7/C8/C9 variants are the same add/xor with an immediate; they differ
//only in the nop padding emitted under RANDOMX_ALIGN, which keeps encoded
//instruction lengths uniform (7, 8 or 9 bytes)
case SuperscalarInstructionType::IADD_C7:
asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
break;
case SuperscalarInstructionType::IXOR_C7:
asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
break;
case SuperscalarInstructionType::IADD_C8:
asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
#ifdef RANDOMX_ALIGN
asmCode << "nop" << std::endl;
#endif
break;
case SuperscalarInstructionType::IXOR_C8:
asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
#ifdef RANDOMX_ALIGN
asmCode << "nop" << std::endl;
#endif
break;
case SuperscalarInstructionType::IADD_C9:
asmCode << "add " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
#ifdef RANDOMX_ALIGN
asmCode << "xchg ax, ax ;nop" << std::endl;
#endif
break;
case SuperscalarInstructionType::IXOR_C9:
asmCode << "xor " << regR[instr.dst] << ", " << (int32_t)instr.getImm32() << std::endl;
#ifdef RANDOMX_ALIGN
asmCode << "xchg ax, ax ;nop" << std::endl;
#endif
break;
//unsigned high multiply: mul leaves the upper 64 bits of the product in rdx
case SuperscalarInstructionType::IMULH_R:
asmCode << "mov rax, " << regR[instr.dst] << std::endl;
asmCode << "mul " << regR[instr.src] << std::endl;
asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl;
break;
//signed high multiply, same pattern with one-operand imul
case SuperscalarInstructionType::ISMULH_R:
asmCode << "mov rax, " << regR[instr.dst] << std::endl;
asmCode << "imul " << regR[instr.src] << std::endl;
asmCode << "mov " << regR[instr.dst] << ", rdx" << std::endl;
break;
//multiply by a precomputed fixed-point reciprocal of the immediate
case SuperscalarInstructionType::IMUL_RCP:
asmCode << "mov rax, " << (int64_t)randomx_reciprocal(instr.getImm32()) << std::endl;
asmCode << "imul " << regR[instr.dst] << ", rax" << std::endl;
break;
default:
UNREACHABLE;
}
}
}
//Emit a self-contained C translation of a superscalar program: a function
//"superScalar(uint64_t r[8])" plus the portable mulh/smulh/rotr helpers it
//needs. Intended for verification/porting rather than performance.
void AssemblyGeneratorX86::generateC(SuperscalarProgram& prog) {
asmCode.str(std::string()); //clear
//prelude: portable 64x64->high-64 multiply and rotate helpers, with
//compiler-specific fast paths (GCC/Clang __int128, MSVC intrinsics)
asmCode << "#include <stdint.h>" << std::endl;
asmCode << "#if defined(__SIZEOF_INT128__)" << std::endl;
asmCode << "	static inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl;
asmCode << "		return ((unsigned __int128)a * b) >> 64;" << std::endl;
asmCode << "	}" << std::endl;
asmCode << "	static inline int64_t smulh(int64_t a, int64_t b) {" << std::endl;
asmCode << "		return ((__int128)a * b) >> 64;" << std::endl;
asmCode << "	}" << std::endl;
asmCode << "	#define HAVE_MULH" << std::endl;
asmCode << "	#define HAVE_SMULH" << std::endl;
asmCode << "#endif" << std::endl;
asmCode << "#if defined(_MSC_VER)" << std::endl;
asmCode << "	#define HAS_VALUE(X) X ## 0" << std::endl;
asmCode << "	#define EVAL_DEFINE(X) HAS_VALUE(X)" << std::endl;
asmCode << "	#include <intrin.h>" << std::endl;
asmCode << "	#include <stdlib.h>" << std::endl;
asmCode << "	static __inline uint64_t rotr(uint64_t x , int c) {" << std::endl;
asmCode << "		return _rotr64(x, c);" << std::endl;
asmCode << "	}" << std::endl;
asmCode << "	#define HAVE_ROTR" << std::endl;
asmCode << "	#if EVAL_DEFINE(__MACHINEARM64_X64(1))" << std::endl;
asmCode << "		static __inline uint64_t mulh(uint64_t a, uint64_t b) {" << std::endl;
asmCode << "			return __umulh(a, b);" << std::endl;
asmCode << "		}" << std::endl;
asmCode << "		#define HAVE_MULH" << std::endl;
asmCode << "	#endif" << std::endl;
asmCode << "	#if EVAL_DEFINE(__MACHINEX64(1))" << std::endl;
asmCode << "		static __inline int64_t smulh(int64_t a, int64_t b) {" << std::endl;
asmCode << "			int64_t hi;" << std::endl;
asmCode << "			_mul128(a, b, &hi);" << std::endl;
asmCode << "			return hi;" << std::endl;
asmCode << "		}" << std::endl;
asmCode << "		#define HAVE_SMULH" << std::endl;
asmCode << "	#endif" << std::endl;
asmCode << "#endif" << std::endl;
asmCode << "#ifndef HAVE_ROTR" << std::endl;
asmCode << "	static inline uint64_t rotr(uint64_t a, int b) {" << std::endl;
asmCode << "		return (a >> b) | (a << (64 - b));" << std::endl;
asmCode << "	}" << std::endl;
asmCode << "	#define HAVE_ROTR" << std::endl;
asmCode << "#endif" << std::endl;
asmCode << "#if !defined(HAVE_MULH) || !defined(HAVE_SMULH) || !defined(HAVE_ROTR)" << std::endl;
asmCode << "	#error \"Required functions are not defined\"" << std::endl;
asmCode << "#endif" << std::endl;
//function body: registers are mapped to locals r8..r15, mirroring the
//x86 register assignment used by generateAsm()
asmCode << "void superScalar(uint64_t r[8]) {" << std::endl;
asmCode << "uint64_t r8 = r[0], r9 = r[1], r10 = r[2], r11 = r[3], r12 = r[4], r13 = r[5], r14 = r[6], r15 = r[7];" << std::endl;
for (unsigned i = 0; i < prog.getSize(); ++i) {
Instruction& instr = prog(i);
switch ((SuperscalarInstructionType)instr.opcode)
{
case SuperscalarInstructionType::ISUB_R:
asmCode << regR[instr.dst] << " -= " << regR[instr.src] << ";" << std::endl;
break;
case SuperscalarInstructionType::IXOR_R:
asmCode << regR[instr.dst] << " ^= " << regR[instr.src] << ";" << std::endl;
break;
case SuperscalarInstructionType::IADD_RS:
asmCode << regR[instr.dst] << " += " << regR[instr.src] << "*" << (1 << (instr.getModShift())) << ";" << std::endl;
break;
case SuperscalarInstructionType::IMUL_R:
asmCode << regR[instr.dst] << " *= " << regR[instr.src] << ";" << std::endl;
break;
case SuperscalarInstructionType::IROR_C:
asmCode << regR[instr.dst] << " = rotr(" << regR[instr.dst] << ", " << instr.getImm32() << ");" << std::endl;
break;
//in C the C7/C8/C9 size variants collapse into the same statement
case SuperscalarInstructionType::IADD_C7:
case SuperscalarInstructionType::IADD_C8:
case SuperscalarInstructionType::IADD_C9:
asmCode << regR[instr.dst] << " += " << (int32_t)instr.getImm32() << ";" << std::endl;
break;
case SuperscalarInstructionType::IXOR_C7:
case SuperscalarInstructionType::IXOR_C8:
case SuperscalarInstructionType::IXOR_C9:
asmCode << regR[instr.dst] << " ^= " << (int32_t)instr.getImm32() << ";" << std::endl;
break;
case SuperscalarInstructionType::IMULH_R:
asmCode << regR[instr.dst] << " = mulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl;
break;
case SuperscalarInstructionType::ISMULH_R:
asmCode << regR[instr.dst] << " = smulh(" << regR[instr.dst] << ", " << regR[instr.src] << ");" << std::endl;
break;
case SuperscalarInstructionType::IMUL_RCP:
asmCode << regR[instr.dst] << " *= " << (int64_t)randomx_reciprocal(instr.getImm32()) << ";" << std::endl;
break;
default:
UNREACHABLE;
}
}
asmCode << "r[0] = r8; r[1] = r9; r[2] = r10; r[3] = r11; r[4] = r12; r[5] = r13; r[6] = r14; r[7] = r15;" << std::endl;
asmCode << "}" << std::endl;
}
//Debug aid: when tracing is enabled, push the integer destination register
//so its value can be inspected at runtime.
void AssemblyGeneratorX86::traceint(Instruction& instr) {
	if (!trace)
		return;
	asmCode << "\tpush " << regR[instr.dst] << std::endl;
}
//Debug aid: floating-point results are not pushed directly; a zero
//placeholder keeps the trace stack layout consistent.
void AssemblyGeneratorX86::traceflt(Instruction& instr) {
	if (!trace)
		return;
	asmCode << "\tpush 0" << std::endl;
}
//Debug aid: instructions without a traceable result push a zero
//placeholder so every traced instruction occupies one stack slot.
void AssemblyGeneratorX86::tracenop(Instruction& instr) {
	if (!trace)
		return;
	asmCode << "\tpush 0" << std::endl;
}
//Append the assembly for one instruction: first a comment line showing the
//decoded instruction, then dispatch through the 256-entry handler table
//indexed by the raw opcode byte.
void AssemblyGeneratorX86::generateCode(Instruction& instr, int i) {
	asmCode << "\t; " << instr;
	(this->*engine[instr.opcode])(instr, i);
}
//Emit code that computes a masked scratchpad load address into 'reg':
//reg = (src32 + imm32) & (L1 or L2 mask), level chosen by the mod.mem bits.
//Note: the default argument "eax" lives on this definition; the header
//declaration takes the register name explicitly.
void AssemblyGeneratorX86::genAddressReg(Instruction& instr, const char* reg = "eax") {
asmCode << "\tlea " << reg << ", [" << regR32[instr.src] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
asmCode << "\tand " << reg << ", " << ((instr.getModMem()) ? ScratchpadL1Mask : ScratchpadL2Mask) << std::endl;
}
//Emit code that computes a masked scratchpad store address into eax:
//eax = (dst32 + imm32) masked to L1/L2 when the condition selects a cached
//level, otherwise to the full L3 range, then aligned down to maskAlign bytes.
void AssemblyGeneratorX86::genAddressRegDst(Instruction& instr, int maskAlign = 8) {
	asmCode << "\tlea eax, [" << regR32[instr.dst] << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
	const int mask = (instr.getModCond() < StoreL3Condition)
		? (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask)
		: ScratchpadL3Mask;
	asmCode << "\tand eax" << ", " << (mask & (-maskAlign)) << std::endl;
}
//Constant scratchpad offset: the instruction immediate clamped into the
//L3 address range.
int32_t AssemblyGeneratorX86::genAddressImm(Instruction& instr) {
	int32_t offset = (int32_t)instr.getImm32();
	return offset & ScratchpadL3Mask;
}
//IADD_RS: dst += src << modShift (plus imm32 when dst is the displacement
//register). Encoded as lea so CPU flags are not modified.
void AssemblyGeneratorX86::h_IADD_RS(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
//only the designated register (RegisterNeedsDisplacement) also adds the immediate
if(instr.dst == RegisterNeedsDisplacement)
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << std::showpos << (int32_t)instr.getImm32() << std::noshowpos << "]" << std::endl;
else
asmCode << "\tlea " << regR[instr.dst] << ", [" << regR[instr.dst] << "+" << regR[instr.src] << "*" << (1 << (instr.getModShift())) << "]" << std::endl;
traceint(instr);
}
//IADD_M: dst += 64-bit value loaded from the scratchpad.
void AssemblyGeneratorX86::h_IADD_M(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	if (instr.src == instr.dst) {
		//operand collision: fall back to a constant scratchpad offset
		asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
	}
	else {
		genAddressReg(instr); //eax = masked (src + imm)
		asmCode << "\tadd " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
	}
	traceint(instr);
}
//ISUB_R: dst -= src, or dst -= imm32 when the operands collide.
void AssemblyGeneratorX86::h_ISUB_R(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	asmCode << "\tsub " << regR[instr.dst] << ", ";
	if (instr.src == instr.dst)
		asmCode << (int32_t)instr.getImm32();
	else
		asmCode << regR[instr.src];
	asmCode << std::endl;
	traceint(instr);
}
//ISUB_M: dst -= 64-bit value loaded from the scratchpad.
//When src == dst the address would depend on the destination, so a constant
//scratchpad offset is used instead.
void AssemblyGeneratorX86::h_ISUB_M(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr);
asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
}
else {
asmCode << "\tsub " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
}
traceint(instr);
}
//IMUL_R: dst *= src, or dst *= imm32 when the operands collide.
void AssemblyGeneratorX86::h_IMUL_R(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	asmCode << "\timul " << regR[instr.dst] << ", ";
	if (instr.src == instr.dst)
		asmCode << (int32_t)instr.getImm32();
	else
		asmCode << regR[instr.src];
	asmCode << std::endl;
	traceint(instr);
}
//IMUL_M: dst *= 64-bit value loaded from the scratchpad (low 64 bits kept).
void AssemblyGeneratorX86::h_IMUL_M(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr);
asmCode << "\timul " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
}
else {
//operand collision: use a constant scratchpad offset
asmCode << "\timul " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
}
traceint(instr);
}
//IMULH_R: dst = high 64 bits of the unsigned product dst * src.
//One-operand mul leaves the upper half of the product in rdx.
void AssemblyGeneratorX86::h_IMULH_R(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	const char* dst = regR[instr.dst];
	asmCode << "\tmov rax, " << dst << std::endl
		<< "\tmul " << regR[instr.src] << std::endl
		<< "\tmov " << dst << ", rdx" << std::endl;
	traceint(instr);
}
//IMULH_M: dst = high 64 bits of unsigned dst * scratchpad load.
//The address is computed into ecx/rcx because mul clobbers rax and rdx.
void AssemblyGeneratorX86::h_IMULH_M(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr, "ecx");
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
asmCode << "\tmul qword ptr [" << regScratchpadAddr << "+rcx]" << std::endl;
}
else {
//operand collision: constant scratchpad offset, no address register needed
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
asmCode << "\tmul qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
}
asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
traceint(instr);
}
//ISMULH_R: dst = high 64 bits of the signed product dst * src.
//One-operand imul leaves the upper half of the product in rdx.
void AssemblyGeneratorX86::h_ISMULH_R(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	const char* dst = regR[instr.dst];
	asmCode << "\tmov rax, " << dst << std::endl
		<< "\timul " << regR[instr.src] << std::endl
		<< "\tmov " << dst << ", rdx" << std::endl;
	traceint(instr);
}
//ISMULH_M: dst = high 64 bits of signed dst * scratchpad load.
//The address is computed into ecx/rcx because imul clobbers rax and rdx.
void AssemblyGeneratorX86::h_ISMULH_M(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr, "ecx");
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
asmCode << "\timul qword ptr [" << regScratchpadAddr << "+rcx]" << std::endl;
}
else {
//operand collision: constant scratchpad offset, no address register needed
asmCode << "\tmov rax, " << regR[instr.dst] << std::endl;
asmCode << "\timul qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
}
asmCode << "\tmov " << regR[instr.dst] << ", rdx" << std::endl;
traceint(instr);
}
//INEG_R: dst = -dst (two's complement negation).
void AssemblyGeneratorX86::h_INEG_R(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	const char* dst = regR[instr.dst];
	asmCode << "\tneg " << dst << std::endl;
	traceint(instr);
}
//IXOR_R: dst ^= src, or dst ^= imm32 when the operands collide.
void AssemblyGeneratorX86::h_IXOR_R(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	asmCode << "\txor " << regR[instr.dst] << ", ";
	if (instr.src == instr.dst)
		asmCode << (int32_t)instr.getImm32();
	else
		asmCode << regR[instr.src];
	asmCode << std::endl;
	traceint(instr);
}
//IXOR_M: dst ^= 64-bit value loaded from the scratchpad.
void AssemblyGeneratorX86::h_IXOR_M(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr);
asmCode << "\txor " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
}
else {
//operand collision: use a constant scratchpad offset
asmCode << "\txor " << regR[instr.dst] << ", qword ptr [" << regScratchpadAddr << "+" << genAddressImm(instr) << "]" << std::endl;
}
traceint(instr);
}
//IROR_R: rotate dst right by src (taken mod 64 by the CPU via cl), or by
//a constant amount when the operands collide.
void AssemblyGeneratorX86::h_IROR_R(Instruction& instr, int i) {
	registerUsage[instr.dst] = i;
	if (instr.src == instr.dst) {
		asmCode << "\tror " << regR[instr.dst] << ", " << (instr.getImm32() & 63) << std::endl;
	}
	else {
		//variable rotate counts must be supplied in cl
		asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl;
		asmCode << "\tror " << regR[instr.dst] << ", cl" << std::endl;
	}
	traceint(instr);
}
//IROL_R: rotate dst left by src (count supplied via cl), or by a constant
//amount when the operands collide.
void AssemblyGeneratorX86::h_IROL_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
//variable rotate counts must be supplied in cl
asmCode << "\tmov ecx, " << regR32[instr.src] << std::endl;
asmCode << "\trol " << regR[instr.dst] << ", cl" << std::endl;
}
else {
asmCode << "\trol " << regR[instr.dst] << ", " << (instr.getImm32() & 63) << std::endl;
}
traceint(instr);
}
//IMUL_RCP: dst *= fixed-point reciprocal of the immediate, computed once at
//generation time. A zero or power-of-two divisor is skipped entirely (the
//reciprocal would be trivial), emitting only a trace placeholder.
void AssemblyGeneratorX86::h_IMUL_RCP(Instruction& instr, int i) {
uint64_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
registerUsage[instr.dst] = i;
asmCode << "\tmov rax, " << randomx_reciprocal(divisor) << std::endl;
asmCode << "\timul " << regR[instr.dst] << ", rax" << std::endl;
traceint(instr);
}
else {
tracenop(instr);
}
}
//ISWAP_R: exchange two integer registers; a self-swap emits nothing.
void AssemblyGeneratorX86::h_ISWAP_R(Instruction& instr, int i) {
	if (instr.src == instr.dst) {
		tracenop(instr);
		return;
	}
	//both registers are modified by the exchange
	registerUsage[instr.dst] = i;
	registerUsage[instr.src] = i;
	asmCode << "\txchg " << regR[instr.dst] << ", " << regR[instr.src] << std::endl;
	traceint(instr);
}
//FSWAP_R: swap the two double-precision lanes of an F or E register.
void AssemblyGeneratorX86::h_FSWAP_R(Instruction& instr, int i) {
	const char* reg = regFE[instr.dst];
	asmCode << "\tshufpd " << reg << ", " << reg << ", 1" << std::endl;
	traceflt(instr);
}
//FADD_R: F-group register += A-group register (packed double add).
void AssemblyGeneratorX86::h_FADD_R(Instruction& instr, int i) {
	instr.dst %= RegisterCountFlt;
	instr.src %= RegisterCountFlt;
	const char* dst = regF[instr.dst];
	const char* src = regA[instr.src];
	asmCode << "\taddpd " << dst << ", " << src << std::endl;
	traceflt(instr);
}
//FADD_M: F-register += packed doubles converted from two 32-bit signed
//integers loaded from the scratchpad.
void AssemblyGeneratorX86::h_FADD_M(Instruction& instr, int i) {
instr.dst %= RegisterCountFlt;
genAddressReg(instr);
//cvtdq2pd widens 2 x int32 into 2 x double
asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
asmCode << "\taddpd " << regF[instr.dst] << ", " << tempRegx << std::endl;
traceflt(instr);
}
//FSUB_R: F-group register -= A-group register (packed double subtract).
void AssemblyGeneratorX86::h_FSUB_R(Instruction& instr, int i) {
	instr.dst %= RegisterCountFlt;
	instr.src %= RegisterCountFlt;
	const char* dst = regF[instr.dst];
	const char* src = regA[instr.src];
	asmCode << "\tsubpd " << dst << ", " << src << std::endl;
	traceflt(instr);
}
//FSUB_M: F-register -= packed doubles converted from two 32-bit signed
//integers loaded from the scratchpad.
void AssemblyGeneratorX86::h_FSUB_M(Instruction& instr, int i) {
instr.dst %= RegisterCountFlt;
genAddressReg(instr);
//cvtdq2pd widens 2 x int32 into 2 x double
asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
asmCode << "\tsubpd " << regF[instr.dst] << ", " << tempRegx << std::endl;
traceflt(instr);
}
//FSCAL_R: XOR the F register with the constant scale mask held in xmm15,
//flipping exponent bits of both lanes.
void AssemblyGeneratorX86::h_FSCAL_R(Instruction& instr, int i) {
	instr.dst %= RegisterCountFlt;
	const char* dst = regF[instr.dst];
	asmCode << "\txorps " << dst << ", " << scaleMaskReg << std::endl;
	traceflt(instr);
}
//FMUL_R: E-group register *= A-group register (packed double multiply).
void AssemblyGeneratorX86::h_FMUL_R(Instruction& instr, int i) {
	instr.dst %= RegisterCountFlt;
	instr.src %= RegisterCountFlt;
	const char* dst = regE[instr.dst];
	const char* src = regA[instr.src];
	asmCode << "\tmulpd " << dst << ", " << src << std::endl;
	traceflt(instr);
}
//FDIV_M: E-register /= converted scratchpad value. The divisor's mantissa
//and exponent are masked/forced into a fixed range (via xmm13/xmm14) so it
//can never be zero, subnormal, infinite or NaN.
void AssemblyGeneratorX86::h_FDIV_M(Instruction& instr, int i) {
instr.dst %= RegisterCountFlt;
genAddressReg(instr);
asmCode << "\tcvtdq2pd " << tempRegx << ", qword ptr [" << regScratchpadAddr << "+rax]" << std::endl;
asmCode << "\tandps " << tempRegx << ", " << mantissaMaskReg << std::endl;
asmCode << "\torps " << tempRegx << ", " << exponentMaskReg << std::endl;
asmCode << "\tdivpd " << regE[instr.dst] << ", " << tempRegx << std::endl;
traceflt(instr);
}
//FSQRT_R: E-register = packed double square root of itself.
void AssemblyGeneratorX86::h_FSQRT_R(Instruction& instr, int i) {
	instr.dst %= RegisterCountFlt;
	const char* dst = regE[instr.dst];
	asmCode << "\tsqrtpd " << dst << ", " << dst << std::endl;
	traceflt(instr);
}
//CFROUND: set the SSE rounding mode from two bits of the source register.
void AssemblyGeneratorX86::h_CFROUND(Instruction& instr, int i) {
asmCode << "\tmov rax, " << regR[instr.src] << std::endl;
//rotate the selected bit pair into MXCSR's rounding-control field (bits 13-14)
int rotate = (13 - (instr.getImm32() & 63)) & 63;
if (rotate != 0)
asmCode << "\trol rax, " << rotate << std::endl;
//24576 = 0x6000: keep only the rounding-control bits
asmCode << "\tand eax, 24576" << std::endl;
//40896 = 0x9FC0: FZ + DAZ + all FP exceptions masked
asmCode << "\tor eax, 40896" << std::endl;
//ldmxcsr loads from memory, so bounce the value through the stack
asmCode << "\tpush rax" << std::endl;
asmCode << "\tldmxcsr dword ptr [rsp]" << std::endl;
asmCode << "\tpop rax" << std::endl;
tracenop(instr);
}
//CBRANCH: add an immediate to the destination register, then jump back to
//just after the last instruction that modified it when the masked condition
//bits are all zero.
void AssemblyGeneratorX86::h_CBRANCH(Instruction& instr, int i) {
int reg = instr.dst;
//registerUsage == -1 (never modified) branches to the program start
int target = registerUsage[reg] + 1;
int shift = instr.getModCond() + ConditionOffset;
//force the bit at 'shift' to 1 (and clear the bit below it) so the added
//constant biases the tested bit field and the loop terminates
int32_t imm = instr.getImm32() | (1L << shift);
if (ConditionOffset > 0 || shift > 0)
imm &= ~(1L << (shift - 1));
asmCode << "\tadd " << regR[reg] << ", " << imm << std::endl;
asmCode << "\ttest " << regR[reg] << ", " << (ConditionMask << shift) << std::endl;
asmCode << "\tjz randomx_isn_" << target << std::endl;
//mark all registers as used
//after a branch every register counts as modified, so a later CBRANCH
//cannot jump back across this one
for (unsigned j = 0; j < RegistersCount; ++j) {
registerUsage[j] = i;
}
}
//ISTORE: store the source register into the scratchpad at the masked,
//8-byte aligned address computed from dst + imm.
void AssemblyGeneratorX86::h_ISTORE(Instruction& instr, int i) {
	genAddressRegDst(instr); //eax = store address
	asmCode << "\tmov qword ptr [" << regScratchpadAddr << "+rax], "
		<< regR[instr.src] << std::endl;
	tracenop(instr);
}
//NOP: emit a literal nop instruction.
void AssemblyGeneratorX86::h_NOP(Instruction& instr, int i) {
	asmCode << "\tnop" << std::endl;
	tracenop(instr);
}
#include "instruction_weights.hpp"
//Expand each handler WT(x) times so the 256 opcode slots are distributed
//according to the configured instruction frequencies.
#define INST_HANDLE(x) REPN(&AssemblyGeneratorX86::h_##x, WT(x))
//Opcode dispatch table: opcode byte -> member-function handler.
InstructionGenerator AssemblyGeneratorX86::engine[256] = {
INST_HANDLE(IADD_RS)
INST_HANDLE(IADD_M)
INST_HANDLE(ISUB_R)
INST_HANDLE(ISUB_M)
INST_HANDLE(IMUL_R)
INST_HANDLE(IMUL_M)
INST_HANDLE(IMULH_R)
INST_HANDLE(IMULH_M)
INST_HANDLE(ISMULH_R)
INST_HANDLE(ISMULH_M)
INST_HANDLE(IMUL_RCP)
INST_HANDLE(INEG_R)
INST_HANDLE(IXOR_R)
INST_HANDLE(IXOR_M)
INST_HANDLE(IROR_R)
INST_HANDLE(IROL_R)
INST_HANDLE(ISWAP_R)
INST_HANDLE(FSWAP_R)
INST_HANDLE(FADD_R)
INST_HANDLE(FADD_M)
INST_HANDLE(FSUB_R)
INST_HANDLE(FSUB_M)
INST_HANDLE(FSCAL_R)
INST_HANDLE(FMUL_R)
INST_HANDLE(FDIV_M)
INST_HANDLE(FSQRT_R)
INST_HANDLE(CBRANCH)
INST_HANDLE(CFROUND)
INST_HANDLE(ISTORE)
INST_HANDLE(NOP)
};
}

View File

@ -0,0 +1,94 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "common.hpp"
#include <sstream>
namespace randomx {
class Program;
class SuperscalarProgram;
class AssemblyGeneratorX86;
class Instruction;
//member-function pointer type used for the 256-entry opcode dispatch table
typedef void(AssemblyGeneratorX86::*InstructionGenerator)(Instruction&, int);
//Translates RandomX programs into x86-64 assembly text and superscalar
//programs into assembly or portable C. Output accumulates in an internal
//string stream and is retrieved with printCode().
class AssemblyGeneratorX86 {
public:
void generateProgram(Program& prog);
void generateAsm(SuperscalarProgram& prog);
void generateC(SuperscalarProgram& prog);
//write the accumulated text to the given stream
void printCode(std::ostream& os) {
os << asmCode.rdbuf();
}
private:
//address helpers: compute masked scratchpad addresses into eax/ecx
void genAddressReg(Instruction&, const char*);
void genAddressRegDst(Instruction&, int);
int32_t genAddressImm(Instruction&);
//emit the assembly for a single instruction via the handler table
void generateCode(Instruction&, int);
//tracing hooks (emit push instructions only when tracing is enabled)
void traceint(Instruction&);
void traceflt(Instruction&);
void tracenop(Instruction&);
//one handler per RandomX instruction type
void h_IADD_RS(Instruction&, int);
void h_IADD_M(Instruction&, int);
void h_ISUB_R(Instruction&, int);
void h_ISUB_M(Instruction&, int);
void h_IMUL_R(Instruction&, int);
void h_IMUL_M(Instruction&, int);
void h_IMULH_R(Instruction&, int);
void h_IMULH_M(Instruction&, int);
void h_ISMULH_R(Instruction&, int);
void h_ISMULH_M(Instruction&, int);
void h_IMUL_RCP(Instruction&, int);
void h_INEG_R(Instruction&, int);
void h_IXOR_R(Instruction&, int);
void h_IXOR_M(Instruction&, int);
void h_IROR_R(Instruction&, int);
void h_IROL_R(Instruction&, int);
void h_ISWAP_R(Instruction&, int);
void h_FSWAP_R(Instruction&, int);
void h_FADD_R(Instruction&, int);
void h_FADD_M(Instruction&, int);
void h_FSUB_R(Instruction&, int);
void h_FSUB_M(Instruction&, int);
void h_FSCAL_R(Instruction&, int);
void h_FMUL_R(Instruction&, int);
void h_FDIV_M(Instruction&, int);
void h_FSQRT_R(Instruction&, int);
void h_CBRANCH(Instruction&, int);
void h_CFROUND(Instruction&, int);
void h_ISTORE(Instruction&, int);
void h_NOP(Instruction&, int);
//opcode byte -> handler, populated from the instruction weights
static InstructionGenerator engine[256];
std::stringstream asmCode;
//index of the last instruction that wrote each register (-1 = untouched);
//used by CBRANCH to choose its jump target
int registerUsage[RegistersCount];
};
}

View File

@ -0,0 +1,76 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Original code from Argon2 reference source code package used under CC0 Licence
* https://github.com/P-H-C/phc-winner-argon2
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*/
#ifndef PORTABLE_BLAKE2_IMPL_H
#define PORTABLE_BLAKE2_IMPL_H
#include <stdint.h>
#include "endian.h"
/* Read a 48-bit little-endian value from src into the low 6 bytes of a
 * uint64_t (used for Argon2/BLAKE2b parameter handling). */
static FORCE_INLINE uint64_t load48(const void *src) {
    const uint8_t *p = (const uint8_t *)src;
    uint64_t w = 0;
    for (int i = 0; i < 6; ++i) {
        w |= (uint64_t)p[i] << (8 * i);
    }
    return w;
}
/* Write the low 48 bits of w to dst, least-significant byte first. */
static FORCE_INLINE void store48(void *dst, uint64_t w) {
    uint8_t *p = (uint8_t *)dst;
    for (int i = 0; i < 6; ++i) {
        p[i] = (uint8_t)(w >> (8 * i));
    }
}
/* Rotate w right by c bits (c in [0, 31]).
 * The left-shift amount is masked so that c == 0 no longer shifts by the
 * full word width, which is undefined behavior in C (original code did
 * `w << (32 - c)`). For c in [1, 31] the result is unchanged. */
static FORCE_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) {
    return (w >> c) | (w << ((32 - c) & 31));
}
/* Rotate w right by c bits (c in [0, 63]).
 * The left-shift amount is masked so that c == 0 no longer shifts by the
 * full word width, which is undefined behavior in C (original code did
 * `w << (64 - c)`). For c in [1, 63] the result is unchanged. */
static FORCE_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) {
    return (w >> c) | (w << ((64 - c) & 63));
}
#endif

116
randomx/blake2/blake2.h Normal file
View File

@ -0,0 +1,116 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Original code from Argon2 reference source code package used under CC0 Licence
* https://github.com/P-H-C/phc-winner-argon2
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*/
#ifndef PORTABLE_BLAKE2_H
#define PORTABLE_BLAKE2_H
#include <stddef.h> /* size_t: used by the prototypes below but not guaranteed
                       to be declared by <stdint.h>/<limits.h> alone */
#include <stdint.h>
#include <limits.h>
#if defined(__cplusplus)
extern "C" {
#endif
/* BLAKE2b size constants (RFC 7693). */
enum blake2b_constant {
BLAKE2B_BLOCKBYTES = 128,
BLAKE2B_OUTBYTES = 64,
BLAKE2B_KEYBYTES = 64,
BLAKE2B_SALTBYTES = 16,
BLAKE2B_PERSONALBYTES = 16
};
/* Parameter block: must be exactly 64 bytes, hence the packed layout.
   The trailing comments give the cumulative byte offset of each field. */
#pragma pack(push, 1)
typedef struct __blake2b_param {
uint8_t digest_length; /* 1 */
uint8_t key_length; /* 2 */
uint8_t fanout; /* 3 */
uint8_t depth; /* 4 */
uint32_t leaf_length; /* 8 */
uint64_t node_offset; /* 16 */
uint8_t node_depth; /* 17 */
uint8_t inner_length; /* 18 */
uint8_t reserved[14]; /* 32 */
uint8_t salt[BLAKE2B_SALTBYTES]; /* 48 */
uint8_t personal[BLAKE2B_PERSONALBYTES]; /* 64 */
} blake2b_param;
#pragma pack(pop)
/* Hashing state: chain value h, 128-bit byte counter t, finalization
   flags f, and a partial input block buffer. */
typedef struct __blake2b_state {
uint64_t h[8];
uint64_t t[2];
uint64_t f[2];
uint8_t buf[BLAKE2B_BLOCKBYTES];
unsigned buflen;
unsigned outlen;
uint8_t last_node;
} blake2b_state;
/* Ensure param structs have not been wrongly padded */
/* Poor man's static_assert */
enum {
blake2_size_check_0 = 1 / !!(CHAR_BIT == 8),
blake2_size_check_2 =
1 / !!(sizeof(blake2b_param) == sizeof(uint64_t) * CHAR_BIT)
};
/* Prefix the public symbols so they do not clash with other BLAKE2
   implementations linked into the same binary. */
#define blake2b_init randomx_blake2b_init
#define blake2b_init_key randomx_blake2b_init_key
#define blake2b_init_param randomx_blake2b_init_param
#define blake2b_update randomx_blake2b_update
#define blake2b_final randomx_blake2b_final
#define blake2b randomx_blake2b
#define blake2b_long randomx_blake2b_long
/* Streaming API */
int blake2b_init(blake2b_state *S, size_t outlen);
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key,
size_t keylen);
int blake2b_init_param(blake2b_state *S, const blake2b_param *P);
int blake2b_update(blake2b_state *S, const void *in, size_t inlen);
int blake2b_final(blake2b_state *S, void *out, size_t outlen);
/* Simple API */
int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
const void *key, size_t keylen);
/* Argon2 Team - Begin Code */
int blake2b_long(void *out, size_t outlen, const void *in, size_t inlen);
/* Argon2 Team - End Code */
#if defined(__cplusplus)
}
#endif
#endif

409
randomx/blake2/blake2b.c Normal file
View File

@ -0,0 +1,409 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Original code from Argon2 reference source code package used under CC0 Licence
* https://github.com/P-H-C/phc-winner-argon2
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*/
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include "blake2.h"
#include "blake2-impl.h"
/* BLAKE2b initialization vector: the first 64 bits of the fractional
 * parts of the square roots of the first eight primes (same words as
 * SHA-512's IV; see RFC 7693, section 2.6). */
static const uint64_t blake2b_IV[8] = {
    UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
    UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
    UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
    UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179) };

/* Message-word permutation schedule, one row per round.  BLAKE2b runs
 * 12 rounds; rows 10 and 11 repeat rows 0 and 1 (RFC 7693, table 2.7). */
static const unsigned int blake2b_sigma[12][16] = {
    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
    {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
    {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
    {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
    {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
    {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
    {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
    {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
    {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
    {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
    {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
    {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
};
/* Set the last-node finalization flag (f[1]) used in tree-hashing mode. */
static FORCE_INLINE void blake2b_set_lastnode(blake2b_state *S) {
    S->f[1] = UINT64_MAX;
}
/* Mark the state as finalized: set the last-block flag f[0], first
 * propagating the last-node flag when tree mode requests it. */
static FORCE_INLINE void blake2b_set_lastblock(blake2b_state *S) {
    if (S->last_node)
        blake2b_set_lastnode(S);
    S->f[0] = UINT64_MAX;
}
/* Advance the 128-bit message-byte counter t[1]:t[0] by inc bytes,
 * carrying into the high word when the low word wraps. */
static FORCE_INLINE void blake2b_increment_counter(blake2b_state *S,
                                                   uint64_t inc) {
    const uint64_t lo = S->t[0] + inc;
    if (lo < inc) {
        S->t[1] += 1; /* low word wrapped: propagate the carry */
    }
    S->t[0] = lo;
}
/* Poison a state after a failed init: setting the finalization flag makes
 * subsequent blake2b_update/blake2b_final calls fail their f[0] != 0 check.
 * NOTE(review): the original Argon2 memory wipe is commented out here, so
 * the state contents are not cleared — confirm this is intentional. */
static FORCE_INLINE void blake2b_invalidate_state(blake2b_state *S) {
    //clear_internal_memory(S, sizeof(*S)); /* wipe */
    blake2b_set_lastblock(S); /* invalidate for further use */
}
/* Reset the hash state: zero every field, then load the BLAKE2b IV
 * into the chaining value h. */
static FORCE_INLINE void blake2b_init0(blake2b_state *S) {
    memset(S, 0, sizeof *S);
    memcpy(S->h, blake2b_IV, sizeof S->h);
}
/* Initialize a state from an explicit parameter block P.
 * The serialized bytes of P are XORed into the IV-loaded chaining value,
 * as specified by RFC 7693, section 2.5.  Returns 0 on success, -1 on
 * NULL arguments. */
int blake2b_init_param(blake2b_state *S, const blake2b_param *P) {
    unsigned int word;
    const unsigned char *raw = (const unsigned char *)P;

    if (S == NULL || P == NULL)
        return -1;

    blake2b_init0(S);
    /* IV XOR parameter block, read as eight little-endian 64-bit words */
    for (word = 0; word < 8; ++word)
        S->h[word] ^= load64(raw + word * sizeof(S->h[word]));

    S->outlen = P->digest_length;
    return 0;
}
/* Sequential (unkeyed) BLAKE2b initialization for an outlen-byte digest.
 * outlen must be in [1, BLAKE2B_OUTBYTES]; on a bad length the state is
 * invalidated and -1 is returned. */
int blake2b_init(blake2b_state *S, size_t outlen) {
    blake2b_param P;

    if (S == NULL)
        return -1;
    if (outlen == 0 || outlen > BLAKE2B_OUTBYTES) {
        blake2b_invalidate_state(S);
        return -1;
    }

    /* Unkeyed, sequential-mode parameter block: clear everything (the
     * block is consumed byte-wise by blake2b_init_param), then set the
     * only nonzero fields. */
    memset(&P, 0, sizeof P);
    P.digest_length = (uint8_t)outlen;
    P.fanout = 1;
    P.depth = 1;

    return blake2b_init_param(S, &P);
}
/* Keyed BLAKE2b initialization.  The key (1..BLAKE2B_KEYBYTES bytes) is
 * absorbed as a full zero-padded first block, per RFC 7693 section 2.9.
 * Returns 0 on success; on any invalid argument the state is invalidated
 * and -1 is returned. */
int blake2b_init_key(blake2b_state *S, size_t outlen, const void *key, size_t keylen) {
    blake2b_param P;

    if (S == NULL) {
        return -1;
    }
    if ((outlen == 0) || (outlen > BLAKE2B_OUTBYTES)) {
        blake2b_invalidate_state(S);
        return -1;
    }
    if ((key == 0) || (keylen == 0) || (keylen > BLAKE2B_KEYBYTES)) {
        blake2b_invalidate_state(S);
        return -1;
    }
    /* Setup Parameter Block for keyed BLAKE2 */
    P.digest_length = (uint8_t)outlen;
    P.key_length = (uint8_t)keylen;
    P.fanout = 1;
    P.depth = 1;
    P.leaf_length = 0;
    P.node_offset = 0;
    P.node_depth = 0;
    P.inner_length = 0;
    memset(P.reserved, 0, sizeof(P.reserved));
    memset(P.salt, 0, sizeof(P.salt));
    memset(P.personal, 0, sizeof(P.personal));
    if (blake2b_init_param(S, &P) < 0) {
        blake2b_invalidate_state(S);
        return -1;
    }
    {
        uint8_t block[BLAKE2B_BLOCKBYTES];
        memset(block, 0, BLAKE2B_BLOCKBYTES);
        memcpy(block, key, keylen);
        blake2b_update(S, block, BLAKE2B_BLOCKBYTES);
        /* Burn the key copy from the stack.  The original wipe helper was
         * commented out, which left secret key bytes lingering in stack
         * memory; use volatile stores so the compiler cannot elide the
         * wipe as a dead store (CERT MSC06-C). */
        {
            volatile uint8_t *vblock = block;
            size_t i;
            for (i = 0; i < BLAKE2B_BLOCKBYTES; ++i) {
                vblock[i] = 0;
            }
        }
    }
    return 0;
}
/* The BLAKE2b compression function F (RFC 7693, section 3.2).
 * Mixes one 128-byte message block into the chaining value S->h using the
 * current byte counter S->t and finalization flags S->f.  Twelve rounds of
 * the G quarter-round are applied to a 16-word working vector v. */
static void blake2b_compress(blake2b_state *S, const uint8_t *block) {
    uint64_t m[16];
    uint64_t v[16];
    unsigned int i, r;

    /* Load the message block as sixteen little-endian 64-bit words. */
    for (i = 0; i < 16; ++i) {
        m[i] = load64(block + i * sizeof(m[i]));
    }
    /* v[0..7] = chaining value; v[8..15] = IV mixed with counter/flags. */
    for (i = 0; i < 8; ++i) {
        v[i] = S->h[i];
    }
    v[8] = blake2b_IV[0];
    v[9] = blake2b_IV[1];
    v[10] = blake2b_IV[2];
    v[11] = blake2b_IV[3];
    v[12] = blake2b_IV[4] ^ S->t[0];
    v[13] = blake2b_IV[5] ^ S->t[1];
    v[14] = blake2b_IV[6] ^ S->f[0];
    v[15] = blake2b_IV[7] ^ S->f[1];
/* G quarter-round: message words are selected via blake2b_sigma[r]. */
#define G(r, i, a, b, c, d) \
    do { \
        a = a + b + m[blake2b_sigma[r][2 * i + 0]]; \
        d = rotr64(d ^ a, 32); \
        c = c + d; \
        b = rotr64(b ^ c, 24); \
        a = a + b + m[blake2b_sigma[r][2 * i + 1]]; \
        d = rotr64(d ^ a, 16); \
        c = c + d; \
        b = rotr64(b ^ c, 63); \
    } while ((void)0, 0)
/* One full round: four column mixes then four diagonal mixes. */
#define ROUND(r) \
    do { \
        G(r, 0, v[0], v[4], v[8], v[12]); \
        G(r, 1, v[1], v[5], v[9], v[13]); \
        G(r, 2, v[2], v[6], v[10], v[14]); \
        G(r, 3, v[3], v[7], v[11], v[15]); \
        G(r, 4, v[0], v[5], v[10], v[15]); \
        G(r, 5, v[1], v[6], v[11], v[12]); \
        G(r, 6, v[2], v[7], v[8], v[13]); \
        G(r, 7, v[3], v[4], v[9], v[14]); \
    } while ((void)0, 0)
    for (r = 0; r < 12; ++r) {
        ROUND(r);
    }
    /* Feed-forward: fold both halves of v back into the chaining value. */
    for (i = 0; i < 8; ++i) {
        S->h[i] = S->h[i] ^ v[i] ^ v[i + 8];
    }
#undef G
#undef ROUND
}
/* Absorb inlen bytes of message into the state.  Data is buffered in
 * S->buf and compressed one full 128-byte block at a time; a block is
 * only compressed once MORE data follows it, so the final (possibly
 * full) block is always left for blake2b_final.  Returns 0 on success,
 * -1 on NULL arguments or an already-finalized state. */
int blake2b_update(blake2b_state *S, const void *in, size_t inlen) {
    const uint8_t *pin = (const uint8_t *)in;

    if (inlen == 0) {
        return 0;
    }
    /* Sanity check */
    if (S == NULL || in == NULL) {
        return -1;
    }
    /* Is this a reused state? (f[0] != 0 means already finalized) */
    if (S->f[0] != 0) {
        return -1;
    }
    if (S->buflen + inlen > BLAKE2B_BLOCKBYTES) { //BLAKE2B_BLOCKBYTES =128
        /* Complete current block */
        size_t left = S->buflen;
        size_t fill = BLAKE2B_BLOCKBYTES - left;
        memcpy(&S->buf[left], pin, fill);
        blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
        blake2b_compress(S, S->buf);
        S->buflen = 0;
        inlen -= fill;
        pin += fill;
        /* Avoid buffer copies when possible: compress full blocks straight
         * from the caller's memory.  Strict '>' keeps the last block back. */
        while (inlen > BLAKE2B_BLOCKBYTES) {
            blake2b_increment_counter(S, BLAKE2B_BLOCKBYTES);
            blake2b_compress(S, pin);
            inlen -= BLAKE2B_BLOCKBYTES;
            pin += BLAKE2B_BLOCKBYTES;
        }
    }
    /* Stash the remaining (<= one block) bytes for later. */
    memcpy(&S->buf[S->buflen], pin, inlen);
    S->buflen += (unsigned int)inlen;
    return 0;
}
/* Finalize the hash and write S->outlen digest bytes to out.
 * outlen is the capacity of out and must be >= S->outlen.  The buffered
 * tail is zero-padded, compressed with the finalization flag set, and the
 * chaining value is serialized little-endian.  Returns 0 on success, -1
 * on bad arguments or a state that was already finalized. */
int blake2b_final(blake2b_state *S, void *out, size_t outlen) {
    uint8_t buffer[BLAKE2B_OUTBYTES] = { 0 };
    unsigned int i;

    /* Sanity checks */
    if (S == NULL || out == NULL || outlen < S->outlen) {
        return -1;
    }
    /* Is this a reused state? */
    if (S->f[0] != 0) {
        return -1;
    }
    blake2b_increment_counter(S, S->buflen);
    blake2b_set_lastblock(S);
    memset(&S->buf[S->buflen], 0, BLAKE2B_BLOCKBYTES - S->buflen); /* Padding */
    blake2b_compress(S, S->buf);
    for (i = 0; i < 8; ++i) { /* Output full hash to temp buffer */
        store64(buffer + sizeof(S->h[i]) * i, S->h[i]);
    }
    memcpy(out, buffer, S->outlen);
    /* NOTE(review): the Argon2 wipes of buffer/state are disabled here. */
    //clear_internal_memory(buffer, sizeof(buffer));
    //clear_internal_memory(S->buf, sizeof(S->buf));
    //clear_internal_memory(S->h, sizeof(S->h));
    return 0;
}
/* One-shot BLAKE2b: hash inlen bytes of in (optionally keyed) into an
 * outlen-byte digest at out.  Returns 0 on success, -1 on any invalid
 * argument or internal failure.  Since no cleanup is required (the wipe
 * helpers are disabled in this build), validation failures return early. */
int blake2b(void *out, size_t outlen, const void *in, size_t inlen,
            const void *key, size_t keylen) {
    blake2b_state S;
    int rc;

    /* Verify parameters */
    if (in == NULL && inlen > 0)
        return -1;
    if (out == NULL || outlen == 0 || outlen > BLAKE2B_OUTBYTES)
        return -1;
    if ((key == NULL && keylen > 0) || keylen > BLAKE2B_KEYBYTES)
        return -1;

    rc = (keylen > 0) ? blake2b_init_key(&S, outlen, key, keylen)
                      : blake2b_init(&S, outlen);
    if (rc < 0)
        return -1;

    if (blake2b_update(&S, in, inlen) < 0)
        return -1;

    return blake2b_final(&S, out, outlen);
}
/* Argon2 Team - Begin Code */
int blake2b_long(void *pout, size_t outlen, const void *in, size_t inlen) {
uint8_t *out = (uint8_t *)pout;
blake2b_state blake_state;
uint8_t outlen_bytes[sizeof(uint32_t)] = { 0 };
int ret = -1;
if (outlen > UINT32_MAX) {
goto fail;
}
/* Ensure little-endian byte order! */
store32(outlen_bytes, (uint32_t)outlen);
#define TRY(statement) \
do { \
ret = statement; \
if (ret < 0) { \
goto fail; \
} \
} while ((void)0, 0)
if (outlen <= BLAKE2B_OUTBYTES) {
TRY(blake2b_init(&blake_state, outlen));
TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
TRY(blake2b_update(&blake_state, in, inlen));
TRY(blake2b_final(&blake_state, out, outlen));
}
else {
uint32_t toproduce;
uint8_t out_buffer[BLAKE2B_OUTBYTES];
uint8_t in_buffer[BLAKE2B_OUTBYTES];
TRY(blake2b_init(&blake_state, BLAKE2B_OUTBYTES));
TRY(blake2b_update(&blake_state, outlen_bytes, sizeof(outlen_bytes)));
TRY(blake2b_update(&blake_state, in, inlen));
TRY(blake2b_final(&blake_state, out_buffer, BLAKE2B_OUTBYTES));
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
out += BLAKE2B_OUTBYTES / 2;
toproduce = (uint32_t)outlen - BLAKE2B_OUTBYTES / 2;
while (toproduce > BLAKE2B_OUTBYTES) {
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
TRY(blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer,
BLAKE2B_OUTBYTES, NULL, 0));
memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
out += BLAKE2B_OUTBYTES / 2;
toproduce -= BLAKE2B_OUTBYTES / 2;
}
memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
TRY(blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES, NULL,
0));
memcpy(out, out_buffer, toproduce);
}
fail:
//clear_internal_memory(&blake_state, sizeof(blake_state));
return ret;
#undef TRY
}
/* Argon2 Team - End Code */

View File

@ -0,0 +1,189 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Original code from Argon2 reference source code package used under CC0 Licence
* https://github.com/P-H-C/phc-winner-argon2
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*/
#ifndef BLAKE_ROUND_MKA_OPT_H
#define BLAKE_ROUND_MKA_OPT_H
#include "blake2-impl.h"
#ifdef __GNUC__
#include <x86intrin.h>
#else
#include <intrin.h>
#endif
/* AVX2 64-bit right-rotations for the BlaMka round.
 * rotr32 is a lane swap; rotr24/rotr16 are byte shuffles; rotr63 is
 * rotl1 expressed as (x >> 63) ^ (x + x). */
#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
#define rotr16(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
#define rotr63(x) _mm256_xor_si256(_mm256_srli_epi64((x), 63), _mm256_add_epi64((x), (x)))
/* First half of the BlaMka G function on two 4-lane halves of the state.
 * Each step computes a + b + 2 * lo32(a) * lo32(b) ("BlaMka" addition),
 * then XOR-rotates by 32 and 24 bits. */
#define G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
	do { \
		__m256i ml = _mm256_mul_epu32(A0, B0); \
		ml = _mm256_add_epi64(ml, ml); \
		A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
		D0 = _mm256_xor_si256(D0, A0); \
		D0 = rotr32(D0); \
		\
		ml = _mm256_mul_epu32(C0, D0); \
		ml = _mm256_add_epi64(ml, ml); \
		C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
		\
		B0 = _mm256_xor_si256(B0, C0); \
		B0 = rotr24(B0); \
		\
		ml = _mm256_mul_epu32(A1, B1); \
		ml = _mm256_add_epi64(ml, ml); \
		A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
		D1 = _mm256_xor_si256(D1, A1); \
		D1 = rotr32(D1); \
		\
		ml = _mm256_mul_epu32(C1, D1); \
		ml = _mm256_add_epi64(ml, ml); \
		C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
		\
		B1 = _mm256_xor_si256(B1, C1); \
		B1 = rotr24(B1); \
	} while((void)0, 0);
/* Second half of the BlaMka G function: same BlaMka additions with the
 * 16-bit and 63-bit rotations. */
#define G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
	do { \
		__m256i ml = _mm256_mul_epu32(A0, B0); \
		ml = _mm256_add_epi64(ml, ml); \
		A0 = _mm256_add_epi64(A0, _mm256_add_epi64(B0, ml)); \
		D0 = _mm256_xor_si256(D0, A0); \
		D0 = rotr16(D0); \
		\
		ml = _mm256_mul_epu32(C0, D0); \
		ml = _mm256_add_epi64(ml, ml); \
		C0 = _mm256_add_epi64(C0, _mm256_add_epi64(D0, ml)); \
		B0 = _mm256_xor_si256(B0, C0); \
		B0 = rotr63(B0); \
		\
		ml = _mm256_mul_epu32(A1, B1); \
		ml = _mm256_add_epi64(ml, ml); \
		A1 = _mm256_add_epi64(A1, _mm256_add_epi64(B1, ml)); \
		D1 = _mm256_xor_si256(D1, A1); \
		D1 = rotr16(D1); \
		\
		ml = _mm256_mul_epu32(C1, D1); \
		ml = _mm256_add_epi64(ml, ml); \
		C1 = _mm256_add_epi64(C1, _mm256_add_epi64(D1, ml)); \
		B1 = _mm256_xor_si256(B1, C1); \
		B1 = rotr63(B1); \
	} while((void)0, 0);
/* Rotate rows B/C/D so the diagonal mix can reuse the column G macros.
 * _1 variants operate when each __m256i holds one row; _2 variants when
 * rows are interleaved across the register pairs (blend + permute). */
#define DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
	do { \
		B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(0, 3, 2, 1)); \
		C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
		D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(2, 1, 0, 3)); \
		\
		B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(0, 3, 2, 1)); \
		C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
		D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(2, 1, 0, 3)); \
	} while((void)0, 0);
#define DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
	do { \
		__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
		__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
		B1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
		B0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
		\
		tmp1 = C0; \
		C0 = C1; \
		C1 = tmp1; \
		\
		tmp1 = _mm256_blend_epi32(D0, D1, 0xCC); \
		tmp2 = _mm256_blend_epi32(D0, D1, 0x33); \
		D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
		D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
	} while(0);
/* Inverse permutations restoring the column layout after a diagonal mix. */
#define UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
	do { \
		B0 = _mm256_permute4x64_epi64(B0, _MM_SHUFFLE(2, 1, 0, 3)); \
		C0 = _mm256_permute4x64_epi64(C0, _MM_SHUFFLE(1, 0, 3, 2)); \
		D0 = _mm256_permute4x64_epi64(D0, _MM_SHUFFLE(0, 3, 2, 1)); \
		\
		B1 = _mm256_permute4x64_epi64(B1, _MM_SHUFFLE(2, 1, 0, 3)); \
		C1 = _mm256_permute4x64_epi64(C1, _MM_SHUFFLE(1, 0, 3, 2)); \
		D1 = _mm256_permute4x64_epi64(D1, _MM_SHUFFLE(0, 3, 2, 1)); \
	} while((void)0, 0);
#define UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
	do { \
		__m256i tmp1 = _mm256_blend_epi32(B0, B1, 0xCC); \
		__m256i tmp2 = _mm256_blend_epi32(B0, B1, 0x33); \
		B0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
		B1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
		\
		tmp1 = C0; \
		C0 = C1; \
		C1 = tmp1; \
		\
		tmp1 = _mm256_blend_epi32(D0, D1, 0x33); \
		tmp2 = _mm256_blend_epi32(D0, D1, 0xCC); \
		D0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2,3,0,1)); \
		D1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2,3,0,1)); \
	} while((void)0, 0);
/* One full BlaMka round: column mix, diagonalize, diagonal mix, restore.
 * ROUND_1/ROUND_2 differ only in which (un)diagonalize layout they use. */
#define BLAKE2_ROUND_1(A0, A1, B0, B1, C0, C1, D0, D1) \
	do{ \
		G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
		G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
		\
		DIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
		\
		G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
		G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
		\
		UNDIAGONALIZE_1(A0, B0, C0, D0, A1, B1, C1, D1) \
	} while((void)0, 0);
#define BLAKE2_ROUND_2(A0, A1, B0, B1, C0, C1, D0, D1) \
	do{ \
		G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
		G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
		\
		DIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
		\
		G1_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
		G2_AVX2(A0, A1, B0, B1, C0, C1, D0, D1) \
		\
		UNDIAGONALIZE_2(A0, A1, B0, B1, C0, C1, D0, D1) \
	} while((void)0, 0);
#endif /* BLAKE_ROUND_MKA_OPT_H */

View File

@ -0,0 +1,73 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Original code from Argon2 reference source code package used under CC0 Licence
* https://github.com/P-H-C/phc-winner-argon2
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*/
#ifndef BLAKE_ROUND_MKA_H
#define BLAKE_ROUND_MKA_H
#include "blake2.h"
#include "blake2-impl.h"
/* Portable (non-SIMD) BlaMka round used by Argon2/RandomX. */
/* designed by the Lyra PHC team */
/* BlaMka addition: x + y + 2 * lo32(x) * lo32(y).  The extra 32x32
 * multiply makes the permutation compute-hard compared to plain BLAKE2b. */
static FORCE_INLINE uint64_t fBlaMka(uint64_t x, uint64_t y) {
    const uint64_t m = UINT64_C(0xFFFFFFFF);
    const uint64_t xy = (x & m) * (y & m);
    return x + y + 2 * xy;
}
/* BLAKE2b G quarter-round with fBlaMka in place of plain addition
 * (message-less variant; rotation amounts 32/24/16/63 are unchanged). */
#define G(a, b, c, d) \
    do { \
        a = fBlaMka(a, b); \
        d = rotr64(d ^ a, 32); \
        c = fBlaMka(c, d); \
        b = rotr64(b ^ c, 24); \
        a = fBlaMka(a, b); \
        d = rotr64(d ^ a, 16); \
        c = fBlaMka(c, d); \
        b = rotr64(b ^ c, 63); \
    } while ((void)0, 0)
/* Full message-less round over a 16-word state: columns then diagonals. */
#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, \
                           v12, v13, v14, v15) \
    do { \
        G(v0, v4, v8, v12); \
        G(v1, v5, v9, v13); \
        G(v2, v6, v10, v14); \
        G(v3, v7, v11, v15); \
        G(v0, v5, v10, v15); \
        G(v1, v6, v11, v12); \
        G(v2, v7, v8, v13); \
        G(v3, v4, v9, v14); \
    } while ((void)0, 0)
#endif

View File

@ -0,0 +1,162 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Original code from Argon2 reference source code package used under CC0 Licence
* https://github.com/P-H-C/phc-winner-argon2
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*/
#ifndef BLAKE_ROUND_MKA_OPT_H
#define BLAKE_ROUND_MKA_OPT_H
#include "blake2-impl.h"
#ifdef __GNUC__
#include <x86intrin.h>
#else
#include <intrin.h>
#endif
#ifdef _mm_roti_epi64 //clang defines it using the XOP instruction set
#undef _mm_roti_epi64
#endif
/* SSSE3 byte-shuffle masks implementing 16- and 24-bit right rotations
 * of each 64-bit lane. */
#define r16 \
    (_mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9))
#define r24 \
    (_mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
/* Right-rotate each 64-bit lane by -c bits (c is passed negative to match
 * the XOP _mm_roti_epi64 convention); fast paths for 32/24/16/63. */
#define _mm_roti_epi64(x, c) \
    (-(c) == 32) \
        ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2, 3, 0, 1)) \
        : (-(c) == 24) \
              ? _mm_shuffle_epi8((x), r24) \
              : (-(c) == 16) \
                    ? _mm_shuffle_epi8((x), r16) \
                    : (-(c) == 63) \
                          ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
                                          _mm_add_epi64((x), (x))) \
                          : _mm_xor_si128(_mm_srli_epi64((x), -(c)), \
                                          _mm_slli_epi64((x), 64 - (-(c))))
/* BlaMka addition on two 64-bit lanes: x + y + 2 * lo32(x) * lo32(y). */
static FORCE_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
    const __m128i z = _mm_mul_epu32(x, y);
    return _mm_add_epi64(_mm_add_epi64(x, y), _mm_add_epi64(z, z));
}
/* First half of the BlaMka G function (rotations by 32 then 24 bits),
 * applied to both 2-lane halves of the state. */
#define G1(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        A0 = fBlaMka(A0, B0); \
        A1 = fBlaMka(A1, B1); \
        \
        D0 = _mm_xor_si128(D0, A0); \
        D1 = _mm_xor_si128(D1, A1); \
        \
        D0 = _mm_roti_epi64(D0, -32); \
        D1 = _mm_roti_epi64(D1, -32); \
        \
        C0 = fBlaMka(C0, D0); \
        C1 = fBlaMka(C1, D1); \
        \
        B0 = _mm_xor_si128(B0, C0); \
        B1 = _mm_xor_si128(B1, C1); \
        \
        B0 = _mm_roti_epi64(B0, -24); \
        B1 = _mm_roti_epi64(B1, -24); \
    } while ((void)0, 0)
/* Second half of the BlaMka G function (rotations by 16 then 63 bits). */
#define G2(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        A0 = fBlaMka(A0, B0); \
        A1 = fBlaMka(A1, B1); \
        \
        D0 = _mm_xor_si128(D0, A0); \
        D1 = _mm_xor_si128(D1, A1); \
        \
        D0 = _mm_roti_epi64(D0, -16); \
        D1 = _mm_roti_epi64(D1, -16); \
        \
        C0 = fBlaMka(C0, D0); \
        C1 = fBlaMka(C1, D1); \
        \
        B0 = _mm_xor_si128(B0, C0); \
        B1 = _mm_xor_si128(B1, C1); \
        \
        B0 = _mm_roti_epi64(B0, -63); \
        B1 = _mm_roti_epi64(B1, -63); \
    } while ((void)0, 0)
/* Rotate rows B/C/D across the two 128-bit halves so the diagonal mix can
 * reuse the column G macros (alignr shifts lanes across register pairs). */
#define DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        __m128i t0 = _mm_alignr_epi8(B1, B0, 8); \
        __m128i t1 = _mm_alignr_epi8(B0, B1, 8); \
        B0 = t0; \
        B1 = t1; \
        \
        t0 = C0; \
        C0 = C1; \
        C1 = t0; \
        \
        t0 = _mm_alignr_epi8(D1, D0, 8); \
        t1 = _mm_alignr_epi8(D0, D1, 8); \
        D0 = t1; \
        D1 = t0; \
    } while ((void)0, 0)
/* Inverse of DIAGONALIZE: restore the column-aligned row layout. */
#define UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1) \
    do { \
        __m128i t0 = _mm_alignr_epi8(B0, B1, 8); \
        __m128i t1 = _mm_alignr_epi8(B1, B0, 8); \
        B0 = t0; \
        B1 = t1; \
        \
        t0 = C0; \
        C0 = C1; \
        C1 = t0; \
        \
        t0 = _mm_alignr_epi8(D0, D1, 8); \
        t1 = _mm_alignr_epi8(D1, D0, 8); \
        D0 = t1; \
        D1 = t0; \
    } while ((void)0, 0)
/* One full BlaMka round: column mix, diagonalize, diagonal mix, restore. */
#define BLAKE2_ROUND(A0, A1, B0, B1, C0, C1, D0, D1) \
    do { \
        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
        \
        DIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
        \
        G1(A0, B0, C0, D0, A1, B1, C1, D1); \
        G2(A0, B0, C0, D0, A1, B1, C1, D1); \
        \
        UNDIAGONALIZE(A0, B0, C0, D0, A1, B1, C1, D1); \
    } while ((void)0, 0)
#endif /* BLAKE_ROUND_MKA_OPT_H */

107
randomx/blake2/endian.h Normal file
View File

@ -0,0 +1,107 @@
#pragma once
#include <stdint.h>
#include <string.h>
/* FORCE_INLINE: inline hint per compiler; falls back to nothing. */
#if defined(_MSC_VER)
#define FORCE_INLINE __inline
#elif defined(__GNUC__) || defined(__clang__)
#define FORCE_INLINE __inline__
#else
#define FORCE_INLINE
#endif
/* Argon2 Team - Begin Code */
/*
   Not an exhaustive list, but should cover the majority of modern platforms
   Additionally, the code will always be correct---this is only a performance
   tweak.
*/
/* When NATIVE_LITTLE_ENDIAN is defined, the load/store helpers below use a
 * single memcpy instead of byte-by-byte assembly. */
#if (defined(__BYTE_ORDER__) && \
     (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || \
    defined(__LITTLE_ENDIAN__) || defined(__ARMEL__) || defined(__MIPSEL__) || \
    defined(__AARCH64EL__) || defined(__amd64__) || defined(__i386__) || \
    defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || \
    defined(_M_ARM)
#define NATIVE_LITTLE_ENDIAN
#endif
/* Argon2 Team - End Code */
/* Read a 32-bit little-endian value from possibly-unaligned memory. */
static FORCE_INLINE uint32_t load32(const void *src) {
#if defined(NATIVE_LITTLE_ENDIAN)
    uint32_t w;
    memcpy(&w, src, sizeof w);
    return w;
#else
    /* Big-endian / unknown host: assemble bytes explicitly. */
    const uint8_t *p = (const uint8_t *)src;
    uint32_t w = *p++;
    w |= (uint32_t)(*p++) << 8;
    w |= (uint32_t)(*p++) << 16;
    w |= (uint32_t)(*p++) << 24;
    return w;
#endif
}
/* Read a 64-bit value in host byte order (memcpy handles alignment). */
static FORCE_INLINE uint64_t load64_native(const void *src) {
    uint64_t w;
    memcpy(&w, src, sizeof w);
    return w;
}
/* Read a 64-bit little-endian value from possibly-unaligned memory. */
static FORCE_INLINE uint64_t load64(const void *src) {
#if defined(NATIVE_LITTLE_ENDIAN)
    return load64_native(src);
#else
    const uint8_t *p = (const uint8_t *)src;
    uint64_t w = *p++;
    w |= (uint64_t)(*p++) << 8;
    w |= (uint64_t)(*p++) << 16;
    w |= (uint64_t)(*p++) << 24;
    w |= (uint64_t)(*p++) << 32;
    w |= (uint64_t)(*p++) << 40;
    w |= (uint64_t)(*p++) << 48;
    w |= (uint64_t)(*p++) << 56;
    return w;
#endif
}
/* Write a 32-bit value to possibly-unaligned memory in little-endian order. */
static FORCE_INLINE void store32(void *dst, uint32_t w) {
#if defined(NATIVE_LITTLE_ENDIAN)
    memcpy(dst, &w, sizeof w);
#else
    uint8_t *p = (uint8_t *)dst;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
#endif
}
/* Write a 64-bit value in host byte order (memcpy handles alignment). */
static FORCE_INLINE void store64_native(void *dst, uint64_t w) {
    memcpy(dst, &w, sizeof w);
}
/* Write a 64-bit value to possibly-unaligned memory in little-endian order. */
static FORCE_INLINE void store64(void *dst, uint64_t w) {
#if defined(NATIVE_LITTLE_ENDIAN)
    store64_native(dst, w);
#else
    uint8_t *p = (uint8_t *)dst;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
    w >>= 8;
    *p++ = (uint8_t)w;
#endif
}

View File

@ -0,0 +1,62 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stddef.h>
#include "blake2/blake2.h"
#include "blake2/endian.h"
#include "blake2_generator.hpp"
namespace randomx {

	// At most 60 seed bytes are kept; bytes [60..63] of the buffer hold
	// the little-endian nonce.
	constexpr int maxSeedSize = 60;

	// Seed the generator.  dataIndex starts at sizeof(data) so the first
	// getByte/getUInt32 call forces a BLAKE2b re-hash of the buffer.
	Blake2Generator::Blake2Generator(const void* seed, size_t seedSize, int nonce) : dataIndex(sizeof(data)) {
		memset(data, 0, sizeof(data));
		memcpy(data, seed, seedSize > maxSeedSize ? maxSeedSize : seedSize);
		store32(&data[maxSeedSize], nonce);
	}

	// Return the next pseudo-random byte from the stream.
	uint8_t Blake2Generator::getByte() {
		checkData(1);
		return data[dataIndex++];
	}

	// Return the next pseudo-random 32-bit value (little-endian).
	uint32_t Blake2Generator::getUInt32() {
		checkData(4);
		auto ret = load32(&data[dataIndex]);
		dataIndex += 4;
		return ret;
	}

	// Refill: when fewer than bytesNeeded bytes remain, hash the buffer
	// in place with BLAKE2b-512 and restart from offset 0.
	void Blake2Generator::checkData(const size_t bytesNeeded) {
		if (dataIndex + bytesNeeded > sizeof(data)) {
			blake2b(data, sizeof(data), data, sizeof(data), nullptr, 0);
			dataIndex = 0;
		}
	}
}

View File

@ -0,0 +1,46 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cstdint>
namespace randomx {
	// Deterministic pseudo-random byte stream derived from a seed and a
	// nonce by repeated BLAKE2b hashing of a 64-byte buffer; used to
	// generate RandomX superscalar programs.
	class Blake2Generator {
	public:
		Blake2Generator(const void* seed, size_t seedSize, int nonce = 0);
		uint8_t getByte();
		uint32_t getUInt32();
	private:
		// Re-hash the buffer when fewer than the requested bytes remain.
		void checkData(const size_t);
		uint8_t data[64];   // current 64-byte output block
		size_t dataIndex;   // offset of the next unread byte in data
	};
}

View File

@ -0,0 +1,494 @@
/*
Copyright (c) 2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "bytecode_machine.hpp"
#include "reciprocal.h"
namespace randomx {
// Shared all-zero source operand: instructions whose src == dst read from
// this instead of a register (see compileInstruction).
const int_reg_t BytecodeMachine::zero = 0;

// Expands to a switch case dispatching to the matching exe_* handler.
#define INSTR_CASE(x) case InstructionType::x: \
	exe_ ## x(ibc, pc, scratchpad, config); \
	break;

// Execute one pre-compiled bytecode instruction by dispatching on its type.
// IMUL_RCP has no case because compilation lowers it to IMUL_R; reaching
// it (or any unknown type) here is a logic error.
void BytecodeMachine::executeInstruction(RANDOMX_EXE_ARGS) {
	switch (ibc.type)
	{
		INSTR_CASE(IADD_RS)
		INSTR_CASE(IADD_M)
		INSTR_CASE(ISUB_R)
		INSTR_CASE(ISUB_M)
		INSTR_CASE(IMUL_R)
		INSTR_CASE(IMUL_M)
		INSTR_CASE(IMULH_R)
		INSTR_CASE(IMULH_M)
		INSTR_CASE(ISMULH_R)
		INSTR_CASE(ISMULH_M)
		INSTR_CASE(INEG_R)
		INSTR_CASE(IXOR_R)
		INSTR_CASE(IXOR_M)
		INSTR_CASE(IROR_R)
		INSTR_CASE(IROL_R)
		INSTR_CASE(ISWAP_R)
		INSTR_CASE(FSWAP_R)
		INSTR_CASE(FADD_R)
		INSTR_CASE(FADD_M)
		INSTR_CASE(FSUB_R)
		INSTR_CASE(FSUB_M)
		INSTR_CASE(FSCAL_R)
		INSTR_CASE(FMUL_R)
		INSTR_CASE(FDIV_M)
		INSTR_CASE(FSQRT_R)
		INSTR_CASE(CBRANCH)
		INSTR_CASE(CFROUND)
		INSTR_CASE(ISTORE)

	case InstructionType::NOP:
		break;

	case InstructionType::IMUL_RCP: //executed as IMUL_R
	default:
		UNREACHABLE;
	}
}
// Translates one RandomX program instruction into interpreter bytecode (ibc).
// The 8-bit opcode selects the instruction by comparison against cumulative
// frequency ceilings (ceil_*, declared in bytecode_machine.hpp); the checks
// MUST stay in ascending ceiling order. For integer ops, registerUsage[dst]
// records the index i of the last instruction writing that register; CBRANCH
// later reads it as its backward jump target. When src == dst, the source
// operand is replaced by an immediate (or the shared 'zero' with an L3 mask
// for memory ops), matching the RandomX specification.
void BytecodeMachine::compileInstruction(RANDOMX_GEN_ARGS) {
	int opcode = instr.opcode;
	// note: nreg->r starts with all 8 integer registers equal to 0
	if (opcode < ceil_IADD_RS) {
		auto dst = instr.dst % RegistersCount;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::IADD_RS;
		ibc.idst = &nreg->r[dst];
		if (dst != RegisterNeedsDisplacement) { //RegisterNeedsDisplacement=5
			ibc.isrc = &nreg->r[src];
			ibc.shift = instr.getModShift();
			ibc.imm = 0;
		}
		else {
			// the displacement register additionally adds the immediate
			ibc.isrc = &nreg->r[src];
			ibc.shift = instr.getModShift();
			ibc.imm = signExtend2sCompl(instr.getImm32());
		}
		registerUsage[dst] = i;
		return;
	}
	if (opcode < ceil_IADD_M) {
		auto dst = instr.dst % RegistersCount;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::IADD_M;
		ibc.idst = &nreg->r[dst];
		ibc.imm = signExtend2sCompl(instr.getImm32());
		if (src != dst) {
			ibc.isrc = &nreg->r[src];
			ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
		}
		else {
			// src == dst: address = imm only, full L3 scratchpad range
			ibc.isrc = &zero;
			ibc.memMask = ScratchpadL3Mask;
		}
		registerUsage[dst] = i;
		return;
	}
	if (opcode < ceil_ISUB_R) {
		auto dst = instr.dst % RegistersCount;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::ISUB_R;
		ibc.idst = &nreg->r[dst];
		if (src != dst) {
			ibc.isrc = &nreg->r[src];
		}
		else {
			// src == dst: subtract the immediate instead
			ibc.imm = signExtend2sCompl(instr.getImm32());
			ibc.isrc = &ibc.imm;
		}
		registerUsage[dst] = i;
		return;
	}
	if (opcode < ceil_ISUB_M) {
		auto dst = instr.dst % RegistersCount;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::ISUB_M;
		ibc.idst = &nreg->r[dst];
		ibc.imm = signExtend2sCompl(instr.getImm32());
		if (src != dst) {
			ibc.isrc = &nreg->r[src];
			ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
		}
		else {
			ibc.isrc = &zero;
			ibc.memMask = ScratchpadL3Mask;
		}
		registerUsage[dst] = i;
		return;
	}
	if (opcode < ceil_IMUL_R) {
		auto dst = instr.dst % RegistersCount;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::IMUL_R;
		ibc.idst = &nreg->r[dst];
		if (src != dst) {
			ibc.isrc = &nreg->r[src];
		}
		else {
			ibc.imm = signExtend2sCompl(instr.getImm32());
			ibc.isrc = &ibc.imm;
		}
		registerUsage[dst] = i;
		return;
	}
	if (opcode < ceil_IMUL_M) {
		auto dst = instr.dst % RegistersCount;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::IMUL_M;
		ibc.idst = &nreg->r[dst];
		ibc.imm = signExtend2sCompl(instr.getImm32());
		if (src != dst) {
			ibc.isrc = &nreg->r[src];
			ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
		}
		else {
			ibc.isrc = &zero;
			ibc.memMask = ScratchpadL3Mask;
		}
		registerUsage[dst] = i;
		return;
	}
	if (opcode < ceil_IMULH_R) {
		// high 64 bits of unsigned 128-bit product; no immediate form
		auto dst = instr.dst % RegistersCount;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::IMULH_R;
		ibc.idst = &nreg->r[dst];
		ibc.isrc = &nreg->r[src];
		registerUsage[dst] = i;
		return;
	}
	if (opcode < ceil_IMULH_M) {
		auto dst = instr.dst % RegistersCount;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::IMULH_M;
		ibc.idst = &nreg->r[dst];
		ibc.imm = signExtend2sCompl(instr.getImm32());
		if (src != dst) {
			ibc.isrc = &nreg->r[src];
			ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
		}
		else {
			ibc.isrc = &zero;
			ibc.memMask = ScratchpadL3Mask;
		}
		registerUsage[dst] = i;
		return;
	}
	if (opcode < ceil_ISMULH_R) {
		// high 64 bits of signed 128-bit product; no immediate form
		auto dst = instr.dst % RegistersCount;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::ISMULH_R;
		ibc.idst = &nreg->r[dst];
		ibc.isrc = &nreg->r[src];
		registerUsage[dst] = i;
		return;
	}
	if (opcode < ceil_ISMULH_M) {
		auto dst = instr.dst % RegistersCount;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::ISMULH_M;
		ibc.idst = &nreg->r[dst];
		ibc.imm = signExtend2sCompl(instr.getImm32());
		if (src != dst) {
			ibc.isrc = &nreg->r[src];
			ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
		}
		else {
			ibc.isrc = &zero;
			ibc.memMask = ScratchpadL3Mask;
		}
		registerUsage[dst] = i;
		return;
	}
	if (opcode < ceil_IMUL_RCP) {
		// division by a constant, lowered to multiplication by its fixed-point
		// reciprocal; a zero or power-of-2 divisor degenerates to a NOP
		uint64_t divisor = instr.getImm32();
		if (!isZeroOrPowerOf2(divisor)) {
			auto dst = instr.dst % RegistersCount;
			ibc.type = InstructionType::IMUL_R;
			ibc.idst = &nreg->r[dst];
			ibc.imm = randomx_reciprocal(divisor);
			ibc.isrc = &ibc.imm;
			registerUsage[dst] = i;
		}
		else {
			ibc.type = InstructionType::NOP;
		}
		return;
	}
	if (opcode < ceil_INEG_R) {
		auto dst = instr.dst % RegistersCount;
		ibc.type = InstructionType::INEG_R;
		ibc.idst = &nreg->r[dst];
		registerUsage[dst] = i;
		return;
	}
	if (opcode < ceil_IXOR_R) {
		auto dst = instr.dst % RegistersCount;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::IXOR_R;
		ibc.idst = &nreg->r[dst];
		if (src != dst) {
			ibc.isrc = &nreg->r[src];
		}
		else {
			ibc.imm = signExtend2sCompl(instr.getImm32());
			ibc.isrc = &ibc.imm;
		}
		registerUsage[dst] = i;
		return;
	}
	if (opcode < ceil_IXOR_M) {
		auto dst = instr.dst % RegistersCount;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::IXOR_M;
		ibc.idst = &nreg->r[dst];
		ibc.imm = signExtend2sCompl(instr.getImm32());
		if (src != dst) {
			ibc.isrc = &nreg->r[src];
			ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
		}
		else {
			ibc.isrc = &zero;
			ibc.memMask = ScratchpadL3Mask;
		}
		registerUsage[dst] = i;
		return;
	}
	if (opcode < ceil_IROR_R) {
		auto dst = instr.dst % RegistersCount;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::IROR_R;
		ibc.idst = &nreg->r[dst];
		if (src != dst) {
			ibc.isrc = &nreg->r[src];
		}
		else {
			// rotate count: raw (not sign-extended) immediate, masked at exec time
			ibc.imm = instr.getImm32();
			ibc.isrc = &ibc.imm;
		}
		registerUsage[dst] = i;
		return;
	}
	if (opcode < ceil_IROL_R) {
		auto dst = instr.dst % RegistersCount;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::IROL_R;
		ibc.idst = &nreg->r[dst];
		if (src != dst) {
			ibc.isrc = &nreg->r[src];
		}
		else {
			ibc.imm = instr.getImm32();
			ibc.isrc = &ibc.imm;
		}
		registerUsage[dst] = i;
		return;
	}
	if (opcode < ceil_ISWAP_R) {
		auto dst = instr.dst % RegistersCount;
		auto src = instr.src % RegistersCount;
		if (src != dst) {
			ibc.idst = &nreg->r[dst];
			ibc.isrc = &nreg->r[src];
			ibc.type = InstructionType::ISWAP_R;
			// swap writes both registers
			registerUsage[dst] = i;
			registerUsage[src] = i;
		}
		else {
			// swapping a register with itself is a no-op
			ibc.type = InstructionType::NOP;
		}
		return;
	}
	if (opcode < ceil_FSWAP_R) {
		// dst may address any of the 8 FP registers: f0-f3, then e0-e3
		auto dst = instr.dst % RegistersCount;
		ibc.type = InstructionType::FSWAP_R;
		if (dst < RegisterCountFlt)
			ibc.fdst = &nreg->f[dst];
		else
			ibc.fdst = &nreg->e[dst - RegisterCountFlt];
		return;
	}
	if (opcode < ceil_FADD_R) {
		auto dst = instr.dst % RegisterCountFlt;
		auto src = instr.src % RegisterCountFlt;
		ibc.type = InstructionType::FADD_R;
		ibc.fdst = &nreg->f[dst];
		ibc.fsrc = &nreg->a[src];
		return;
	}
	if (opcode < ceil_FADD_M) {
		auto dst = instr.dst % RegisterCountFlt;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::FADD_M;
		ibc.fdst = &nreg->f[dst];
		ibc.isrc = &nreg->r[src];
		ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
		ibc.imm = signExtend2sCompl(instr.getImm32());
		return;
	}
	if (opcode < ceil_FSUB_R) {
		auto dst = instr.dst % RegisterCountFlt;
		auto src = instr.src % RegisterCountFlt;
		ibc.type = InstructionType::FSUB_R;
		ibc.fdst = &nreg->f[dst];
		ibc.fsrc = &nreg->a[src];
		return;
	}
	if (opcode < ceil_FSUB_M) {
		auto dst = instr.dst % RegisterCountFlt;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::FSUB_M;
		ibc.fdst = &nreg->f[dst];
		ibc.isrc = &nreg->r[src];
		ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
		ibc.imm = signExtend2sCompl(instr.getImm32());
		return;
	}
	if (opcode < ceil_FSCAL_R) {
		auto dst = instr.dst % RegisterCountFlt;
		ibc.fdst = &nreg->f[dst];
		ibc.type = InstructionType::FSCAL_R;
		return;
	}
	if (opcode < ceil_FMUL_R) {
		// multiplicative ops operate on the e group
		auto dst = instr.dst % RegisterCountFlt;
		auto src = instr.src % RegisterCountFlt;
		ibc.type = InstructionType::FMUL_R;
		ibc.fdst = &nreg->e[dst];
		ibc.fsrc = &nreg->a[src];
		return;
	}
	if (opcode < ceil_FDIV_M) {
		auto dst = instr.dst % RegisterCountFlt;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::FDIV_M;
		ibc.fdst = &nreg->e[dst];
		ibc.isrc = &nreg->r[src];
		ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
		ibc.imm = signExtend2sCompl(instr.getImm32());
		return;
	}
	if (opcode < ceil_FSQRT_R) {
		auto dst = instr.dst % RegisterCountFlt;
		ibc.type = InstructionType::FSQRT_R;
		ibc.fdst = &nreg->e[dst];
		return;
	}
	if (opcode < ceil_CBRANCH) {
		ibc.type = InstructionType::CBRANCH;
		//jump condition
		int creg = instr.dst % RegistersCount;
		ibc.idst = &nreg->r[creg];
		// jump back to the instruction after the last writer of creg
		ibc.target = registerUsage[creg];
		int shift = instr.getModCond() + ConditionOffset;
		ibc.imm = signExtend2sCompl(instr.getImm32()) | (1ULL << shift);
		if (ConditionOffset > 0 || shift > 0) //clear the bit below the condition mask - this limits the number of successive jumps to 2
			ibc.imm &= ~(1ULL << (shift - 1));
		ibc.memMask = ConditionMask << shift;
		//mark all registers as used
		for (unsigned j = 0; j < RegistersCount; ++j) {
			registerUsage[j] = i;
		}
		return;
	}
	if (opcode < ceil_CFROUND) {
		auto src = instr.src % RegistersCount;
		ibc.isrc = &nreg->r[src];
		ibc.type = InstructionType::CFROUND;
		// rotate count for the rounding-mode selector
		ibc.imm = instr.getImm32() & 63;
		return;
	}
	if (opcode < ceil_ISTORE) {
		auto dst = instr.dst % RegistersCount;
		auto src = instr.src % RegistersCount;
		ibc.type = InstructionType::ISTORE;
		ibc.idst = &nreg->r[dst];
		ibc.isrc = &nreg->r[src];
		ibc.imm = signExtend2sCompl(instr.getImm32());
		if (instr.getModCond() < StoreL3Condition) //StoreL3Condition= 14
			ibc.memMask = (instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
		else
			ibc.memMask = ScratchpadL3Mask;
		return;
	}
	if (opcode < ceil_NOP) {
		ibc.type = InstructionType::NOP;
		return;
	}
	// opcode space is fully covered by the ceilings above (sum == 256)
	UNREACHABLE;
}
}

View File

@ -0,0 +1,322 @@
/*
Copyright (c) 2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "common.hpp"
#include "intrin_portable.h"
#include "instruction.hpp"
#include "program.hpp"
namespace randomx {
//register file in machine byte order
// Holds the VM's working registers during interpretation:
//   r - 8 integer registers (zero-initialized)
//   f - "additive" FP register group
//   e - "multiplicative" FP register group
//   a - read-only FP input group
struct NativeRegisterFile {
	int_reg_t r[RegistersCount] = { 0 };
	rx_vec_f128 f[RegisterCountFlt];
	rx_vec_f128 e[RegisterCountFlt];
	rx_vec_f128 a[RegisterCountFlt];
};
// One decoded instruction as produced by BytecodeMachine::compileInstruction.
// Unions overlay the integer and floating-point operand pointers as well as
// the signed/unsigned views of the immediate; 'type' selects which members
// are meaningful for a given instruction.
struct InstructionByteCode {
	union {
		int_reg_t* idst;     // integer destination register
		rx_vec_f128* fdst;   // FP destination register
	};
	union {
		const int_reg_t* isrc;   // integer source (register, &imm, or &zero)
		const rx_vec_f128* fsrc; // FP source register
	};
	union {
		uint64_t imm;   // immediate / reciprocal / address offset
		int64_t simm;   // signed view of the same immediate
	};
	InstructionType type;
	union {
		int16_t target;  // CBRANCH: jump target (instruction index)
		uint16_t shift;  // IADD_RS: left-shift amount
	};
	uint32_t memMask;    // scratchpad address mask or CBRANCH condition mask
};
// Builds cumulative opcode-frequency ceilings: an 8-bit opcode belongs to
// instruction X iff ceil_<prev> <= opcode < ceil_X. Frequencies come from
// configuration.h and must sum to 256 (checked in common.hpp).
#define OPCODE_CEIL_DECLARE(curr, prev) constexpr int ceil_ ## curr = ceil_ ## prev + RANDOMX_FREQ_ ## curr;
constexpr int ceil_NULL = 0;
OPCODE_CEIL_DECLARE(IADD_RS, NULL);
OPCODE_CEIL_DECLARE(IADD_M, IADD_RS);
OPCODE_CEIL_DECLARE(ISUB_R, IADD_M);
OPCODE_CEIL_DECLARE(ISUB_M, ISUB_R);
OPCODE_CEIL_DECLARE(IMUL_R, ISUB_M);
OPCODE_CEIL_DECLARE(IMUL_M, IMUL_R);
OPCODE_CEIL_DECLARE(IMULH_R, IMUL_M);
OPCODE_CEIL_DECLARE(IMULH_M, IMULH_R);
OPCODE_CEIL_DECLARE(ISMULH_R, IMULH_M);
OPCODE_CEIL_DECLARE(ISMULH_M, ISMULH_R);
OPCODE_CEIL_DECLARE(IMUL_RCP, ISMULH_M);
OPCODE_CEIL_DECLARE(INEG_R, IMUL_RCP);
OPCODE_CEIL_DECLARE(IXOR_R, INEG_R);
OPCODE_CEIL_DECLARE(IXOR_M, IXOR_R);
OPCODE_CEIL_DECLARE(IROR_R, IXOR_M);
OPCODE_CEIL_DECLARE(IROL_R, IROR_R);
OPCODE_CEIL_DECLARE(ISWAP_R, IROL_R);
OPCODE_CEIL_DECLARE(FSWAP_R, ISWAP_R);
OPCODE_CEIL_DECLARE(FADD_R, FSWAP_R);
OPCODE_CEIL_DECLARE(FADD_M, FADD_R);
OPCODE_CEIL_DECLARE(FSUB_R, FADD_M);
OPCODE_CEIL_DECLARE(FSUB_M, FSUB_R);
OPCODE_CEIL_DECLARE(FSCAL_R, FSUB_M);
OPCODE_CEIL_DECLARE(FMUL_R, FSCAL_R);
OPCODE_CEIL_DECLARE(FDIV_M, FMUL_R);
OPCODE_CEIL_DECLARE(FSQRT_R, FDIV_M);
OPCODE_CEIL_DECLARE(CBRANCH, FSQRT_R);
OPCODE_CEIL_DECLARE(CFROUND, CBRANCH);
OPCODE_CEIL_DECLARE(ISTORE, CFROUND);
OPCODE_CEIL_DECLARE(NOP, ISTORE);
#undef OPCODE_CEIL_DECLARE
// Common parameter lists for the execute/compile entry points.
#define RANDOMX_EXE_ARGS InstructionByteCode& ibc, int& pc, uint8_t* scratchpad, ProgramConfiguration& config
#define RANDOMX_GEN_ARGS Instruction& instr, int i, InstructionByteCode& ibc
class BytecodeMachine;
// Pointer-to-member type used by the optional gen-table dispatch path.
typedef void(BytecodeMachine::*InstructionGenBytecode)(RANDOMX_GEN_ARGS);
// Portable (non-JIT) RandomX virtual machine core: compiles a Program into
// InstructionByteCode and interprets it against a register file and the
// scratchpad. One exe_* handler exists per instruction type; they mutate the
// registers/scratchpad through pointers baked in at compile time.
class BytecodeMachine {
public:
	// Resets per-program state: marks every integer register as never written
	// (registerUsage = -1) and binds the register file used for compilation.
	void beginCompilation(NativeRegisterFile& regFile) {
		for (unsigned i = 0; i < RegistersCount; ++i) {
			registerUsage[i] = -1;
		}
		nreg = &regFile;
	}
	// Compiles all RANDOMX_PROGRAM_SIZE instructions of 'program' into
	// 'bytecode', resolving operands against 'regFile'.
	void compileProgram(Program& program, InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE], NativeRegisterFile& regFile) {
		beginCompilation(regFile);
		for (unsigned i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) { //256
			auto& instr = program(i);
			auto& ibc = bytecode[i];
			compileInstruction(instr, i, ibc);
		}
	}
	// Runs the compiled program once; CBRANCH may move 'pc' backwards.
	static void executeBytecode(InstructionByteCode bytecode[RANDOMX_PROGRAM_SIZE], uint8_t* scratchpad, ProgramConfiguration& config) {
		for (int pc = 0; pc < RANDOMX_PROGRAM_SIZE; ++pc) {
			auto& ibc = bytecode[pc];
			executeInstruction(ibc, pc, scratchpad, config);
		}
	}
	// With RANDOMX_GEN_TABLE the opcode dispatch uses a 256-entry member
	// function table; otherwise the out-of-line if-chain implementation is used.
	void compileInstruction(RANDOMX_GEN_ARGS)
#ifdef RANDOMX_GEN_TABLE
	{
		auto generator = genTable[instr.opcode];
		(this->*generator)(instr, i, ibc);
	}
#else
	;
#endif
	static void executeInstruction(RANDOMX_EXE_ARGS);
	// --- integer handlers ---
	static void exe_IADD_RS(RANDOMX_EXE_ARGS) {
		*ibc.idst += (*ibc.isrc << ibc.shift) + ibc.imm;
	}
	static void exe_IADD_M(RANDOMX_EXE_ARGS) {
		*ibc.idst += load64(getScratchpadAddress(ibc, scratchpad));
	}
	static void exe_ISUB_R(RANDOMX_EXE_ARGS) {
		*ibc.idst -= *ibc.isrc;
	}
	static void exe_ISUB_M(RANDOMX_EXE_ARGS) {
		*ibc.idst -= load64(getScratchpadAddress(ibc, scratchpad));
	}
	static void exe_IMUL_R(RANDOMX_EXE_ARGS) {
		*ibc.idst *= *ibc.isrc;
	}
	static void exe_IMUL_M(RANDOMX_EXE_ARGS) {
		*ibc.idst *= load64(getScratchpadAddress(ibc, scratchpad));
	}
	static void exe_IMULH_R(RANDOMX_EXE_ARGS) {
		// high 64 bits of the unsigned 128-bit product
		*ibc.idst = mulh(*ibc.idst, *ibc.isrc);
	}
	static void exe_IMULH_M(RANDOMX_EXE_ARGS) {
		*ibc.idst = mulh(*ibc.idst, load64(getScratchpadAddress(ibc, scratchpad)));
	}
	static void exe_ISMULH_R(RANDOMX_EXE_ARGS) {
		// high 64 bits of the signed 128-bit product
		*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(*ibc.isrc));
	}
	static void exe_ISMULH_M(RANDOMX_EXE_ARGS) {
		*ibc.idst = smulh(unsigned64ToSigned2sCompl(*ibc.idst), unsigned64ToSigned2sCompl(load64(getScratchpadAddress(ibc, scratchpad))));
	}
	static void exe_INEG_R(RANDOMX_EXE_ARGS) {
		*ibc.idst = ~(*ibc.idst) + 1; //two's complement negative
	}
	static void exe_IXOR_R(RANDOMX_EXE_ARGS) {
		*ibc.idst ^= *ibc.isrc;
	}
	static void exe_IXOR_M(RANDOMX_EXE_ARGS) {
		*ibc.idst ^= load64(getScratchpadAddress(ibc, scratchpad));
	}
	static void exe_IROR_R(RANDOMX_EXE_ARGS) {
		*ibc.idst = rotr(*ibc.idst, *ibc.isrc & 63);
	}
	static void exe_IROL_R(RANDOMX_EXE_ARGS) {
		*ibc.idst = rotl(*ibc.idst, *ibc.isrc & 63);
	}
	static void exe_ISWAP_R(RANDOMX_EXE_ARGS) {
		// isrc is guaranteed to point at a register here (src != dst at
		// compile time), so casting away const is safe
		int_reg_t temp = *ibc.isrc;
		*(int_reg_t*)ibc.isrc = *ibc.idst;
		*ibc.idst = temp;
	}
	// --- floating-point handlers ---
	static void exe_FSWAP_R(RANDOMX_EXE_ARGS) {
		*ibc.fdst = rx_swap_vec_f128(*ibc.fdst);
	}
	static void exe_FADD_R(RANDOMX_EXE_ARGS) {
		*ibc.fdst = rx_add_vec_f128(*ibc.fdst, *ibc.fsrc);
	}
	static void exe_FADD_M(RANDOMX_EXE_ARGS) {
		rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad));
		*ibc.fdst = rx_add_vec_f128(*ibc.fdst, fsrc);
	}
	static void exe_FSUB_R(RANDOMX_EXE_ARGS) {
		*ibc.fdst = rx_sub_vec_f128(*ibc.fdst, *ibc.fsrc);
	}
	static void exe_FSUB_M(RANDOMX_EXE_ARGS) {
		rx_vec_f128 fsrc = rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad));
		*ibc.fdst = rx_sub_vec_f128(*ibc.fdst, fsrc);
	}
	static void exe_FSCAL_R(RANDOMX_EXE_ARGS) {
		// XOR mask flips the sign bit and part of the exponent
		const rx_vec_f128 mask = rx_set1_vec_f128(0x80F0000000000000);
		*ibc.fdst = rx_xor_vec_f128(*ibc.fdst, mask);
	}
	static void exe_FMUL_R(RANDOMX_EXE_ARGS) {
		*ibc.fdst = rx_mul_vec_f128(*ibc.fdst, *ibc.fsrc);
	}
	static void exe_FDIV_M(RANDOMX_EXE_ARGS) {
		rx_vec_f128 fsrc = maskRegisterExponentMantissa(
			config,
			rx_cvt_packed_int_vec_f128(getScratchpadAddress(ibc, scratchpad))
		);
		*ibc.fdst = rx_div_vec_f128(*ibc.fdst, fsrc);
	}
	static void exe_FSQRT_R(RANDOMX_EXE_ARGS) {
		*ibc.fdst = rx_sqrt_vec_f128(*ibc.fdst);
	}
	// --- control-flow / store handlers ---
	static void exe_CBRANCH(RANDOMX_EXE_ARGS) {
		*ibc.idst += ibc.imm;
		// branch taken when the condition bits are all zero
		if ((*ibc.idst & ibc.memMask) == 0) {
			pc = ibc.target;
		}
	}
	static void exe_CFROUND(RANDOMX_EXE_ARGS) {
		rx_set_rounding_mode(rotr(*ibc.isrc, ibc.imm) % 4);
	}
	static void exe_ISTORE(RANDOMX_EXE_ARGS) {
		store64(scratchpad + ((*ibc.idst + ibc.imm) & ibc.memMask), *ibc.isrc);
	}
protected:
	// Forces a scratchpad-loaded FP value into the valid operand range for
	// FDIV_M: keeps the dynamic mantissa bits and substitutes the
	// program-specific exponent from config.eMask.
	static rx_vec_f128 maskRegisterExponentMantissa(ProgramConfiguration& config, rx_vec_f128 x) {
		const rx_vec_f128 xmantissaMask = rx_set_vec_f128(dynamicMantissaMask, dynamicMantissaMask);
		const rx_vec_f128 xexponentMask = rx_load_vec_f128((const double*)&config.eMask);
		x = rx_and_vec_f128(x, xmantissaMask);
		x = rx_or_vec_f128(x, xexponentMask);
		return x;
	}
private:
	static const int_reg_t zero;        // shared zero operand (see .cpp)
	int registerUsage[RegistersCount];  // last instruction index writing r[j]
	NativeRegisterFile* nreg;           // register file bound by beginCompilation
	// Computes the effective scratchpad address: (src + imm) masked to the
	// instruction's cache-level range.
	static void* getScratchpadAddress(InstructionByteCode& ibc, uint8_t* scratchpad) {
		uint32_t addr = (*ibc.isrc + ibc.imm) & ibc.memMask;
		return scratchpad + addr;
	}
#ifdef RANDOMX_GEN_TABLE
	static InstructionGenBytecode genTable[256];
	void gen_IADD_RS(RANDOMX_GEN_ARGS);
	void gen_IADD_M(RANDOMX_GEN_ARGS);
	void gen_ISUB_R(RANDOMX_GEN_ARGS);
	void gen_ISUB_M(RANDOMX_GEN_ARGS);
	void gen_IMUL_R(RANDOMX_GEN_ARGS);
	void gen_IMUL_M(RANDOMX_GEN_ARGS);
	void gen_IMULH_R(RANDOMX_GEN_ARGS);
	void gen_IMULH_M(RANDOMX_GEN_ARGS);
	void gen_ISMULH_R(RANDOMX_GEN_ARGS);
	void gen_ISMULH_M(RANDOMX_GEN_ARGS);
	void gen_IMUL_RCP(RANDOMX_GEN_ARGS);
	void gen_INEG_R(RANDOMX_GEN_ARGS);
	void gen_IXOR_R(RANDOMX_GEN_ARGS);
	void gen_IXOR_M(RANDOMX_GEN_ARGS);
	void gen_IROR_R(RANDOMX_GEN_ARGS);
	void gen_IROL_R(RANDOMX_GEN_ARGS);
	void gen_ISWAP_R(RANDOMX_GEN_ARGS);
	void gen_FSWAP_R(RANDOMX_GEN_ARGS);
	void gen_FADD_R(RANDOMX_GEN_ARGS);
	void gen_FADD_M(RANDOMX_GEN_ARGS);
	void gen_FSUB_R(RANDOMX_GEN_ARGS);
	void gen_FSUB_M(RANDOMX_GEN_ARGS);
	void gen_FSCAL_R(RANDOMX_GEN_ARGS);
	void gen_FMUL_R(RANDOMX_GEN_ARGS);
	void gen_FDIV_M(RANDOMX_GEN_ARGS);
	void gen_FSQRT_R(RANDOMX_GEN_ARGS);
	void gen_CBRANCH(RANDOMX_GEN_ARGS);
	void gen_CFROUND(RANDOMX_GEN_ARGS);
	void gen_ISTORE(RANDOMX_GEN_ARGS);
	void gen_NOP(RANDOMX_GEN_ARGS);
#endif
};
}

187
randomx/common.hpp Normal file
View File

@ -0,0 +1,187 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cstdint>
#include <iostream>
#include <climits>
#include "blake2/endian.h"
#include "configuration.h"
#include "randomx.h"
namespace randomx {
	// --- compile-time validation of the parameters in configuration.h ---
	static_assert(RANDOMX_ARGON_MEMORY >= 8, "RANDOMX_ARGON_MEMORY must be at least 8.");
	static_assert(RANDOMX_ARGON_MEMORY <= 2097152, "RANDOMX_ARGON_MEMORY must not exceed 2097152.");
	static_assert((RANDOMX_ARGON_MEMORY & (RANDOMX_ARGON_MEMORY - 1)) == 0, "RANDOMX_ARGON_MEMORY must be a power of 2.");
	static_assert(RANDOMX_ARGON_ITERATIONS > 0 && RANDOMX_ARGON_ITERATIONS < UINT32_MAX, "RANDOMX_ARGON_ITERATIONS must be a positive 32-bit integer.");
	static_assert(RANDOMX_ARGON_LANES > 0 && RANDOMX_ARGON_LANES <= 16777215, "RANDOMX_ARGON_LANES out of range");
	static_assert(RANDOMX_DATASET_BASE_SIZE >= 64, "RANDOMX_DATASET_BASE_SIZE must be at least 64.");
	static_assert((RANDOMX_DATASET_BASE_SIZE & (RANDOMX_DATASET_BASE_SIZE - 1)) == 0, "RANDOMX_DATASET_BASE_SIZE must be a power of 2.");
	static_assert(RANDOMX_DATASET_BASE_SIZE <= 4294967296ULL, "RANDOMX_DATASET_BASE_SIZE must not exceed 4294967296.");
	static_assert(RANDOMX_DATASET_EXTRA_SIZE % 64 == 0, "RANDOMX_DATASET_EXTRA_SIZE must be divisible by 64.");
	static_assert((uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE <= 17179869184, "Dataset size must not exceed 16 GiB.");
	static_assert(RANDOMX_PROGRAM_SIZE > 0, "RANDOMX_PROGRAM_SIZE must be greater than 0");
	static_assert(RANDOMX_PROGRAM_SIZE <= 32768, "RANDOMX_PROGRAM_SIZE must not exceed 32768");
	static_assert(RANDOMX_PROGRAM_ITERATIONS > 0, "RANDOMX_PROGRAM_ITERATIONS must be greater than 0");
	static_assert(RANDOMX_PROGRAM_COUNT > 0, "RANDOMX_PROGRAM_COUNT must be greater than 0");
	static_assert((RANDOMX_SCRATCHPAD_L3 & (RANDOMX_SCRATCHPAD_L3 - 1)) == 0, "RANDOMX_SCRATCHPAD_L3 must be a power of 2.");
	static_assert(RANDOMX_SCRATCHPAD_L3 >= RANDOMX_SCRATCHPAD_L2, "RANDOMX_SCRATCHPAD_L3 must be greater than or equal to RANDOMX_SCRATCHPAD_L2.");
	static_assert((RANDOMX_SCRATCHPAD_L2 & (RANDOMX_SCRATCHPAD_L2 - 1)) == 0, "RANDOMX_SCRATCHPAD_L2 must be a power of 2.");
	static_assert(RANDOMX_SCRATCHPAD_L2 >= RANDOMX_SCRATCHPAD_L1, "RANDOMX_SCRATCHPAD_L2 must be greater than or equal to RANDOMX_SCRATCHPAD_L1.");
	static_assert(RANDOMX_SCRATCHPAD_L1 >= 64, "RANDOMX_SCRATCHPAD_L1 must be at least 64.");
	static_assert((RANDOMX_SCRATCHPAD_L1 & (RANDOMX_SCRATCHPAD_L1 - 1)) == 0, "RANDOMX_SCRATCHPAD_L1 must be a power of 2.");
	static_assert(RANDOMX_CACHE_ACCESSES > 1, "RANDOMX_CACHE_ACCESSES must be greater than 1");
	static_assert(RANDOMX_SUPERSCALAR_LATENCY > 0, "RANDOMX_SUPERSCALAR_LATENCY must be greater than 0");
	static_assert(RANDOMX_SUPERSCALAR_LATENCY <= 10000, "RANDOMX_SUPERSCALAR_LATENCY must not exceed 10000");
	static_assert(RANDOMX_JUMP_BITS > 0, "RANDOMX_JUMP_BITS must be greater than 0.");
	static_assert(RANDOMX_JUMP_OFFSET >= 0, "RANDOMX_JUMP_OFFSET must be greater than or equal to 0.");
	static_assert(RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET <= 16, "RANDOMX_JUMP_BITS + RANDOMX_JUMP_OFFSET must not exceed 16.");
	// Instruction frequencies must exactly cover the 8-bit opcode space.
	constexpr int wtSum = RANDOMX_FREQ_IADD_RS + RANDOMX_FREQ_IADD_M + RANDOMX_FREQ_ISUB_R + \
		RANDOMX_FREQ_ISUB_M + RANDOMX_FREQ_IMUL_R + RANDOMX_FREQ_IMUL_M + RANDOMX_FREQ_IMULH_R + \
		RANDOMX_FREQ_IMULH_M + RANDOMX_FREQ_ISMULH_R + RANDOMX_FREQ_ISMULH_M + RANDOMX_FREQ_IMUL_RCP + \
		RANDOMX_FREQ_INEG_R + RANDOMX_FREQ_IXOR_R + RANDOMX_FREQ_IXOR_M + RANDOMX_FREQ_IROR_R + RANDOMX_FREQ_IROL_R + RANDOMX_FREQ_ISWAP_R + \
		RANDOMX_FREQ_FSWAP_R + RANDOMX_FREQ_FADD_R + RANDOMX_FREQ_FADD_M + RANDOMX_FREQ_FSUB_R + RANDOMX_FREQ_FSUB_M + \
		RANDOMX_FREQ_FSCAL_R + RANDOMX_FREQ_FMUL_R + RANDOMX_FREQ_FDIV_M + RANDOMX_FREQ_FSQRT_R + RANDOMX_FREQ_CBRANCH + \
		RANDOMX_FREQ_CFROUND + RANDOMX_FREQ_ISTORE + RANDOMX_FREQ_NOP;
	static_assert(wtSum == 256, "Sum of instruction frequencies must be 256.");
	// --- derived sizes and masks ---
	constexpr uint32_t ArgonBlockSize = 1024;
	constexpr int ArgonSaltSize = sizeof("" RANDOMX_ARGON_SALT) - 1;
	static_assert(ArgonSaltSize >= 8, "RANDOMX_ARGON_SALT must be at least 8 characters long");
	constexpr int SuperscalarMaxSize = 3 * RANDOMX_SUPERSCALAR_LATENCY + 2;
	constexpr size_t CacheLineSize = RANDOMX_DATASET_ITEM_SIZE;
	constexpr int ScratchpadSize = RANDOMX_SCRATCHPAD_L3;
	constexpr uint32_t CacheLineAlignMask = (RANDOMX_DATASET_BASE_SIZE - 1) & ~(CacheLineSize - 1);
	constexpr uint32_t CacheSize = RANDOMX_ARGON_MEMORY * ArgonBlockSize;
	constexpr uint64_t DatasetSize = RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE;
	constexpr uint32_t DatasetExtraItems = RANDOMX_DATASET_EXTRA_SIZE / RANDOMX_DATASET_ITEM_SIZE;
	constexpr uint32_t ConditionMask = ((1 << RANDOMX_JUMP_BITS) - 1);
	constexpr int ConditionOffset = RANDOMX_JUMP_OFFSET;
	constexpr int StoreL3Condition = 14;
	//Prevent some unsafe configurations.
#ifndef RANDOMX_UNSAFE
	static_assert((uint64_t)ArgonBlockSize * RANDOMX_CACHE_ACCESSES * RANDOMX_ARGON_MEMORY + 33554432 >= (uint64_t)RANDOMX_DATASET_BASE_SIZE + RANDOMX_DATASET_EXTRA_SIZE, "Unsafe configuration: Memory-time tradeoffs");
	static_assert((128 + RANDOMX_PROGRAM_SIZE * RANDOMX_FREQ_ISTORE / 256) * (RANDOMX_PROGRAM_COUNT * RANDOMX_PROGRAM_ITERATIONS) >= RANDOMX_SCRATCHPAD_L3, "Unsafe configuration: Insufficient Scratchpad writes");
	static_assert(RANDOMX_PROGRAM_COUNT > 1, "Unsafe configuration: Program filtering strategies");
	static_assert(RANDOMX_PROGRAM_SIZE >= 64, "Unsafe configuration: Low program entropy");
	static_assert(RANDOMX_PROGRAM_ITERATIONS >= 400, "Unsafe configuration: High compilation overhead");
#endif
#ifdef TRACE
	constexpr bool trace = true;
#else
	constexpr bool trace = false;
#endif
#ifndef UNREACHABLE
#ifdef __GNUC__
#define UNREACHABLE __builtin_unreachable()
#elif _MSC_VER
#define UNREACHABLE __assume(false)
#else
#define UNREACHABLE
#endif
#endif
	// Select the JIT compiler backend for the host architecture; other
	// platforms fall back to the interpreter.
#if defined(_M_X64) || defined(__x86_64__)
#define RANDOMX_HAVE_COMPILER 1
	class JitCompilerX86;
	using JitCompiler = JitCompilerX86;
#elif defined(__aarch64__)
#define RANDOMX_HAVE_COMPILER 1
	class JitCompilerA64;
	using JitCompiler = JitCompilerA64;
#else
#define RANDOMX_HAVE_COMPILER 0
	class JitCompilerFallback;
	using JitCompiler = JitCompilerFallback;
#endif
	using addr_t = uint32_t;
	using int_reg_t = uint64_t;
	// One 128-bit FP register viewed as two doubles.
	struct fpu_reg_t {
		double lo;
		double hi;
	};
	// Scratchpad level sizes in 8-byte words and the address masks derived
	// from them (mask = (words - 1) * 8 keeps addresses 8-byte aligned).
	constexpr uint32_t ScratchpadL1 = RANDOMX_SCRATCHPAD_L1 / sizeof(int_reg_t);
	constexpr uint32_t ScratchpadL2 = RANDOMX_SCRATCHPAD_L2 / sizeof(int_reg_t);
	constexpr uint32_t ScratchpadL3 = RANDOMX_SCRATCHPAD_L3 / sizeof(int_reg_t);
	constexpr int ScratchpadL1Mask = (ScratchpadL1 - 1) * 8;
	constexpr int ScratchpadL2Mask = (ScratchpadL2 - 1) * 8;
	constexpr int ScratchpadL1Mask16 = (ScratchpadL1 / 2 - 1) * 16;
	constexpr int ScratchpadL2Mask16 = (ScratchpadL2 / 2 - 1) * 16;
	constexpr int ScratchpadL3Mask = (ScratchpadL3 - 1) * 8;
	constexpr int ScratchpadL3Mask64 = (ScratchpadL3 / 8 - 1) * 64;
	constexpr int RegistersCount = 8;
	constexpr int RegisterCountFlt = RegistersCount / 2;
	constexpr int RegisterNeedsDisplacement = 5; //x86 r13 register
	constexpr int RegisterNeedsSib = 4; //x86 r12 register
	// True for 0 and all powers of two (used by IMUL_RCP to skip NOPs).
	inline bool isZeroOrPowerOf2(uint64_t x) {
		return (x & (x - 1)) == 0;
	}
	// IEEE-754 double layout constants used by the FP operand masking.
	constexpr int mantissaSize = 52;
	constexpr int exponentSize = 11;
	constexpr uint64_t mantissaMask = (1ULL << mantissaSize) - 1;
	constexpr uint64_t exponentMask = (1ULL << exponentSize) - 1;
	constexpr int exponentBias = 1023;
	constexpr int dynamicExponentBits = 4;
	constexpr int staticExponentBits = 4;
	constexpr uint64_t constExponentBits = 0x300;
	constexpr uint64_t dynamicMantissaMask = (1ULL << (mantissaSize + dynamicExponentBits)) - 1;
	// Dataset access registers (mx/ma) plus the backing memory pointer.
	struct MemoryRegisters {
		addr_t mx, ma;
		uint8_t* memory = nullptr;
	};
	//register file in little-endian byte order
	struct RegisterFile {
		int_reg_t r[RegistersCount];
		fpu_reg_t f[RegisterCountFlt];
		fpu_reg_t e[RegisterCountFlt];
		fpu_reg_t a[RegisterCountFlt];
	};
	// Function-pointer types for JIT-generated code and cache/dataset management.
	typedef void(ProgramFunc)(RegisterFile&, MemoryRegisters&, uint8_t* /* scratchpad */, uint64_t);
	typedef void(DatasetInitFunc)(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);
	typedef void(DatasetDeallocFunc)(randomx_dataset*);
	typedef void(CacheDeallocFunc)(randomx_cache*);
	typedef void(CacheInitializeFunc)(randomx_cache*, const void*, size_t);
}

125
randomx/configuration.h Normal file
View File

@ -0,0 +1,125 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
// RandomX tunable parameters. Changing any value here produces an
// incompatible variant of the algorithm; the constraints stated in each
// comment are enforced by static_asserts in common.hpp.
//Cache size in KiB. Must be a power of 2.
#define RANDOMX_ARGON_MEMORY 262144
//Number of Argon2d iterations for Cache initialization.
#define RANDOMX_ARGON_ITERATIONS 3
//Number of parallel lanes for Cache initialization.
#define RANDOMX_ARGON_LANES 1
//Argon2d salt
#define RANDOMX_ARGON_SALT "RandomX\x03"
//Number of random Cache accesses per Dataset item. Minimum is 2.
#define RANDOMX_CACHE_ACCESSES 8
//Target latency for SuperscalarHash (in cycles of the reference CPU).
#define RANDOMX_SUPERSCALAR_LATENCY 170
//Dataset base size in bytes. Must be a power of 2.
#define RANDOMX_DATASET_BASE_SIZE 2147483648
//Dataset extra size. Must be divisible by 64.
#define RANDOMX_DATASET_EXTRA_SIZE 33554368
//Number of instructions in a RandomX program. Must be divisible by 8.
#define RANDOMX_PROGRAM_SIZE 256
//Number of iterations during VM execution.
#define RANDOMX_PROGRAM_ITERATIONS 2048
//Number of chained VM executions per hash.
#define RANDOMX_PROGRAM_COUNT 8
//Scratchpad L3 size in bytes. Must be a power of 2.
#define RANDOMX_SCRATCHPAD_L3 2097152
//Scratchpad L2 size in bytes. Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3.
#define RANDOMX_SCRATCHPAD_L2 262144
//Scratchpad L1 size in bytes. Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2.
#define RANDOMX_SCRATCHPAD_L1 16384
//Jump condition mask size in bits.
#define RANDOMX_JUMP_BITS 8
//Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16.
#define RANDOMX_JUMP_OFFSET 8
/*
Instruction frequencies (per 256 opcodes)
Total sum of frequencies must be 256
*/
//Integer instructions
#define RANDOMX_FREQ_IADD_RS 16
#define RANDOMX_FREQ_IADD_M 7
#define RANDOMX_FREQ_ISUB_R 16
#define RANDOMX_FREQ_ISUB_M 7
#define RANDOMX_FREQ_IMUL_R 16
#define RANDOMX_FREQ_IMUL_M 4
#define RANDOMX_FREQ_IMULH_R 4
#define RANDOMX_FREQ_IMULH_M 1
#define RANDOMX_FREQ_ISMULH_R 4
#define RANDOMX_FREQ_ISMULH_M 1
#define RANDOMX_FREQ_IMUL_RCP 8
#define RANDOMX_FREQ_INEG_R 2
#define RANDOMX_FREQ_IXOR_R 15
#define RANDOMX_FREQ_IXOR_M 5
#define RANDOMX_FREQ_IROR_R 8
#define RANDOMX_FREQ_IROL_R 2
#define RANDOMX_FREQ_ISWAP_R 4
//Floating point instructions
#define RANDOMX_FREQ_FSWAP_R 4
#define RANDOMX_FREQ_FADD_R 16
#define RANDOMX_FREQ_FADD_M 5
#define RANDOMX_FREQ_FSUB_R 16
#define RANDOMX_FREQ_FSUB_M 5
#define RANDOMX_FREQ_FSCAL_R 6
#define RANDOMX_FREQ_FMUL_R 32
#define RANDOMX_FREQ_FDIV_M 4
#define RANDOMX_FREQ_FSQRT_R 6
//Control instructions
#define RANDOMX_FREQ_CBRANCH 25
#define RANDOMX_FREQ_CFROUND 1
//Store instruction
#define RANDOMX_FREQ_ISTORE 16
//No-op instruction
#define RANDOMX_FREQ_NOP 0
/* ------
256
*/

72
randomx/cpu.cpp Normal file
View File

@ -0,0 +1,72 @@
/*
Copyright (c) 2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cpu.hpp"
#if defined(_M_X64) || defined(__x86_64__)
#define HAVE_CPUID
#ifdef _WIN32
#include <intrin.h>
#define cpuid(info, x) __cpuidex(info, x, 0)
#else //GCC
#include <cpuid.h>
void cpuid(int info[4], int InfoType) {
__cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]);
}
#endif
#endif
#if defined(HAVE_HWCAP)
#include <sys/auxv.h>
#include <asm/hwcap.h>
#endif
namespace randomx {

	// Detects the CPU features RandomX can exploit, once, at construction.
	// x86-64: standard CPUID leaves 1 and 7. aarch64: Linux HWCAP auxv.
	// On other platforms all flags stay false (set by the initializer list).
	Cpu::Cpu() : aes_(false), ssse3_(false), avx2_(false) {
#ifdef HAVE_CPUID
		int info[4];
		cpuid(info, 0);          // leaf 0: EAX = highest supported standard leaf
		int nIds = info[0];
		if (nIds >= 0x00000001) {
			cpuid(info, 0x00000001);
			ssse3_ = (info[2] & (1 << 9)) != 0;   // ECX bit 9:  SSSE3
			aes_ = (info[2] & (1 << 25)) != 0;    // ECX bit 25: AES-NI
		}
		if (nIds >= 0x00000007) {
			cpuid(info, 0x00000007);
			avx2_ = (info[1] & (1 << 5)) != 0;    // EBX bit 5:  AVX2
		}
#elif defined(__aarch64__) && defined(HWCAP_AES)
		long hwcaps = getauxval(AT_HWCAP);
		aes_ = (hwcaps & HWCAP_AES) != 0;         // ARMv8 AES extension
#endif
		//TODO POWER8 AES
	}
}

49
randomx/cpu.hpp Normal file
View File

@ -0,0 +1,49 @@
/*
Copyright (c) 2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
namespace randomx {

	// Read-only view of the host CPU's feature flags.
	// All detection happens once in the constructor (see cpu.cpp).
	class Cpu {
	public:
		Cpu();
		// Hardware AES available (x86 AES-NI, or ARMv8 AES via HWCAP).
		bool hasAes() const {
			return aes_;
		}
		// SSSE3 available (x86 only).
		bool hasSsse3() const {
			return ssse3_;
		}
		// AVX2 available (x86 only).
		bool hasAvx2() const {
			return avx2_;
		}
	private:
		bool aes_, ssse3_, avx2_;
	};
}

212
randomx/dataset.cpp Normal file
View File

@ -0,0 +1,212 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Original code from Argon2 reference source code package used under CC0 Licence
* https://github.com/P-H-C/phc-winner-argon2
* Copyright 2015
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
*/
#include <new>
#include <algorithm>
#include <stdexcept>
#include <cstring>
#include <limits>
#include <cstring>
#include <cassert>
#include "common.hpp"
#include "dataset.hpp"
#include "virtual_memory.hpp"
#include "superscalar.hpp"
#include "blake2_generator.hpp"
#include "reciprocal.h"
#include "blake2/endian.h"
#include "argon2.h"
#include "argon2_core.h"
#include "jit_compiler.hpp"
#include "intrin_portable.h"
static_assert(RANDOMX_ARGON_MEMORY % (RANDOMX_ARGON_LANES * ARGON2_SYNC_POINTS) == 0, "RANDOMX_ARGON_MEMORY - invalid value");
static_assert(ARGON2_BLOCK_SIZE == randomx::ArgonBlockSize, "Unpexpected value of ARGON2_BLOCK_SIZE");
namespace randomx {
	// Releases the resources owned by a randomx_cache: the CacheSize-byte
	// memory area (via the Allocator policy) and the optional JIT compiler.
	// Safe to call on a partially-initialized cache (null checks below).
	template<class Allocator>
	void deallocCache(randomx_cache* cache) {
		if (cache->memory != nullptr)
			Allocator::freeMemory(cache->memory, CacheSize);
		if (cache->jit != nullptr)
			delete cache->jit;
	}

	// Explicit instantiations for the two allocation strategies used by the API.
	template void deallocCache<DefaultAllocator>(randomx_cache* cache);
	template void deallocCache<LargePageAllocator>(randomx_cache* cache);
void initCache(randomx_cache* cache, const void* key, size_t keySize) {
uint32_t memory_blocks, segment_length;
argon2_instance_t instance;
argon2_context context;
context.out = nullptr;
context.outlen = 0;
context.pwd = CONST_CAST(uint8_t *)key;
context.pwdlen = (uint32_t)keySize;
context.salt = CONST_CAST(uint8_t *)RANDOMX_ARGON_SALT;
context.saltlen = (uint32_t)randomx::ArgonSaltSize;
context.secret = NULL;
context.secretlen = 0;
context.ad = NULL;
context.adlen = 0;
context.t_cost = RANDOMX_ARGON_ITERATIONS;
context.m_cost = RANDOMX_ARGON_MEMORY;
context.lanes = RANDOMX_ARGON_LANES; //1
context.threads = 1;
context.allocate_cbk = NULL;
context.free_cbk = NULL;
context.flags = ARGON2_DEFAULT_FLAGS;
context.version = ARGON2_VERSION_NUMBER;
int inputsValid = randomx_argon2_validate_inputs(&context);
assert(inputsValid == ARGON2_OK);
/* 2. Align memory size */
/* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
memory_blocks = context.m_cost; //262144
segment_length = memory_blocks / (context.lanes * ARGON2_SYNC_POINTS); //ARGON2_SYNC_POINTS=4
instance.version = context.version;
instance.memory = NULL;
instance.passes = context.t_cost;
instance.memory_blocks = memory_blocks;
instance.segment_length = segment_length;
instance.lane_length = segment_length * ARGON2_SYNC_POINTS;
instance.lanes = context.lanes;
instance.threads = context.threads;
instance.type = Argon2_d;
instance.memory = (block*)cache->memory;
instance.impl = cache->argonImpl;
if (instance.threads > instance.lanes) {
instance.threads = instance.lanes;
}
/* 3. Initialization: Hashing inputs, allocating memory, filling first
* blocks
*/
randomx_argon2_initialize(&instance, &context);
randomx_argon2_fill_memory_blocks(&instance);
cache->reciprocalCache.clear();
randomx::Blake2Generator gen(key, keySize);
for (int i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { //RANDOMX_CACHE_ACCESSES =8
randomx::generateSuperscalar(cache->programs[i], gen);
for (unsigned j = 0; j < cache->programs[i].getSize(); ++j) {
auto& instr = cache->programs[i](j);
if ((SuperscalarInstructionType)instr.opcode == SuperscalarInstructionType::IMUL_RCP) {
auto rcp = randomx_reciprocal(instr.getImm32());
instr.setImm32(cache->reciprocalCache.size());
cache->reciprocalCache.push_back(rcp);
}
}
}
printf("initial the cache finished\n");
}
	// Same as initCache(), then JIT-compiles the generated SuperscalarHash
	// programs and the dataset-initialization routine. The enableWriting /
	// enableExecution pair toggles W^X protection around code generation.
	void initCacheCompile(randomx_cache* cache, const void* key, size_t keySize) {
		initCache(cache, key, keySize);
		cache->jit->enableWriting();
		cache->jit->generateSuperscalarHash(cache->programs, cache->reciprocalCache);
		cache->jit->generateDatasetInitCode();
		cache->jit->enableExecution();
	}
	// Fixed constants used to derive the initial register file of a dataset
	// item from its item number (see initDatasetItem below).
	constexpr uint64_t superscalarMul0 = 6364136223846793005ULL;
	constexpr uint64_t superscalarAdd1 = 9298411001130361340ULL;
	constexpr uint64_t superscalarAdd2 = 12065312585734608966ULL;
	constexpr uint64_t superscalarAdd3 = 9306329213124626780ULL;
	constexpr uint64_t superscalarAdd4 = 5281919268842080866ULL;
	constexpr uint64_t superscalarAdd5 = 10536153434571861004ULL;
	constexpr uint64_t superscalarAdd6 = 3398623926847679864ULL;
	constexpr uint64_t superscalarAdd7 = 9549104520008361294ULL;

	// Maps a 64-bit register value to the start of a cache line inside the
	// cache memory (CacheSize is a power of two, so the mask selects a line).
	static inline uint8_t* getMixBlock(uint64_t registerValue, uint8_t *memory) {
		constexpr uint32_t mask = CacheSize / CacheLineSize - 1;
		return memory + (registerValue & mask) * CacheLineSize;
	}
	// Computes one 64-byte dataset item into `out`.
	// The 8-register state is seeded from itemNumber, then mixed through
	// RANDOMX_CACHE_ACCESSES rounds: run one SuperscalarHash program, XOR in
	// a cache line selected by the previous round's address register.
	void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t itemNumber) {
		int_reg_t rl[8];
		uint8_t* mixBlock;
		uint64_t registerValue = itemNumber;
		// Deterministic seed derived from the item number only.
		rl[0] = (itemNumber + 1) * superscalarMul0;
		rl[1] = rl[0] ^ superscalarAdd1;
		rl[2] = rl[0] ^ superscalarAdd2;
		rl[3] = rl[0] ^ superscalarAdd3;
		rl[4] = rl[0] ^ superscalarAdd4;
		rl[5] = rl[0] ^ superscalarAdd5;
		rl[6] = rl[0] ^ superscalarAdd6;
		rl[7] = rl[0] ^ superscalarAdd7;
		for (unsigned i = 0; i < RANDOMX_CACHE_ACCESSES; ++i) { //RANDOMX_CACHE_ACCESSES=8
			mixBlock = getMixBlock(registerValue, cache->memory);
			rx_prefetch_nta(mixBlock);   // hint: cache line is read once, don't pollute caches
			SuperscalarProgram& prog = cache->programs[i];

			executeSuperscalar(rl, prog, &cache->reciprocalCache);

			for (unsigned q = 0; q < 8; ++q)
				rl[q] ^= load64_native(mixBlock + 8 * q);

			// Next round's cache line is addressed by this program's address register.
			registerValue = rl[prog.getAddressRegister()];
		}

		memcpy(out, &rl, CacheLineSize);
	}
	// Fills dataset items [startItem, endItem) sequentially; `dataset` must
	// point at the slot for startItem and advances one cache line per item.
	// Callers can split the range across threads for parallel initialization.
	void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startItem, uint32_t endItem) {
		printf("initial the dataset\n");
		for (uint32_t itemNumber = startItem; itemNumber < endItem; ++itemNumber, dataset += CacheLineSize){
			initDatasetItem(cache, dataset, itemNumber);
		}
	}
}
//b3 1f 7e c5 cd 28 eb 4b b6 72 7e 15 7d b0 6a 63 0b d4 dc 32 fb 18 eb 25 b4 f2 09 9b 9b 5d 39 ab 0d 2d d0 e9 ed 5f b7 a5 ae 31 bc d1 8f 01 d4 04 91 aa 62 01 db 47 a7 0d 2b 42 b9 b3 43 cd 78 c9

94
randomx/dataset.hpp Normal file
View File

@ -0,0 +1,94 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cstdint>
#include <vector>
#include <type_traits>
#include "common.hpp"
#include "superscalar_program.hpp"
#include "allocator.hpp"
#include "argon2.h"
/* Global scope for C binding */
/* Global scope for C binding */
// Owns the fully-expanded dataset buffer; `dealloc` is the matching release
// function for however `memory` was allocated.
struct randomx_dataset {
	uint8_t* memory = nullptr;
	randomx::DatasetDeallocFunc* dealloc;
};
/* Global scope for C binding */
/* Global scope for C binding */
// Key-derived cache state: the Argon2d-filled memory, the SuperscalarHash
// programs generated from the key, and their IMUL_RCP reciprocal table.
// `jit`/`initialize`/`datasetInit` select the compiled vs. interpreted paths.
struct randomx_cache {
	uint8_t* memory = nullptr;
	randomx::CacheDeallocFunc* dealloc;
	randomx::JitCompiler* jit;
	randomx::CacheInitializeFunc* initialize;
	randomx::DatasetInitFunc* datasetInit;
	randomx::SuperscalarProgram programs[RANDOMX_CACHE_ACCESSES]; //RANDOMX_CACHE_ACCESSES =8
	std::vector<uint64_t> reciprocalCache;
	std::string cacheKey;          // key the cache was last initialized with
	randomx_argon2_impl* argonImpl;

	// A generated program always has nonzero size, so program[0] doubles
	// as the "has initCache() run" flag.
	bool isInitialized() {
		return programs[0].getSize() != 0;
	}
};
//A pointer to a standard-layout struct object points to its initial member
static_assert(std::is_standard_layout<randomx_dataset>(), "randomx_dataset must be a standard-layout struct");
//the following assert fails when compiling Debug in Visual Studio (JIT mode will crash in Debug)
static_assert(std::is_standard_layout<randomx_cache>(), "randomx_cache must be a standard-layout struct");
namespace randomx {

	using DefaultAllocator = AlignedAllocator<CacheLineSize>;

	// Releases the dataset buffer via the Allocator policy (no-op when null).
	template<class Allocator>
	void deallocDataset(randomx_dataset* dataset) {
		if (dataset->memory != nullptr)
			Allocator::freeMemory(dataset->memory, DatasetSize);
	}

	template<class Allocator>
	void deallocCache(randomx_cache* cache);

	void initCache(randomx_cache*, const void*, size_t);
	void initCacheCompile(randomx_cache*, const void*, size_t);
	void initDatasetItem(randomx_cache* cache, uint8_t* out, uint64_t blockNumber);
	void initDataset(randomx_cache* cache, uint8_t* dataset, uint32_t startBlock, uint32_t endBlock);

	// Picks the fastest Argon2 fill implementation permitted by `flags`,
	// falling back to the portable reference implementation.
	inline randomx_argon2_impl* selectArgonImpl(randomx_flags flags) {
		if (flags & RANDOMX_FLAG_ARGON2_AVX2) {
			return randomx_argon2_impl_avx2();
		}
		if (flags & RANDOMX_FLAG_ARGON2_SSSE3) {
			return randomx_argon2_impl_ssse3();
		}
		return &randomx_argon2_fill_segment_ref;
	}
}

390
randomx/instruction.cpp Normal file
View File

@ -0,0 +1,390 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "instruction.hpp"
#include "common.hpp"
namespace randomx {
	// Streams the instruction mnemonic, then dispatches to the per-opcode
	// formatter from the `engine` table to print the operands.
	void Instruction::print(std::ostream& os) const {
		os << names[opcode] << " ";
		auto handler = engine[opcode];
		(this->*handler)(os);
	}
	// Prints a load operand: L1 or L2 scratchpad (selected by mod.mem),
	// addressed by a source register plus signed immediate displacement.
	void Instruction::genAddressReg(std::ostream& os, int srcIndex) const {
		os << (getModMem() ? "L1" : "L2") << "[r" << srcIndex << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
	}

	// Prints a store operand: L1/L2 when the condition value is below
	// StoreL3Condition, otherwise the full L3 scratchpad.
	void Instruction::genAddressRegDst(std::ostream& os, int dstIndex) const {
		if (getModCond() < StoreL3Condition)
			os << (getModMem() ? "L1" : "L2");
		else
			os << "L3";
		os << "[r" << dstIndex << std::showpos << (int32_t)getImm32() << std::noshowpos << "]";
	}

	// Prints an absolute L3 operand (immediate masked to the L3 range).
	void Instruction::genAddressImm(std::ostream& os) const {
		os << "L3" << "[" << (getImm32() & ScratchpadL3Mask) << "]";
	}
	// IADD_RS: dst += src << shift; the displacement register also prints
	// its immediate operand.
	void Instruction::h_IADD_RS(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		auto srcIndex = src % RegistersCount;
		os << "r" << dstIndex << ", r" << srcIndex;
		if(dstIndex == RegisterNeedsDisplacement) {
			os << ", " << (int32_t)getImm32();
		}
		os << ", SHFT " << getModShift() << std::endl;
	}

	// IADD_M: register-memory form; dst == src degrades to an absolute
	// L3 address (same pattern for all *_M handlers below).
	void Instruction::h_IADD_M(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		auto srcIndex = src % RegistersCount;
		if (dstIndex != srcIndex) {
			os << "r" << dstIndex << ", ";
			genAddressReg(os, srcIndex);
			os << std::endl;
		}
		else {
			os << "r" << dstIndex << ", ";
			genAddressImm(os);
			os << std::endl;
		}
	}

	// ISUB_R: register-register, or register-immediate when dst == src
	// (same pattern for all *_R handlers below).
	void Instruction::h_ISUB_R(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		auto srcIndex = src % RegistersCount;
		if (dstIndex != srcIndex) {
			os << "r" << dstIndex << ", r" << srcIndex << std::endl;
		}
		else {
			os << "r" << dstIndex << ", " << (int32_t)getImm32() << std::endl;
		}
	}

	void Instruction::h_ISUB_M(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		auto srcIndex = src % RegistersCount;
		if (dstIndex != srcIndex) {
			os << "r" << dstIndex << ", ";
			genAddressReg(os, srcIndex);
			os << std::endl;
		}
		else {
			os << "r" << dstIndex << ", ";
			genAddressImm(os);
			os << std::endl;
		}
	}

	void Instruction::h_IMUL_R(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		auto srcIndex = src % RegistersCount;
		if (dstIndex != srcIndex) {
			os << "r" << dstIndex << ", r" << srcIndex << std::endl;
		}
		else {
			os << "r" << dstIndex << ", " << (int32_t)getImm32() << std::endl;
		}
	}
	void Instruction::h_IMUL_M(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		auto srcIndex = src % RegistersCount;
		if (dstIndex != srcIndex) {
			os << "r" << dstIndex << ", ";
			genAddressReg(os, srcIndex);
			os << std::endl;
		}
		else {
			os << "r" << dstIndex << ", ";
			genAddressImm(os);
			os << std::endl;
		}
	}

	// IMULH_R: high half of unsigned multiply; always register-register.
	void Instruction::h_IMULH_R(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		auto srcIndex = src % RegistersCount;
		os << "r" << dstIndex << ", r" << srcIndex << std::endl;
	}

	void Instruction::h_IMULH_M(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		auto srcIndex = src % RegistersCount;
		if (dstIndex != srcIndex) {
			os << "r" << dstIndex << ", ";
			genAddressReg(os, srcIndex);
			os << std::endl;
		}
		else {
			os << "r" << dstIndex << ", ";
			genAddressImm(os);
			os << std::endl;
		}
	}

	// ISMULH_R: high half of signed multiply; always register-register.
	void Instruction::h_ISMULH_R(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		auto srcIndex = src % RegistersCount;
		os << "r" << dstIndex << ", r" << srcIndex << std::endl;
	}

	void Instruction::h_ISMULH_M(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		auto srcIndex = src % RegistersCount;
		if (dstIndex != srcIndex) {
			os << "r" << dstIndex << ", ";
			genAddressReg(os, srcIndex);
			os << std::endl;
		}
		else {
			os << "r" << dstIndex << ", ";
			genAddressImm(os);
			os << std::endl;
		}
	}

	// INEG_R: single-operand negation.
	void Instruction::h_INEG_R(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		os << "r" << dstIndex << std::endl;
	}
	void Instruction::h_IXOR_R(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		auto srcIndex = src % RegistersCount;
		if (dstIndex != srcIndex) {
			os << "r" << dstIndex << ", r" << srcIndex << std::endl;
		}
		else {
			os << "r" << dstIndex << ", " << (int32_t)getImm32() << std::endl;
		}
	}

	void Instruction::h_IXOR_M(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		auto srcIndex = src % RegistersCount;
		if (dstIndex != srcIndex) {
			os << "r" << dstIndex << ", ";
			genAddressReg(os, srcIndex);
			os << std::endl;
		}
		else {
			os << "r" << dstIndex << ", ";
			genAddressImm(os);
			os << std::endl;
		}
	}

	// IROR_R / IROL_R: the immediate form masks the rotate count to 0-63.
	void Instruction::h_IROR_R(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		auto srcIndex = src % RegistersCount;
		if (dstIndex != srcIndex) {
			os << "r" << dstIndex << ", r" << srcIndex << std::endl;
		}
		else {
			os << "r" << dstIndex << ", " << (getImm32() & 63) << std::endl;
		}
	}

	void Instruction::h_IROL_R(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		auto srcIndex = src % RegistersCount;
		if (dstIndex != srcIndex) {
			os << "r" << dstIndex << ", r" << srcIndex << std::endl;
		}
		else {
			os << "r" << dstIndex << ", " << (getImm32() & 63) << std::endl;
		}
	}

	// IMUL_RCP: multiply by the reciprocal of the (unsigned) immediate.
	void Instruction::h_IMUL_RCP(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		os << "r" << dstIndex << ", " << getImm32() << std::endl;
	}

	void Instruction::h_ISWAP_R(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		auto srcIndex = src % RegistersCount;
		os << "r" << dstIndex << ", r" << srcIndex << std::endl;
	}
	// FSWAP_R: operates on either register file; indices at or above
	// RegisterCountFlt map to the 'e' group, the rest to 'f'.
	void Instruction::h_FSWAP_R(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		const char reg = (dstIndex >= RegisterCountFlt) ? 'e' : 'f';
		dstIndex %= RegisterCountFlt;
		os << reg << dstIndex << std::endl;
	}

	// FADD/FSUB group: destination in the 'f' file, register source in 'a'.
	void Instruction::h_FADD_R(std::ostream& os) const {
		auto dstIndex = dst % RegisterCountFlt;
		auto srcIndex = src % RegisterCountFlt;
		os << "f" << dstIndex << ", a" << srcIndex << std::endl;
	}

	void Instruction::h_FADD_M(std::ostream& os) const {
		auto dstIndex = dst % RegisterCountFlt;
		auto srcIndex = src % RegistersCount;
		os << "f" << dstIndex << ", ";
		genAddressReg(os, srcIndex);
		os << std::endl;
	}

	void Instruction::h_FSUB_R(std::ostream& os) const {
		auto dstIndex = dst % RegisterCountFlt;
		auto srcIndex = src % RegisterCountFlt;
		os << "f" << dstIndex << ", a" << srcIndex << std::endl;
	}

	void Instruction::h_FSUB_M(std::ostream& os) const {
		auto dstIndex = dst % RegisterCountFlt;
		auto srcIndex = src % RegistersCount;
		os << "f" << dstIndex << ", ";
		genAddressReg(os, srcIndex);
		os << std::endl;
	}

	void Instruction::h_FSCAL_R(std::ostream& os) const {
		auto dstIndex = dst % RegisterCountFlt;
		os << "f" << dstIndex << std::endl;
	}

	// FMUL/FDIV/FSQRT group: destination in the 'e' file.
	void Instruction::h_FMUL_R(std::ostream& os) const {
		auto dstIndex = dst % RegisterCountFlt;
		auto srcIndex = src % RegisterCountFlt;
		os << "e" << dstIndex << ", a" << srcIndex << std::endl;
	}

	void Instruction::h_FDIV_M(std::ostream& os) const {
		auto dstIndex = dst % RegisterCountFlt;
		auto srcIndex = src % RegistersCount;
		os << "e" << dstIndex << ", ";
		genAddressReg(os, srcIndex);
		os << std::endl;
	}

	void Instruction::h_FSQRT_R(std::ostream& os) const {
		auto dstIndex = dst % RegisterCountFlt;
		os << "e" << dstIndex << std::endl;
	}

	// CFROUND: source register plus a rotate count (masked to 0-63).
	void Instruction::h_CFROUND(std::ostream& os) const {
		auto srcIndex = src % RegistersCount;
		os << "r" << srcIndex << ", " << (getImm32() & 63) << std::endl;
	}
void Instruction::h_CBRANCH(std::ostream& os) const {
auto dstIndex = dst % RegistersCount;
auto srcIndex = src % RegistersCount;
os << "r" << dstIndex << ", " << (int32_t)getImm32() << ", COND " << (int)(getModCond()) << std::endl;
}
	// ISTORE: memory destination (L1/L2/L3 per mod.cond), register source.
	void Instruction::h_ISTORE(std::ostream& os) const {
		auto dstIndex = dst % RegistersCount;
		auto srcIndex = src % RegistersCount;
		genAddressRegDst(os, dstIndex);
		os << ", r" << srcIndex << std::endl;
	}

	// NOP has no operands; just terminate the line.
	void Instruction::h_NOP(std::ostream& os) const {
		os << std::endl;
	}
#include "instruction_weights.hpp"
// Each macro expands to WT(x) repeated entries, so both tables below end up
// with exactly 256 elements and can be indexed directly by the opcode byte.
#define INST_NAME(x) REPN(#x, WT(x))
#define INST_HANDLE(x) REPN(&Instruction::h_##x, WT(x))

	// opcode -> mnemonic (frequency-weighted, 256 entries).
	const char* Instruction::names[256] = {
		INST_NAME(IADD_RS)
		INST_NAME(IADD_M)
		INST_NAME(ISUB_R)
		INST_NAME(ISUB_M)
		INST_NAME(IMUL_R)
		INST_NAME(IMUL_M)
		INST_NAME(IMULH_R)
		INST_NAME(IMULH_M)
		INST_NAME(ISMULH_R)
		INST_NAME(ISMULH_M)
		INST_NAME(IMUL_RCP)
		INST_NAME(INEG_R)
		INST_NAME(IXOR_R)
		INST_NAME(IXOR_M)
		INST_NAME(IROR_R)
		INST_NAME(IROL_R)
		INST_NAME(ISWAP_R)
		INST_NAME(FSWAP_R)
		INST_NAME(FADD_R)
		INST_NAME(FADD_M)
		INST_NAME(FSUB_R)
		INST_NAME(FSUB_M)
		INST_NAME(FSCAL_R)
		INST_NAME(FMUL_R)
		INST_NAME(FDIV_M)
		INST_NAME(FSQRT_R)
		INST_NAME(CBRANCH)
		INST_NAME(CFROUND)
		INST_NAME(ISTORE)
		INST_NAME(NOP)
	};

	// opcode -> operand-formatter member pointer (must stay in the same
	// order as `names` so the two tables agree for every opcode).
	InstructionFormatter Instruction::engine[256] = {
		INST_HANDLE(IADD_RS)
		INST_HANDLE(IADD_M)
		INST_HANDLE(ISUB_R)
		INST_HANDLE(ISUB_M)
		INST_HANDLE(IMUL_R)
		INST_HANDLE(IMUL_M)
		INST_HANDLE(IMULH_R)
		INST_HANDLE(IMULH_M)
		INST_HANDLE(ISMULH_R)
		INST_HANDLE(ISMULH_M)
		INST_HANDLE(IMUL_RCP)
		INST_HANDLE(INEG_R)
		INST_HANDLE(IXOR_R)
		INST_HANDLE(IXOR_M)
		INST_HANDLE(IROR_R)
		INST_HANDLE(IROL_R)
		INST_HANDLE(ISWAP_R)
		INST_HANDLE(FSWAP_R)
		INST_HANDLE(FADD_R)
		INST_HANDLE(FADD_M)
		INST_HANDLE(FSUB_R)
		INST_HANDLE(FSUB_M)
		INST_HANDLE(FSCAL_R)
		INST_HANDLE(FMUL_R)
		INST_HANDLE(FDIV_M)
		INST_HANDLE(FSQRT_R)
		INST_HANDLE(CBRANCH)
		INST_HANDLE(CFROUND)
		INST_HANDLE(ISTORE)
		INST_HANDLE(NOP)
	};
}

149
randomx/instruction.hpp Normal file
View File

@ -0,0 +1,149 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cstdint>
#include <iostream>
#include <type_traits>
#include "blake2/endian.h"
namespace randomx {
class Instruction;
typedef void(Instruction::*InstructionFormatter)(std::ostream&) const;
	// Canonical instruction identifiers. The numeric values are the order in
	// which the frequency-weighted opcode tables are built (instruction.cpp).
	enum class InstructionType : uint16_t {
		IADD_RS = 0,
		IADD_M = 1,
		ISUB_R = 2,
		ISUB_M = 3,
		IMUL_R = 4,
		IMUL_M = 5,
		IMULH_R = 6,
		IMULH_M = 7,
		ISMULH_R = 8,
		ISMULH_M = 9,
		IMUL_RCP = 10,
		INEG_R = 11,
		IXOR_R = 12,
		IXOR_M = 13,
		IROR_R = 14,
		IROL_R = 15,
		ISWAP_R = 16,
		FSWAP_R = 17,
		FADD_R = 18,
		FADD_M = 19,
		FSUB_R = 20,
		FSUB_M = 21,
		FSCAL_R = 22,
		FMUL_R = 23,
		FDIV_M = 24,
		FSQRT_R = 25,
		CBRANCH = 26,
		CFROUND = 27,
		ISTORE = 28,
		NOP = 29,
	};
	// One 8-byte RandomX instruction: opcode, dst/src register selectors,
	// mod byte (mem/shift/cond bit fields), and a 32-bit immediate.
	// imm32 is accessed through load32/store32 so the in-memory encoding is
	// endian-independent.
	class Instruction {
	public:
		uint32_t getImm32() const {
			return load32(&imm32);
		}
		void setImm32(uint32_t val) {
			// note: returning a void expression is legal and intentional here
			return store32(&imm32, val);
		}
		const char* getName() const {
			return names[opcode];
		}
		friend std::ostream& operator<<(std::ostream& os, const Instruction& i) {
			i.print(os);
			return os;
		}
		int getModMem() const {
			return mod % 4; //bits 0-1
		}
		int getModShift() const {
			return (mod >> 2) % 4; //bits 2-3
		}
		int getModCond() const {
			return mod >> 4; //bits 4-7
		}
		void setMod(uint8_t val) {
			mod = val;
		}

		uint8_t opcode;
		uint8_t dst;
		uint8_t src;
		uint8_t mod;
		uint32_t imm32;
	private:
		void print(std::ostream&) const;
		// 256-entry opcode-indexed tables, defined in instruction.cpp.
		static const char* names[256];
		static InstructionFormatter engine[256];
		void genAddressReg(std::ostream& os, int) const;
		void genAddressImm(std::ostream& os) const;
		void genAddressRegDst(std::ostream&, int) const;
		void h_IADD_RS(std::ostream&) const;
		void h_IADD_M(std::ostream&) const;
		void h_ISUB_R(std::ostream&) const;
		void h_ISUB_M(std::ostream&) const;
		void h_IMUL_R(std::ostream&) const;
		void h_IMUL_M(std::ostream&) const;
		void h_IMULH_R(std::ostream&) const;
		void h_IMULH_M(std::ostream&) const;
		void h_ISMULH_R(std::ostream&) const;
		void h_ISMULH_M(std::ostream&) const;
		void h_IMUL_RCP(std::ostream&) const;
		void h_INEG_R(std::ostream&) const;
		void h_IXOR_R(std::ostream&) const;
		void h_IXOR_M(std::ostream&) const;
		void h_IROR_R(std::ostream&) const;
		void h_IROL_R(std::ostream&) const;
		void h_ISWAP_R(std::ostream&) const;
		void h_FSWAP_R(std::ostream&) const;
		void h_FADD_R(std::ostream&) const;
		void h_FADD_M(std::ostream&) const;
		void h_FSUB_R(std::ostream&) const;
		void h_FSUB_M(std::ostream&) const;
		void h_FSCAL_R(std::ostream&) const;
		void h_FMUL_R(std::ostream&) const;
		void h_FDIV_M(std::ostream&) const;
		void h_FSQRT_R(std::ostream&) const;
		void h_CBRANCH(std::ostream&) const;
		void h_CFROUND(std::ostream&) const;
		void h_ISTORE(std::ostream&) const;
		void h_NOP(std::ostream&) const;
	};
static_assert(sizeof(Instruction) == 8, "Invalid size of struct randomx::Instruction");
static_assert(std::is_standard_layout<Instruction>(), "randomx::Instruction must be a standard-layout struct");
}

View File

@ -0,0 +1,73 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
// Token-repetition macros: REPn(x) expands to "x," repeated n times.
// They are used to build instruction-dispatch tables where each RandomX
// instruction handler is repeated according to its configured frequency.
#define REP0(x)
#define REP1(x) x,
#define REP2(x) REP1(x) x,
#define REP3(x) REP2(x) x,
#define REP4(x) REP3(x) x,
#define REP5(x) REP4(x) x,
#define REP6(x) REP5(x) x,
#define REP7(x) REP6(x) x,
#define REP8(x) REP7(x) x,
#define REP9(x) REP8(x) x,
#define REP10(x) REP9(x) x,
#define REP11(x) REP10(x) x,
#define REP12(x) REP11(x) x,
#define REP13(x) REP12(x) x,
#define REP14(x) REP13(x) x,
#define REP15(x) REP14(x) x,
#define REP16(x) REP15(x) x,
#define REP17(x) REP16(x) x,
#define REP18(x) REP17(x) x,
#define REP19(x) REP18(x) x,
#define REP20(x) REP19(x) x,
#define REP21(x) REP20(x) x,
#define REP22(x) REP21(x) x,
#define REP23(x) REP22(x) x,
#define REP24(x) REP23(x) x,
#define REP25(x) REP24(x) x,
#define REP26(x) REP25(x) x,
#define REP27(x) REP26(x) x,
#define REP28(x) REP27(x) x,
#define REP29(x) REP28(x) x,
#define REP30(x) REP29(x) x,
#define REP31(x) REP30(x) x,
#define REP32(x) REP31(x) x,
#define REP33(x) REP32(x) x,
// Larger counts are composed from the small building blocks above.
#define REP40(x) REP32(x) REP8(x)
#define REP64(x) REP32(x) REP32(x)
#define REP128(x) REP32(x) REP32(x) REP32(x) REP32(x)
#define REP232(x) REP128(x) REP40(x) REP40(x) REP24(x)
#define REP256(x) REP128(x) REP128(x)
// Two-level expansion so that N may itself be a macro (it is expanded
// before being pasted onto "REP").
#define REPNX(x,N) REP##N(x)
#define REPN(x,N) REPNX(x,N)
#define NUM(x) x
// WT(x) evaluates the configured frequency (weight) of instruction x.
#define WT(x) NUM(RANDOMX_FREQ_##x)

View File

@ -0,0 +1,193 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cfenv>
#include <cmath>
#include "common.hpp"
#include "intrin_portable.h"
#include "blake2/endian.h"
// Preferred implementation: when the compiler provides a native 128-bit
// integer type (GCC/Clang on 64-bit targets), the high 64 bits of a
// 64x64-bit product are obtained with a single widening multiply.
#if defined(__SIZEOF_INT128__)
typedef unsigned __int128 uint128_t;
typedef __int128 int128_t;
// Upper 64 bits of the unsigned 128-bit product a * b.
uint64_t mulh(uint64_t a, uint64_t b) {
return ((uint128_t)a * b) >> 64;
}
// Upper 64 bits of the signed 128-bit product a * b.
int64_t smulh(int64_t a, int64_t b) {
return ((int128_t)a * b) >> 64;
}
#define HAVE_MULH
#define HAVE_SMULH
#endif
// MSVC-specific implementations based on compiler intrinsics.
#if defined(_MSC_VER)
// EVAL_DEFINE(X) tests whether the machine-description macro X expands to
// a value: the token-paste with 0 yields a usable #if expression.
#define HAS_VALUE(X) X ## 0
#define EVAL_DEFINE(X) HAS_VALUE(X)
#include <intrin.h>
#include <stdlib.h>
// 64-bit rotations via CRT intrinsics.
uint64_t rotl(uint64_t x, unsigned int c) {
return _rotl64(x, c);
}
uint64_t rotr(uint64_t x, unsigned int c) {
return _rotr64(x, c);
}
#define HAVE_ROTL
#define HAVE_ROTR
// __umulh is available on x64 and on ARM64EC (__MACHINEARM64_X64).
#if EVAL_DEFINE(__MACHINEARM64_X64(1))
uint64_t mulh(uint64_t a, uint64_t b) {
return __umulh(a, b);
}
#define HAVE_MULH
#endif
// _mul128 returns the low half and stores the high half through a pointer;
// only the high half is needed here.
#if EVAL_DEFINE(__MACHINEX64(1))
int64_t smulh(int64_t a, int64_t b) {
int64_t hi;
_mul128(a, b, &hi);
return hi;
}
#define HAVE_SMULH
#endif
// MSVC sets the FPU/SSE rounding mode through _controlfp.
static void setRoundMode_(uint32_t mode) {
_controlfp(mode, _MCW_RC);
}
#define HAVE_SETROUNDMODE_IMPL
#endif
// Portable fallback: set the FP rounding mode via the standard <cfenv> API.
#ifndef HAVE_SETROUNDMODE_IMPL
static void setRoundMode_(uint32_t mode) {
fesetround(mode);
}
#endif
#ifndef HAVE_ROTR
// Rotates a right by b bits.
// The count is masked to 0-63 before shifting: the previous version computed
// `a >> b` directly, which is undefined behavior for b >= 64. Masking makes
// every count well-defined while returning identical results for counts in
// the 0-63 range callers actually use (b == 0 still returns a unchanged).
uint64_t rotr(uint64_t a, unsigned int b) {
	b &= 63;
	return (a >> b) | (a << ((64 - b) & 63));
}
#define HAVE_ROTR
#endif
#ifndef HAVE_ROTL
// Rotates a left by b bits.
// The count is masked to 0-63 before shifting: the previous version computed
// `a << b` directly, which is undefined behavior for b >= 64. Masking makes
// every count well-defined while returning identical results for counts in
// the 0-63 range callers actually use (b == 0 still returns a unchanged).
uint64_t rotl(uint64_t a, unsigned int b) {
	b &= 63;
	return (a << b) | (a >> ((64 - b) & 63));
}
#define HAVE_ROTL
#endif
#ifndef HAVE_MULH
#define LO(x) ((x)&0xffffffff)
#define HI(x) ((x)>>32)
// Portable high-word multiply: returns the upper 64 bits of the 128-bit
// product a * b. Splits each operand into 32-bit halves, forms the four
// partial products and propagates the carries into the top word.
uint64_t mulh(uint64_t a, uint64_t b) {
	uint64_t aHi = HI(a);
	uint64_t aLo = LO(a);
	uint64_t bHi = HI(b);
	uint64_t bLo = LO(b);
	uint64_t lolo = aLo * bLo;
	uint64_t lohi = aLo * bHi;
	uint64_t hilo = aHi * bLo;
	uint64_t hihi = aHi * bHi;
	// Middle column plus the carry out of the low word.
	uint64_t mid1 = LO(hilo) + LO(lohi) + HI(lolo);
	// Second column: high halves of the cross products, low half of the
	// top product, and the carry from the middle column.
	uint64_t mid2 = HI(hilo) + HI(lohi) + LO(hihi) + HI(mid1);
	uint64_t top = HI(hihi) + HI(mid2);
	return (top << 32) + LO(mid2);
}
#define HAVE_MULH
#endif
#ifndef HAVE_SMULH
// Signed high-word multiply, derived from the unsigned one: reinterpret the
// operands as unsigned, take the unsigned high word, then apply the standard
// two's-complement correction (subtract the other operand once for each
// negative input) to obtain the upper 64 bits of the signed product.
int64_t smulh(int64_t a, int64_t b) {
	int64_t high = mulh(a, b);
	if (b < 0LL)
		high -= a;
	if (a < 0LL)
		high -= b;
	return high;
}
#define HAVE_SMULH
#endif
// Default (non-SSE2) floating-point environment control, used on platforms
// where intrin_portable.h selected the <cfenv>-based path.
#ifdef RANDOMX_DEFAULT_FENV
// Resets the FP state to the RandomX baseline: round-to-nearest and
// (where needed) 53-bit x87 precision.
void rx_reset_float_state() {
setRoundMode_(FE_TONEAREST);
rx_set_double_precision(); //set precision to 53 bits if needed by the platform
}
// Maps the 2-bit RandomX rounding-mode code to the C fenv rounding mode.
// Only the low 2 bits of mode are significant.
void rx_set_rounding_mode(uint32_t mode) {
switch (mode & 3) {
case RoundDown:
setRoundMode_(FE_DOWNWARD);
break;
case RoundUp:
setRoundMode_(FE_UPWARD);
break;
case RoundToZero:
setRoundMode_(FE_TOWARDZERO);
break;
case RoundToNearest:
setRoundMode_(FE_TONEAREST);
break;
default:
UNREACHABLE; // mode & 3 covers all four cases
}
}
#endif
// On 32-bit x86 without SSE2 the x87 FPU computes in 80-bit extended
// precision by default; RandomX requires IEEE double (53-bit mantissa),
// so the x87 control word must be reprogrammed.
#ifdef RANDOMX_USE_X87
#if defined(_MSC_VER) && defined(_M_IX86)
void rx_set_double_precision() {
_control87(_PC_53, _MCW_PC);
}
#elif defined(__i386)
void rx_set_double_precision() {
// Read-modify-write the x87 control word: clear the precision-control
// field (bits 8-9) and set it to 10b = 53-bit (double) precision.
uint16_t volatile x87cw;
asm volatile("fstcw %0" : "=m" (x87cw));
x87cw &= ~0x300;
x87cw |= 0x200;
asm volatile("fldcw %0" : : "m" (x87cw));
}
#endif
#endif //RANDOMX_USE_X87
// Bit-for-bit double <-> uint64_t reinterpretation helper.
// NOTE(review): union type punning is well-defined in C but formally UB in
// strict C++; all compilers targeted by this project handle it as intended.
union double_ser_t {
double f;
uint64_t i;
};
// Loads 8 bytes from addr (endian-corrected by load64) and reinterprets
// them as an IEEE-754 double without converting the value.
double loadDoublePortable(const void* addr) {
double_ser_t ds;
ds.i = load64(addr);
return ds.f;
}

738
randomx/intrin_portable.h Normal file
View File

@ -0,0 +1,738 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cstdint>
#include "blake2/endian.h"
// Portable unsigned -> signed conversions. On two's-complement targets
// (-1 == ~0) the plain cast is used; otherwise the value is rebuilt
// arithmetically so the result is well-defined on any C++ implementation.
constexpr int32_t unsigned32ToSigned2sCompl(uint32_t x) {
return (-1 == ~0) ? (int32_t)x : (x > INT32_MAX ? (-(int32_t)(UINT32_MAX - x) - 1) : (int32_t)x);
}
constexpr int64_t unsigned64ToSigned2sCompl(uint64_t x) {
return (-1 == ~0) ? (int64_t)x : (x > INT64_MAX ? (-(int64_t)(UINT64_MAX - x) - 1) : (int64_t)x);
}
// Sign-extends a 32-bit value to 64 bits (two's-complement semantics).
constexpr uint64_t signExtend2sCompl(uint32_t x) {
return (-1 == ~0) ? (int64_t)(int32_t)(x) : (x > INT32_MAX ? (x | 0xffffffff00000000ULL) : (uint64_t)x);
}
// RandomX rounding-mode codes (match the 2-bit mode field used by CFROUND).
constexpr int RoundToNearest = 0;
constexpr int RoundDown = 1;
constexpr int RoundUp = 2;
constexpr int RoundToZero = 3;
//MSVC doesn't define __SSE2__, so we have to define it manually if SSE2 is available
#if !defined(__SSE2__) && (defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2))
#define __SSE2__ 1
#endif
//MSVC doesn't define __AES__
#if defined(_MSC_VER) && defined(__SSE2__)
#define __AES__
#endif
//the library "sqrt" function provided by MSVC for x86 targets doesn't give
//the correct results, so we have to use inline assembly to call x87 fsqrt directly
#if !defined(__SSE2__)
#if defined(_MSC_VER) && defined(_M_IX86)
// Returns sqrt(x) via the x87 fsqrt instruction; the result is left in
// st(0), which is MSVC's return register for double (hence no return
// statement).
inline double __cdecl rx_sqrt(double x) {
__asm {
fld x
fsqrt
}
}
#define rx_sqrt rx_sqrt
void rx_set_double_precision();
#define RANDOMX_USE_X87
#elif defined(__i386)
void rx_set_double_precision();
#define RANDOMX_USE_X87
#endif
#endif //__SSE2__
// Everywhere else the library sqrt is correct.
#if !defined(rx_sqrt)
#define rx_sqrt sqrt
#endif
// No-op stub when 53-bit x87 precision does not need to be forced.
#if !defined(RANDOMX_USE_X87)
#define rx_set_double_precision(x)
#endif
// x86 SSE2 implementation of the rx_* vector abstraction layer.
// Most operations map 1:1 onto SSE2 intrinsics via #define.
#ifdef __SSE2__
#ifdef __GNUC__
#include <x86intrin.h>
#else
#include <intrin.h>
#endif
typedef __m128i rx_vec_i128;
typedef __m128d rx_vec_f128;
#define rx_aligned_alloc(a, b) _mm_malloc(a,b)
#define rx_aligned_free(a) _mm_free(a)
#define rx_prefetch_nta(x) _mm_prefetch((const char *)(x), _MM_HINT_NTA)
#define rx_load_vec_f128 _mm_load_pd
#define rx_store_vec_f128 _mm_store_pd
#define rx_add_vec_f128 _mm_add_pd
#define rx_sub_vec_f128 _mm_sub_pd
#define rx_mul_vec_f128 _mm_mul_pd
#define rx_div_vec_f128 _mm_div_pd
#define rx_sqrt_vec_f128 _mm_sqrt_pd
// Swaps the two double lanes of a vector.
FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
return _mm_shuffle_pd(a, a, 1);
}
// Builds a vector from two 64-bit bit patterns (x1 = high lane, x0 = low lane).
FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
return _mm_castsi128_pd(_mm_set_epi64x(x1, x0));
}
// Broadcasts one 64-bit bit pattern to both lanes.
FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
return _mm_castsi128_pd(_mm_set1_epi64x(x));
}
#define rx_xor_vec_f128 _mm_xor_pd
#define rx_and_vec_f128 _mm_and_pd
#define rx_or_vec_f128 _mm_or_pd
#ifdef __AES__
#define rx_aesenc_vec_i128 _mm_aesenc_si128
#define rx_aesdec_vec_i128 _mm_aesdec_si128
#define HAVE_AES 1
#endif //__AES__
// Extract the four 32-bit lanes of an integer vector (x = lane 0 ... w = lane 3).
FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
return _mm_cvtsi128_si32(a);
}
FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0x55));
}
FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0xaa));
}
FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
return _mm_cvtsi128_si32(_mm_shuffle_epi32(a, 0xff));
}
#define rx_set_int_vec_i128 _mm_set_epi32
#define rx_xor_vec_i128 _mm_xor_si128
#define rx_load_vec_i128 _mm_load_si128
#define rx_store_vec_i128 _mm_store_si128
// Converts two packed 32-bit signed integers at addr to two doubles.
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
__m128i ix = _mm_loadl_epi64((const __m128i*)addr);
return _mm_cvtepi32_pd(ix);
}
constexpr uint32_t rx_mxcsr_default = 0x9FC0; //Flush to zero, denormals are zero, default rounding mode, all exceptions disabled
FORCE_INLINE void rx_reset_float_state() {
_mm_setcsr(rx_mxcsr_default);
}
// The MXCSR rounding-control field occupies bits 13-14, hence the shift.
FORCE_INLINE void rx_set_rounding_mode(uint32_t mode) {
_mm_setcsr(rx_mxcsr_default | (mode << 13));
}
// POWER (PPC64 + AltiVec + VSX) implementation of the rx_* vector layer.
#elif defined(__PPC64__) && defined(__ALTIVEC__) && defined(__VSX__) //sadly only POWER7 and newer will be able to use SIMD acceleration. Earlier processors cant use doubles or 64 bit integers with SIMD
#include <cstdint>
#include <stdexcept>
#include <cstdlib>
#include <altivec.h>
// altivec.h defines these as macros; undef them so the identifiers stay usable.
#undef vector
#undef pixel
#undef bool
typedef __vector uint8_t __m128i;
typedef __vector uint32_t __m128l;
typedef __vector int __m128li;
typedef __vector uint64_t __m128ll;
typedef __vector double __m128d;
typedef __m128i rx_vec_i128;
typedef __m128d rx_vec_f128;
// Union view of a 128-bit vector for lane-wise access and endian fixups.
typedef union{
rx_vec_i128 i;
rx_vec_f128 d;
uint64_t u64[2];
double d64[2];
uint32_t u32[4];
int i32[4];
} vec_u;
#define rx_aligned_alloc(a, b) malloc(a)
#define rx_aligned_free(a) free(a)
#define rx_prefetch_nta(x)
/* Splat 64-bit long long to 2 64-bit long longs */
FORCE_INLINE __m128i vec_splat2sd (int64_t scalar)
{ return (__m128i) vec_splats (scalar); }
// Loads/stores go through load64/store64 on big-endian so the in-memory
// layout matches the little-endian reference implementation.
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
#if defined(NATIVE_LITTLE_ENDIAN)
return (rx_vec_f128)vec_vsx_ld(0,pd);
#else
vec_u t;
t.u64[0] = load64(pd + 0);
t.u64[1] = load64(pd + 1);
return (rx_vec_f128)t.d;
#endif
}
FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 a) {
#if defined(NATIVE_LITTLE_ENDIAN)
vec_vsx_st(a,0,(rx_vec_f128*)mem_addr);
#else
vec_u _a;
_a.d = a;
store64(mem_addr + 0, _a.u64[0]);
store64(mem_addr + 1, _a.u64[1]);
#endif
}
// Swaps the two 64-bit lanes via a byte permute.
FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
return (rx_vec_f128)vec_perm((__m128i)a,(__m128i)a,(__m128i){8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7});
}
FORCE_INLINE rx_vec_f128 rx_add_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return (rx_vec_f128)vec_add(a,b);
}
FORCE_INLINE rx_vec_f128 rx_sub_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return (rx_vec_f128)vec_sub(a,b);
}
FORCE_INLINE rx_vec_f128 rx_mul_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return (rx_vec_f128)vec_mul(a,b);
}
FORCE_INLINE rx_vec_f128 rx_div_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return (rx_vec_f128)vec_div(a,b);
}
FORCE_INLINE rx_vec_f128 rx_sqrt_vec_f128(rx_vec_f128 a) {
return (rx_vec_f128)vec_sqrt(a);
}
FORCE_INLINE rx_vec_i128 rx_set1_long_vec_i128(uint64_t a) {
return (rx_vec_i128)vec_splat2sd(a);
}
FORCE_INLINE rx_vec_f128 rx_vec_i128_vec_f128(rx_vec_i128 a) {
return (rx_vec_f128)a;
}
// x1 = high lane, x0 = low lane (matches the SSE2 variant).
FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
return (rx_vec_f128)(__m128ll){x0,x1};
}
FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
return (rx_vec_f128)vec_splat2sd(x);
}
FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return (rx_vec_f128)vec_xor(a,b);
}
FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return (rx_vec_f128)vec_and(a,b);
}
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return (rx_vec_f128)vec_or(a,b);
}
// Hardware AES via the POWER8 crypto facility. vrev reverses the byte order
// so vcipher/vncipher see the same state layout as x86 AES-NI.
#if defined(__CRYPTO__)
FORCE_INLINE __m128ll vrev(__m128i v){
#if defined(NATIVE_LITTLE_ENDIAN)
return (__m128ll)vec_perm((__m128i)v,(__m128i){0},(__m128i){15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0});
#else
return (__m128ll)vec_perm((__m128i)v,(__m128i){0},(__m128i){3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12});
#endif
}
FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
__m128ll _v = vrev(v);
__m128ll _rkey = vrev(rkey);
__m128ll result = vrev((__m128i)__builtin_crypto_vcipher(_v,_rkey));
return (rx_vec_i128)result;
}
// vncipher adds the round key at a different step than AES-NI, so the key
// is XORed after the inverse round instead of being passed in.
FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
__m128ll _v = vrev(v);
__m128ll zero = (__m128ll){0};
__m128ll out = vrev((__m128i)__builtin_crypto_vncipher(_v,zero));
return (rx_vec_i128)vec_xor((__m128i)out,rkey);
}
#define HAVE_AES 1
#endif //__CRYPTO__
// 32-bit lane extraction through the union view.
FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
vec_u _a;
_a.i = a;
return _a.i32[0];
}
FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
vec_u _a;
_a.i = a;
return _a.i32[1];
}
FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
vec_u _a;
_a.i = a;
return _a.i32[2];
}
FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
vec_u _a;
_a.i = a;
return _a.i32[3];
}
FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) {
return (rx_vec_i128)((__m128li){_I0,_I1,_I2,_I3});
};
FORCE_INLINE rx_vec_i128 rx_xor_vec_i128(rx_vec_i128 _A, rx_vec_i128 _B) {
return (rx_vec_i128)vec_xor(_A,_B);
}
FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const *_P) {
#if defined(NATIVE_LITTLE_ENDIAN)
return *_P;
#else
uint32_t* ptr = (uint32_t*)_P;
vec_u c;
c.u32[0] = load32(ptr + 0);
c.u32[1] = load32(ptr + 1);
c.u32[2] = load32(ptr + 2);
c.u32[3] = load32(ptr + 3);
return (rx_vec_i128)c.i;
#endif
}
FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *_P, rx_vec_i128 _B) {
#if defined(NATIVE_LITTLE_ENDIAN)
*_P = _B;
#else
uint32_t* ptr = (uint32_t*)_P;
vec_u B;
B.i = _B;
store32(ptr + 0, B.u32[0]);
store32(ptr + 1, B.u32[1]);
store32(ptr + 2, B.u32[2]);
store32(ptr + 3, B.u32[3]);
#endif
}
// Converts two packed 32-bit signed integers at addr to two doubles.
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
vec_u x;
x.d64[0] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
x.d64[1] = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
return (rx_vec_f128)x.d;
}
// No MXCSR here; rounding is controlled through the <cfenv> fallback.
#define RANDOMX_DEFAULT_FENV
// AArch64 (NEON) implementation of the rx_* vector layer.
#elif defined(__aarch64__)
#include <stdlib.h>
#include <arm_neon.h>
#include <arm_acle.h>
typedef uint8x16_t rx_vec_i128;
typedef float64x2_t rx_vec_f128;
inline void* rx_aligned_alloc(size_t size, size_t align) {
void* p;
if (posix_memalign(&p, align, size) == 0)
return p;
return 0;
};
#define rx_aligned_free(a) free(a)
// Streaming prefetch into L1 (non-temporal hint).
inline void rx_prefetch_nta(void* ptr) {
asm volatile ("prfm pldl1strm, [%0]\n" : : "r" (ptr));
}
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
return vld1q_f64((const float64_t*)pd);
}
FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 val) {
vst1q_f64((float64_t*)mem_addr, val);
}
// Swaps the two double lanes.
// NOTE(review): temp is read (as the vcopyq destination operand) before any
// lane has been written, which is technically use of an uninitialized
// value; only its explicitly-copied lane 1 is ever consumed, so the result
// is correct in practice — consider initializing temp to silence sanitizers.
FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
float64x2_t temp;
temp = vcopyq_laneq_f64(temp, 1, a, 1);
a = vcopyq_laneq_f64(a, 1, a, 0);
return vcopyq_laneq_f64(a, 0, temp, 1);
}
// x1 = high lane, x0 = low lane (matches the SSE2 variant).
FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
uint64x2_t temp0 = vdupq_n_u64(x0);
uint64x2_t temp1 = vdupq_n_u64(x1);
return vreinterpretq_f64_u64(vcopyq_laneq_u64(temp0, 1, temp1, 0));
}
FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
return vreinterpretq_f64_u64(vdupq_n_u64(x));
}
#define rx_add_vec_f128 vaddq_f64
#define rx_sub_vec_f128 vsubq_f64
#define rx_mul_vec_f128 vmulq_f64
#define rx_div_vec_f128 vdivq_f64
#define rx_sqrt_vec_f128 vsqrtq_f64
// Bitwise ops on doubles are done through u8 reinterpretation.
FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return vreinterpretq_f64_u8(veorq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
}
FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return vreinterpretq_f64_u8(vandq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
}
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
return vreinterpretq_f64_u8(vorrq_u8(vreinterpretq_u8_f64(a), vreinterpretq_u8_f64(b)));
}
// ARMv8 Crypto Extensions: vaese/vaesd add the round key before the round,
// so a zero key is used and the real key is XORed afterwards to reproduce
// AES-NI single-round semantics.
#ifdef __ARM_FEATURE_CRYPTO
FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 a, rx_vec_i128 key) {
const uint8x16_t zero = { 0 };
return vaesmcq_u8(vaeseq_u8(a, zero)) ^ key;
}
FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 a, rx_vec_i128 key) {
const uint8x16_t zero = { 0 };
return vaesimcq_u8(vaesdq_u8(a, zero)) ^ key;
}
#define HAVE_AES 1
#endif
#define rx_xor_vec_i128 veorq_u8
FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
return vgetq_lane_s32(vreinterpretq_s32_u8(a), 0);
}
FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
return vgetq_lane_s32(vreinterpretq_s32_u8(a), 1);
}
FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
return vgetq_lane_s32(vreinterpretq_s32_u8(a), 2);
}
FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
return vgetq_lane_s32(vreinterpretq_s32_u8(a), 3);
}
FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) {
int32_t data[4];
data[0] = _I0;
data[1] = _I1;
data[2] = _I2;
data[3] = _I3;
return vreinterpretq_u8_s32(vld1q_s32(data));
};
// NOTE(review): rx_xor_vec_i128 is #defined identically twice in this
// section (harmless duplicate).
#define rx_xor_vec_i128 veorq_u8
FORCE_INLINE rx_vec_i128 rx_load_vec_i128(const rx_vec_i128* mem_addr) {
return vld1q_u8((const uint8_t*)mem_addr);
}
FORCE_INLINE void rx_store_vec_i128(rx_vec_i128* mem_addr, rx_vec_i128 val) {
vst1q_u8((uint8_t*)mem_addr, val);
}
// Converts two packed 32-bit signed integers at addr to two doubles.
// NOTE(review): x is used as a vsetq_lane destination before any lane is
// written — same uninitialized-read caveat as rx_swap_vec_f128 above.
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
double lo = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
double hi = unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
rx_vec_f128 x;
x = vsetq_lane_f64(lo, x, 0);
x = vsetq_lane_f64(hi, x, 1);
return x;
}
// Rounding is controlled through the <cfenv> fallback.
#define RANDOMX_DEFAULT_FENV
// Pure-C++ fallback for platforms without SIMD support. The 128-bit vector
// types are emulated with unions of scalar lanes; semantics mirror the
// SSE2 implementation above.
#else //portable fallback
#include <cstdint>
#include <stdexcept>
#include <cstdlib>
#include <cmath>
typedef union {
uint64_t u64[2];
uint32_t u32[4];
uint16_t u16[8];
uint8_t u8[16];
} rx_vec_i128;
typedef union {
struct {
double lo;
double hi;
};
rx_vec_i128 i;
} rx_vec_f128;
#define rx_aligned_alloc(a, b) malloc(a)
#define rx_aligned_free(a) free(a)
#define rx_prefetch_nta(x)
// Loads/stores use load64/store64 so the byte layout matches the
// little-endian reference regardless of host endianness.
FORCE_INLINE rx_vec_f128 rx_load_vec_f128(const double* pd) {
rx_vec_f128 x;
x.i.u64[0] = load64(pd + 0);
x.i.u64[1] = load64(pd + 1);
return x;
}
FORCE_INLINE void rx_store_vec_f128(double* mem_addr, rx_vec_f128 a) {
store64(mem_addr + 0, a.i.u64[0]);
store64(mem_addr + 1, a.i.u64[1]);
}
// Swaps the two double lanes.
FORCE_INLINE rx_vec_f128 rx_swap_vec_f128(rx_vec_f128 a) {
double temp = a.hi;
a.hi = a.lo;
a.lo = temp;
return a;
}
// Lane-wise double arithmetic.
FORCE_INLINE rx_vec_f128 rx_add_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
rx_vec_f128 x;
x.lo = a.lo + b.lo;
x.hi = a.hi + b.hi;
return x;
}
FORCE_INLINE rx_vec_f128 rx_sub_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
rx_vec_f128 x;
x.lo = a.lo - b.lo;
x.hi = a.hi - b.hi;
return x;
}
FORCE_INLINE rx_vec_f128 rx_mul_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
rx_vec_f128 x;
x.lo = a.lo * b.lo;
x.hi = a.hi * b.hi;
return x;
}
FORCE_INLINE rx_vec_f128 rx_div_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
rx_vec_f128 x;
x.lo = a.lo / b.lo;
x.hi = a.hi / b.hi;
return x;
}
// rx_sqrt resolves to the platform-correct sqrt selected earlier.
FORCE_INLINE rx_vec_f128 rx_sqrt_vec_f128(rx_vec_f128 a) {
rx_vec_f128 x;
x.lo = rx_sqrt(a.lo);
x.hi = rx_sqrt(a.hi);
return x;
}
FORCE_INLINE rx_vec_i128 rx_set1_long_vec_i128(uint64_t a) {
rx_vec_i128 x;
x.u64[0] = a;
x.u64[1] = a;
return x;
}
FORCE_INLINE rx_vec_f128 rx_vec_i128_vec_f128(rx_vec_i128 a) {
rx_vec_f128 x;
x.i = a;
return x;
}
// x1 = high lane, x0 = low lane (matches the SSE2 variant).
FORCE_INLINE rx_vec_f128 rx_set_vec_f128(uint64_t x1, uint64_t x0) {
rx_vec_f128 v;
v.i.u64[0] = x0;
v.i.u64[1] = x1;
return v;
}
FORCE_INLINE rx_vec_f128 rx_set1_vec_f128(uint64_t x) {
rx_vec_f128 v;
v.i.u64[0] = x;
v.i.u64[1] = x;
return v;
}
// Bitwise ops on doubles operate on the integer view of the union.
FORCE_INLINE rx_vec_f128 rx_xor_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
rx_vec_f128 x;
x.i.u64[0] = a.i.u64[0] ^ b.i.u64[0];
x.i.u64[1] = a.i.u64[1] ^ b.i.u64[1];
return x;
}
FORCE_INLINE rx_vec_f128 rx_and_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
rx_vec_f128 x;
x.i.u64[0] = a.i.u64[0] & b.i.u64[0];
x.i.u64[1] = a.i.u64[1] & b.i.u64[1];
return x;
}
FORCE_INLINE rx_vec_f128 rx_or_vec_f128(rx_vec_f128 a, rx_vec_f128 b) {
rx_vec_f128 x;
x.i.u64[0] = a.i.u64[0] | b.i.u64[0];
x.i.u64[1] = a.i.u64[1] | b.i.u64[1];
return x;
}
// 32-bit lane extraction (x = lane 0 ... w = lane 3).
FORCE_INLINE int rx_vec_i128_x(rx_vec_i128 a) {
return a.u32[0];
}
FORCE_INLINE int rx_vec_i128_y(rx_vec_i128 a) {
return a.u32[1];
}
FORCE_INLINE int rx_vec_i128_z(rx_vec_i128 a) {
return a.u32[2];
}
FORCE_INLINE int rx_vec_i128_w(rx_vec_i128 a) {
return a.u32[3];
}
FORCE_INLINE rx_vec_i128 rx_set_int_vec_i128(int _I3, int _I2, int _I1, int _I0) {
rx_vec_i128 v;
v.u32[0] = _I0;
v.u32[1] = _I1;
v.u32[2] = _I2;
v.u32[3] = _I3;
return v;
};
FORCE_INLINE rx_vec_i128 rx_xor_vec_i128(rx_vec_i128 _A, rx_vec_i128 _B) {
rx_vec_i128 c;
c.u32[0] = _A.u32[0] ^ _B.u32[0];
c.u32[1] = _A.u32[1] ^ _B.u32[1];
c.u32[2] = _A.u32[2] ^ _B.u32[2];
c.u32[3] = _A.u32[3] ^ _B.u32[3];
return c;
}
FORCE_INLINE rx_vec_i128 rx_load_vec_i128(rx_vec_i128 const*_P) {
#if defined(NATIVE_LITTLE_ENDIAN)
return *_P;
#else
uint32_t* ptr = (uint32_t*)_P;
rx_vec_i128 c;
c.u32[0] = load32(ptr + 0);
c.u32[1] = load32(ptr + 1);
c.u32[2] = load32(ptr + 2);
c.u32[3] = load32(ptr + 3);
return c;
#endif
}
FORCE_INLINE void rx_store_vec_i128(rx_vec_i128 *_P, rx_vec_i128 _B) {
#if defined(NATIVE_LITTLE_ENDIAN)
*_P = _B;
#else
uint32_t* ptr = (uint32_t*)_P;
store32(ptr + 0, _B.u32[0]);
store32(ptr + 1, _B.u32[1]);
store32(ptr + 2, _B.u32[2]);
store32(ptr + 3, _B.u32[3]);
#endif
}
// Converts two packed 32-bit signed integers at addr to two doubles.
FORCE_INLINE rx_vec_f128 rx_cvt_packed_int_vec_f128(const void* addr) {
rx_vec_f128 x;
x.lo = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 0));
x.hi = (double)unsigned32ToSigned2sCompl(load32((uint8_t*)addr + 4));
return x;
}
// Rounding is controlled through the <cfenv> fallback.
#define RANDOMX_DEFAULT_FENV
#endif
// When no hardware AES path was selected above, the AES entry points exist
// but throw at runtime; callers are expected to check HAVE_AES == 1 before
// requesting hardware AES.
#ifndef HAVE_AES
static const char* platformError = "Platform doesn't support hardware AES";
#include <stdexcept>
FORCE_INLINE rx_vec_i128 rx_aesenc_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
throw std::runtime_error(platformError);
}
FORCE_INLINE rx_vec_i128 rx_aesdec_vec_i128(rx_vec_i128 v, rx_vec_i128 rkey) {
throw std::runtime_error(platformError);
}
#define HAVE_AES 0
#endif
// Prototypes for the out-of-line implementations in intrin_portable.cpp
// (only needed when the <cfenv>-based FP environment path is active).
#ifdef RANDOMX_DEFAULT_FENV
void rx_reset_float_state();
void rx_set_rounding_mode(uint32_t mode);
#endif
// Scalar helpers shared by all configurations.
double loadDoublePortable(const void* addr);
uint64_t mulh(uint64_t, uint64_t);
int64_t smulh(int64_t, int64_t);
uint64_t rotl(uint64_t, unsigned int);
uint64_t rotr(uint64_t, unsigned int);

37
randomx/jit_compiler.hpp Normal file
View File

@ -0,0 +1,37 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
// Selects the JIT compiler implementation for the target architecture;
// unsupported targets get the throwing fallback.
#if defined(_M_X64) || defined(__x86_64__)
#include "jit_compiler_x86.hpp"
#elif defined(__aarch64__)
#include "jit_compiler_a64.hpp"
#else
#include "jit_compiler_fallback.hpp"
#endif

View File

@ -0,0 +1,76 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cstdint>
#include <vector>
#include <stdexcept>
#include "common.hpp"
namespace randomx {
class Program;
class ProgramConfiguration;
class SuperscalarProgram;
// Stub JIT compiler for platforms without a native code generator.
// Construction throws immediately; the remaining members are inert no-ops
// that satisfy the interface shared with the real JIT compilers.
class JitCompilerFallback {
public:
JitCompilerFallback() {
throw std::runtime_error("JIT compilation is not supported on this platform");
}
void generateProgram(Program&, ProgramConfiguration&) {
}
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t) {
}
template<size_t N>
void generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &) {
}
void generateDatasetInitCode() {
}
// Never reached in practice (constructor throws); returns no code.
ProgramFunc* getProgramFunc() {
return nullptr;
}
DatasetInitFunc* getDatasetInitFunc() {
return nullptr;
}
uint8_t* getCode() {
return nullptr;
}
size_t getCodeSize() {
return 0;
}
// Page-permission toggles are meaningless without generated code.
void enableWriting() {}
void enableExecution() {}
void enableAll() {}
};
}

View File

@ -0,0 +1,845 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdexcept>
#include <cstring>
#include <climits>
#include "jit_compiler_x86.hpp"
#include "jit_compiler_x86_static.hpp"
#include "superscalar.hpp"
#include "program.hpp"
#include "reciprocal.h"
#include "virtual_memory.hpp"
namespace randomx {
/*
REGISTER ALLOCATION:
; rax -> temporary
; rbx -> iteration counter "ic"
; rcx -> temporary
; rdx -> temporary
; rsi -> scratchpad pointer
; rdi -> dataset pointer
; rbp -> memory registers "ma" (high 32 bits), "mx" (low 32 bits)
; rsp -> stack pointer
; r8 -> "r0"
; r9 -> "r1"
; r10 -> "r2"
; r11 -> "r3"
; r12 -> "r4"
; r13 -> "r5"
; r14 -> "r6"
; r15 -> "r7"
; xmm0 -> "f0"
; xmm1 -> "f1"
; xmm2 -> "f2"
; xmm3 -> "f3"
; xmm4 -> "e0"
; xmm5 -> "e1"
; xmm6 -> "e2"
; xmm7 -> "e3"
; xmm8 -> "a0"
; xmm9 -> "a1"
; xmm10 -> "a2"
; xmm11 -> "a3"
; xmm12 -> temporary
; xmm13 -> E 'and' mask = 0x00ffffffffffffff00ffffffffffffff
; xmm14 -> E 'or' mask = 0x3*00000000******3*00000000******
; xmm15 -> scale mask = 0x81f000000000000081f0000000000000
*/
//Calculate the required code buffer size that is sufficient for the largest possible program:
constexpr size_t MaxRandomXInstrCodeSize = 32; //FDIV_M requires up to 32 bytes of x86 code
constexpr size_t MaxSuperscalarInstrSize = 14; //IMUL_RCP requires 14 bytes of x86 code
constexpr size_t SuperscalarProgramHeader = 128; //overhead per superscalar program
constexpr size_t CodeAlign = 4096; //align code size to a multiple of 4 KiB
constexpr size_t ReserveCodeSize = CodeAlign; //function prologue/epilogue + reserve
constexpr size_t RandomXCodeSize = alignSize(ReserveCodeSize + MaxRandomXInstrCodeSize * RANDOMX_PROGRAM_SIZE, CodeAlign);
constexpr size_t SuperscalarSize = alignSize(ReserveCodeSize + (SuperscalarProgramHeader + MaxSuperscalarInstrSize * SuperscalarMaxSize) * RANDOMX_CACHE_ACCESSES, CodeAlign);
// Both region sizes must fit comfortably in an int32_t because jump/call
// displacements emitted below are 32-bit relative offsets.
static_assert(RandomXCodeSize < INT32_MAX / 2, "RandomXCodeSize is too large");
static_assert(SuperscalarSize < INT32_MAX / 2, "SuperscalarSize is too large");
// The buffer is split in two: [0, RandomXCodeSize) holds the compiled RandomX
// program; [superScalarHashOffset, CodeSize) holds the superscalar hash code.
constexpr uint32_t CodeSize = RandomXCodeSize + SuperscalarSize;
constexpr int32_t superScalarHashOffset = RandomXCodeSize;
// Addresses of pre-assembled code fragments (defined in the .S file); pairs of
// adjacent labels delimit each fragment, and the differences below give sizes.
const uint8_t* codePrologue = (uint8_t*)&randomx_program_prologue;
const uint8_t* codeLoopBegin = (uint8_t*)&randomx_program_loop_begin;
const uint8_t* codeLoopLoad = (uint8_t*)&randomx_program_loop_load;
const uint8_t* codeProgamStart = (uint8_t*)&randomx_program_start;
const uint8_t* codeReadDataset = (uint8_t*)&randomx_program_read_dataset;
const uint8_t* codeReadDatasetLightSshInit = (uint8_t*)&randomx_program_read_dataset_sshash_init;
const uint8_t* codeReadDatasetLightSshFin = (uint8_t*)&randomx_program_read_dataset_sshash_fin;
const uint8_t* codeDatasetInit = (uint8_t*)&randomx_dataset_init;
const uint8_t* codeLoopStore = (uint8_t*)&randomx_program_loop_store;
const uint8_t* codeLoopEnd = (uint8_t*)&randomx_program_loop_end;
const uint8_t* codeEpilogue = (uint8_t*)&randomx_program_epilogue;
const uint8_t* codeProgramEnd = (uint8_t*)&randomx_program_end;
const uint8_t* codeShhLoad = (uint8_t*)&randomx_sshash_load;
const uint8_t* codeShhPrefetch = (uint8_t*)&randomx_sshash_prefetch;
const uint8_t* codeShhEnd = (uint8_t*)&randomx_sshash_end;
const uint8_t* codeShhInit = (uint8_t*)&randomx_sshash_init;
// Fragment sizes derived from label distances.
const int32_t prologueSize = codeLoopBegin - codePrologue;
const int32_t loopLoadSize = codeProgamStart - codeLoopLoad;
const int32_t readDatasetSize = codeReadDatasetLightSshInit - codeReadDataset;
const int32_t readDatasetLightInitSize = codeReadDatasetLightSshFin - codeReadDatasetLightSshInit;
const int32_t readDatasetLightFinSize = codeLoopStore - codeReadDatasetLightSshFin;
const int32_t loopStoreSize = codeLoopEnd - codeLoopStore;
const int32_t datasetInitSize = codeEpilogue - codeDatasetInit;
const int32_t epilogueSize = codeShhLoad - codeEpilogue;
const int32_t codeSshLoadSize = codeShhPrefetch - codeShhLoad;
const int32_t codeSshPrefetchSize = codeShhEnd - codeShhPrefetch;
const int32_t codeSshInitSize = codeProgramEnd - codeShhInit;
// The epilogue is copied to the very end of the buffer.
const int32_t epilogueOffset = CodeSize - epilogueSize;
// Raw x86-64 machine-code byte sequences (opcode prefixes and stems) used by
// the emitters below. Names encode the instruction and operand form
// (RR = reg,reg; RM = reg,mem; I = immediate). These bytes must not change.
static const uint8_t REX_ADD_RR[] = { 0x4d, 0x03 };
static const uint8_t REX_ADD_RM[] = { 0x4c, 0x03 };
static const uint8_t REX_SUB_RR[] = { 0x4d, 0x2b };
static const uint8_t REX_SUB_RM[] = { 0x4c, 0x2b };
static const uint8_t REX_MOV_RR[] = { 0x41, 0x8b };
static const uint8_t REX_MOV_RR64[] = { 0x49, 0x8b };
static const uint8_t REX_MOV_R64R[] = { 0x4c, 0x8b };
static const uint8_t REX_IMUL_RR[] = { 0x4d, 0x0f, 0xaf };
static const uint8_t REX_IMUL_RRI[] = { 0x4d, 0x69 };
static const uint8_t REX_IMUL_RM[] = { 0x4c, 0x0f, 0xaf };
static const uint8_t REX_MUL_R[] = { 0x49, 0xf7 };
static const uint8_t REX_MUL_M[] = { 0x48, 0xf7 };
static const uint8_t REX_81[] = { 0x49, 0x81 };
static const uint8_t AND_EAX_I = 0x25;
static const uint8_t MOV_EAX_I = 0xb8;
static const uint8_t MOV_RAX_I[] = { 0x48, 0xb8 };
static const uint8_t MOV_RCX_I[] = { 0x48, 0xb9 };
static const uint8_t REX_LEA[] = { 0x4f, 0x8d };
static const uint8_t REX_MUL_MEM[] = { 0x48, 0xf7, 0x24, 0x0e };
static const uint8_t REX_IMUL_MEM[] = { 0x48, 0xf7, 0x2c, 0x0e };
static const uint8_t REX_SHR_RAX[] = { 0x48, 0xc1, 0xe8 };
static const uint8_t RAX_ADD_SBB_1[] = { 0x48, 0x83, 0xC0, 0x01, 0x48, 0x83, 0xD8, 0x00 };
static const uint8_t MUL_RCX[] = { 0x48, 0xf7, 0xe1 };
static const uint8_t REX_SHR_RDX[] = { 0x48, 0xc1, 0xea };
static const uint8_t REX_SH[] = { 0x49, 0xc1 };
static const uint8_t MOV_RCX_RAX_SAR_RCX_63[] = { 0x48, 0x89, 0xc1, 0x48, 0xc1, 0xf9, 0x3f };
static const uint8_t AND_ECX_I[] = { 0x81, 0xe1 };
static const uint8_t ADD_RAX_RCX[] = { 0x48, 0x01, 0xC8 };
static const uint8_t SAR_RAX_I8[] = { 0x48, 0xC1, 0xF8 };
static const uint8_t NEG_RAX[] = { 0x48, 0xF7, 0xD8 };
static const uint8_t ADD_R_RAX[] = { 0x4C, 0x03 };
static const uint8_t XOR_EAX_EAX[] = { 0x33, 0xC0 };
static const uint8_t ADD_RDX_R[] = { 0x4c, 0x01 };
static const uint8_t SUB_RDX_R[] = { 0x4c, 0x29 };
static const uint8_t SAR_RDX_I8[] = { 0x48, 0xC1, 0xFA };
static const uint8_t TEST_RDX_RDX[] = { 0x48, 0x85, 0xD2 };
static const uint8_t SETS_AL_ADD_RDX_RAX[] = { 0x0F, 0x98, 0xC0, 0x48, 0x03, 0xD0 };
static const uint8_t REX_NEG[] = { 0x49, 0xF7 };
static const uint8_t REX_XOR_RR[] = { 0x4D, 0x33 };
static const uint8_t REX_XOR_RI[] = { 0x49, 0x81 };
static const uint8_t REX_XOR_RM[] = { 0x4c, 0x33 };
static const uint8_t REX_ROT_CL[] = { 0x49, 0xd3 };
static const uint8_t REX_ROT_I8[] = { 0x49, 0xc1 };
static const uint8_t SHUFPD[] = { 0x66, 0x0f, 0xc6 };
static const uint8_t REX_ADDPD[] = { 0x66, 0x41, 0x0f, 0x58 };
static const uint8_t REX_CVTDQ2PD_XMM12[] = { 0xf3, 0x44, 0x0f, 0xe6, 0x24, 0x06 };
static const uint8_t REX_SUBPD[] = { 0x66, 0x41, 0x0f, 0x5c };
static const uint8_t REX_XORPS[] = { 0x41, 0x0f, 0x57 };
static const uint8_t REX_MULPD[] = { 0x66, 0x41, 0x0f, 0x59 };
static const uint8_t REX_MAXPD[] = { 0x66, 0x41, 0x0f, 0x5f };
static const uint8_t REX_DIVPD[] = { 0x66, 0x41, 0x0f, 0x5e };
static const uint8_t SQRTPD[] = { 0x66, 0x0f, 0x51 };
static const uint8_t AND_OR_MOV_LDMXCSR[] = { 0x25, 0x00, 0x60, 0x00, 0x00, 0x0D, 0xC0, 0x9F, 0x00, 0x00, 0x50, 0x0F, 0xAE, 0x14, 0x24, 0x58 };
static const uint8_t ROL_RAX[] = { 0x48, 0xc1, 0xc0 };
static const uint8_t XOR_ECX_ECX[] = { 0x33, 0xC9 };
static const uint8_t REX_CMP_R32I[] = { 0x41, 0x81 };
static const uint8_t REX_CMP_M32I[] = { 0x81, 0x3c, 0x06 };
static const uint8_t MOVAPD[] = { 0x66, 0x0f, 0x29 };
static const uint8_t REX_MOV_MR[] = { 0x4c, 0x89 };
static const uint8_t REX_XOR_EAX[] = { 0x41, 0x33 };
static const uint8_t SUB_EBX[] = { 0x83, 0xEB, 0x01 };
static const uint8_t JNZ[] = { 0x0f, 0x85 };
static const uint8_t JMP = 0xe9;
static const uint8_t REX_XOR_RAX_R64[] = { 0x49, 0x33 };
static const uint8_t REX_XCHG[] = { 0x4d, 0x87 };
static const uint8_t REX_ANDPS_XMM12[] = { 0x45, 0x0F, 0x54, 0xE5, 0x45, 0x0F, 0x56, 0xE6 };
static const uint8_t REX_PADD[] = { 0x66, 0x44, 0x0f };
static const uint8_t PADD_OPCODES[] = { 0xfc, 0xfd, 0xfe, 0xd4 };
static const uint8_t CALL = 0xe8;
static const uint8_t REX_ADD_I[] = { 0x49, 0x81 };
static const uint8_t REX_TEST[] = { 0x49, 0xF7 };
static const uint8_t JZ[] = { 0x0f, 0x84 };
static const uint8_t RET = 0xc3;
static const uint8_t LEA_32[] = { 0x41, 0x8d };
static const uint8_t MOVNTI[] = { 0x4c, 0x0f, 0xc3 };
static const uint8_t ADD_EBX_I[] = { 0x81, 0xc3 };
// Multi-byte NOP encodings of length 1..8, indexed via NOPX[len - 1]; used for
// optional code alignment padding.
static const uint8_t NOP1[] = { 0x90 };
static const uint8_t NOP2[] = { 0x66, 0x90 };
static const uint8_t NOP3[] = { 0x66, 0x66, 0x90 };
static const uint8_t NOP4[] = { 0x0F, 0x1F, 0x40, 0x00 };
static const uint8_t NOP5[] = { 0x0F, 0x1F, 0x44, 0x00, 0x00 };
static const uint8_t NOP6[] = { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 };
static const uint8_t NOP7[] = { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 };
static const uint8_t NOP8[] = { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 };
static const uint8_t* NOPX[] = { NOP1, NOP2, NOP3, NOP4, NOP5, NOP6, NOP7, NOP8 };
// Total size of the JIT code buffer (RandomX program region + superscalar region).
size_t JitCompilerX86::getCodeSize() {
return CodeSize;
}
// Allocates the code buffer and pre-copies the fixed prologue (at offset 0)
// and epilogue (at the end of the buffer); the space between them is filled
// per-program by the generate* methods.
JitCompilerX86::JitCompilerX86() {
code = (uint8_t*)allocMemoryPages(CodeSize);
memcpy(code, codePrologue, prologueSize);
memcpy(code + epilogueOffset, codeEpilogue, epilogueSize);
}
JitCompilerX86::~JitCompilerX86() {
freePagedMemory(code, CodeSize);
}
// Page-protection helpers for the JIT buffer: writable+executable, writable
// only, or executable only (for W^X platforms).
void JitCompilerX86::enableAll() {
setPagesRWX(code, CodeSize);
}
void JitCompilerX86::enableWriting() {
setPagesRW(code, CodeSize);
}
void JitCompilerX86::enableExecution() {
setPagesRX(code, CodeSize);
}
// Compiles a RandomX program for the fast (full-dataset) mode:
// prologue + per-instruction code, then the dataset-read fragment, then the
// loop store/branch epilogue. (Removed a leftover commented-out debug printf.)
void JitCompilerX86::generateProgram(Program& prog, ProgramConfiguration& pcfg) {
	generateProgramPrologue(prog, pcfg);
	// Direct dataset read - copy the pre-assembled fragment verbatim.
	memcpy(code + codePos, codeReadDataset, readDatasetSize);
	codePos += readDatasetSize;
	generateProgramEpilogue(prog, pcfg);
}
// Compiles a RandomX program for the light (cache-only) mode: instead of a
// direct dataset read, emits a call into the superscalar hash code located at
// superScalarHashOffset, passing the dataset block offset via ebx.
void JitCompilerX86::generateProgramLight(Program& prog, ProgramConfiguration& pcfg, uint32_t datasetOffset) {
generateProgramPrologue(prog, pcfg);
emit(codeReadDatasetLightSshInit, readDatasetLightInitSize);
emit(ADD_EBX_I);
emit32(datasetOffset / CacheLineSize);
emitByte(CALL);
// 32-bit relative call displacement: target minus end of the call instruction.
emit32(superScalarHashOffset - (codePos + 4));
emit(codeReadDatasetLightSshFin, readDatasetLightFinSize);
generateProgramEpilogue(prog, pcfg);
}
// Compiles the N superscalar programs (used by light mode / dataset init) into
// the upper region of the code buffer, chaining them with cache-line loads and
// prefetches between consecutive programs, and ending with a RET.
// Fix: removed a stray debug `printf("xcccc\n")` that printed on every call.
template<size_t N>
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[N], std::vector<uint64_t> &reciprocalCache) {
	memcpy(code + superScalarHashOffset, codeShhInit, codeSshInitSize);
	codePos = superScalarHashOffset + codeSshInitSize;
	for (unsigned j = 0; j < N; ++j) {
		SuperscalarProgram& prog = programs[j];
		for (unsigned i = 0; i < prog.getSize(); ++i) {
			Instruction& instr = prog(i);
			generateSuperscalarCode(instr, reciprocalCache);
		}
		emit(codeShhLoad, codeSshLoadSize);
		if (j < N - 1) {
			// Load the address register of this program and prefetch the next
			// cache item before starting the next program.
			emit(REX_MOV_RR64);
			emitByte(0xd8 + prog.getAddressRegister());
			emit(codeShhPrefetch, codeSshPrefetchSize);
#ifdef RANDOMX_ALIGN
			// Optionally pad with multi-byte NOPs to a 16-byte boundary.
			int align = (codePos % 16);
			while (align != 0) {
				int nopSize = 16 - align;
				if (nopSize > 8) nopSize = 8;
				emit(NOPX[nopSize - 1], nopSize);
				align = (codePos % 16);
			}
#endif
		}
	}
	emitByte(RET);
}
// Explicit instantiation for the configured number of cache accesses.
template
void JitCompilerX86::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES], std::vector<uint64_t> &reciprocalCache);
// Copies the pre-assembled dataset-initialization routine to the start of the
// buffer (it will call into the superscalar hash code generated above).
void JitCompilerX86::generateDatasetInitCode() {
memcpy(code, codeDatasetInit, datasetInitSize);
}
// Emits the per-program prologue: patches the readReg0/readReg1 ModRM bytes
// inside the pre-copied prologue fragment, stores the eMask constant, copies
// the loop-load fragment, then compiles every program instruction and finally
// mixes readReg2/readReg3 into eax for the dataset address.
void JitCompilerX86::generateProgramPrologue(Program& prog, ProgramConfiguration& pcfg) {
instructionOffsets.clear();
// -1 marks "never modified in this iteration"; used by CBRANCH targeting.
for (unsigned i = 0; i < 8; ++i) {
registerUsage[i] = -1;
}
// Patch the two `xor rax, r**` ModRM bytes in the first-load section of the
// pre-assembled prologue (offsets computed from the asm labels).
codePos = ((uint8_t*)randomx_program_prologue_first_load) - ((uint8_t*)randomx_program_prologue);
code[codePos + sizeof(REX_XOR_RAX_R64)] = 0xc0 + pcfg.readReg0;
code[codePos + sizeof(REX_XOR_RAX_R64) * 2 + 1] = 0xc0 + pcfg.readReg1;
codePos = prologueSize;
// The eMask constant lives 48 bytes before the end of the prologue region.
memcpy(code + codePos - 48, &pcfg.eMask, sizeof(pcfg.eMask));
memcpy(code + codePos, codeLoopLoad, loopLoadSize);
codePos += loopLoadSize;
for (unsigned i = 0; i < prog.getSize(); ++i) {
Instruction& instr = prog(i);
// Clamp register fields to valid indices before code generation.
instr.src %= RegistersCount;
instr.dst %= RegistersCount;
generateCode(instr, i);
}
// mov eax, r{readReg2}d ; xor eax, r{readReg3}d
emit(REX_MOV_RR);
emitByte(0xc0 + pcfg.readReg2);
emit(REX_XOR_EAX);
emitByte(0xc0 + pcfg.readReg3);
}
// Emits the per-program epilogue: mixes readReg0/readReg1 into rax, emits the
// scratchpad prefetch fragment, the loop-store fragment, then the loop
// counter decrement and the two branches (back to the loop, or to the fixed
// epilogue at the end of the buffer).
void JitCompilerX86::generateProgramEpilogue(Program& prog, ProgramConfiguration& pcfg) {
emit(REX_MOV_RR64);
emitByte(0xc0 + pcfg.readReg0);
emit(REX_XOR_RAX_R64);
emitByte(0xc0 + pcfg.readReg1);
emit((const uint8_t*)&randomx_prefetch_scratchpad, ((uint8_t*)&randomx_prefetch_scratchpad_end) - ((uint8_t*)&randomx_prefetch_scratchpad));
memcpy(code + codePos, codeLoopStore, loopStoreSize);
codePos += loopStoreSize;
emit(SUB_EBX);
// jnz back to the loop begin (32-bit relative displacement).
emit(JNZ);
emit32(prologueSize - codePos - 4);
// jmp to the fixed epilogue at the end of the buffer.
emitByte(JMP);
emit32(epilogueOffset - codePos - 4);
}
// Compiles a single program instruction: records where its code starts (so
// CBRANCH can jump back to it) and dispatches to the opcode's handler.
void JitCompilerX86::generateCode(Instruction& instr, int i) {
	instructionOffsets.push_back(codePos);
	(this->*engine[instr.opcode])(instr, i);
}
// Emits x86-64 machine code for one superscalar-program instruction.
// ModRM bytes are composed as 0xc0 + 8*dst + src (reg,reg forms) or with the
// opcode extension in the reg field (immediate forms). IMUL_RCP looks up the
// precomputed reciprocal by its imm32 index in reciprocalCache.
void JitCompilerX86::generateSuperscalarCode(Instruction& instr, std::vector<uint64_t> &reciprocalCache) {
switch ((SuperscalarInstructionType)instr.opcode)
{
case randomx::SuperscalarInstructionType::ISUB_R:
emit(REX_SUB_RR);
emitByte(0xc0 + 8 * instr.dst + instr.src);
break;
case randomx::SuperscalarInstructionType::IXOR_R:
emit(REX_XOR_RR);
emitByte(0xc0 + 8 * instr.dst + instr.src);
break;
case randomx::SuperscalarInstructionType::IADD_RS:
// lea dst, [src*2^shift + dst] encoded via a SIB byte.
emit(REX_LEA);
emitByte(0x04 + 8 * instr.dst);
genSIB(instr.getModShift(), instr.src, instr.dst);
break;
case randomx::SuperscalarInstructionType::IMUL_R:
emit(REX_IMUL_RR);
emitByte(0xc0 + 8 * instr.dst + instr.src);
break;
case randomx::SuperscalarInstructionType::IROR_C:
// ror dst, imm8 (rotate count masked to 0-63).
emit(REX_ROT_I8);
emitByte(0xc8 + instr.dst);
emitByte(instr.getImm32() & 63);
break;
case randomx::SuperscalarInstructionType::IADD_C7:
emit(REX_81);
emitByte(0xc0 + instr.dst);
emit32(instr.getImm32());
break;
case randomx::SuperscalarInstructionType::IXOR_C7:
emit(REX_XOR_RI);
emitByte(0xf0 + instr.dst);
emit32(instr.getImm32());
break;
// The C8/C9 variants are identical to C7 except for optional NOP padding
// that keeps instruction sizes fixed when RANDOMX_ALIGN is enabled.
case randomx::SuperscalarInstructionType::IADD_C8:
emit(REX_81);
emitByte(0xc0 + instr.dst);
emit32(instr.getImm32());
#ifdef RANDOMX_ALIGN
emit(NOP1);
#endif
break;
case randomx::SuperscalarInstructionType::IXOR_C8:
emit(REX_XOR_RI);
emitByte(0xf0 + instr.dst);
emit32(instr.getImm32());
#ifdef RANDOMX_ALIGN
emit(NOP1);
#endif
break;
case randomx::SuperscalarInstructionType::IADD_C9:
emit(REX_81);
emitByte(0xc0 + instr.dst);
emit32(instr.getImm32());
#ifdef RANDOMX_ALIGN
emit(NOP2);
#endif
break;
case randomx::SuperscalarInstructionType::IXOR_C9:
emit(REX_XOR_RI);
emitByte(0xf0 + instr.dst);
emit32(instr.getImm32());
#ifdef RANDOMX_ALIGN
emit(NOP2);
#endif
break;
case randomx::SuperscalarInstructionType::IMULH_R:
// mov rax, dst ; mul src ; mov dst, rdx (high 64 bits of product).
emit(REX_MOV_RR64);
emitByte(0xc0 + instr.dst);
emit(REX_MUL_R);
emitByte(0xe0 + instr.src);
emit(REX_MOV_R64R);
emitByte(0xc2 + 8 * instr.dst);
break;
case randomx::SuperscalarInstructionType::ISMULH_R:
// Same as IMULH_R but signed (imul, /5 extension in ModRM).
emit(REX_MOV_RR64);
emitByte(0xc0 + instr.dst);
emit(REX_MUL_R);
emitByte(0xe8 + instr.src);
emit(REX_MOV_R64R);
emitByte(0xc2 + 8 * instr.dst);
break;
case randomx::SuperscalarInstructionType::IMUL_RCP:
// mov rax, reciprocal ; imul dst, rax.
emit(MOV_RAX_I);
emit64(reciprocalCache[instr.getImm32()]);
emit(REX_IMUL_RM);
emitByte(0xc0 + 8 * instr.dst);
break;
default:
UNREACHABLE;
}
}
// Computes a scratchpad load address into eax (or ecx when rax == false):
// lea r32, [src + imm32], then mask with the L1 or L2 scratchpad mask
// depending on the instruction's mod.mem bit.
void JitCompilerX86::genAddressReg(Instruction& instr, bool rax = true) {
emit(LEA_32);
emitByte(0x80 + instr.src + (rax ? 0 : 8));
// Register encodings that alias the SIB escape need an explicit SIB byte.
if (instr.src == RegisterNeedsSib) {
emitByte(0x24);
}
emit32(instr.getImm32());
if (rax)
emitByte(AND_EAX_I);
else
emit(AND_ECX_I);
emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
// Computes a scratchpad store address into eax from the destination register;
// the condition field selects L1/L2 or the full L3 range.
void JitCompilerX86::genAddressRegDst(Instruction& instr) {
emit(LEA_32);
emitByte(0x80 + instr.dst);
if (instr.dst == RegisterNeedsSib) {
emitByte(0x24);
}
emit32(instr.getImm32());
emitByte(AND_EAX_I);
if (instr.getModCond() < StoreL3Condition) {
emit32(instr.getModMem() ? ScratchpadL1Mask : ScratchpadL2Mask);
}
else {
emit32(ScratchpadL3Mask);
}
}
// Emits an absolute scratchpad displacement (imm32 masked to the L3 range).
void JitCompilerX86::genAddressImm(Instruction& instr) {
emit32(instr.getImm32() & ScratchpadL3Mask);
}
void JitCompilerX86::h_IADD_RS(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
emit(REX_LEA);
if (instr.dst == RegisterNeedsDisplacement)
emitByte(0xac);
else
emitByte(0x04 + 8 * instr.dst);
genSIB(instr.getModShift(), instr.src, instr.dst);
if (instr.dst == RegisterNeedsDisplacement)
emit32(instr.getImm32());
}
void JitCompilerX86::h_IADD_M(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr);
emit(REX_ADD_RM);
emitByte(0x04 + 8 * instr.dst);
emitByte(0x06);
}
else {
emit(REX_ADD_RM);
emitByte(0x86 + 8 * instr.dst);
genAddressImm(instr);
}
}
void JitCompilerX86::genSIB(int scale, int index, int base) {
emitByte((scale << 6) | (index << 3) | base);
}
// ISUB_R: dst -= src (reg form), or dst -= imm32 when src == dst.
void JitCompilerX86::h_ISUB_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
emit(REX_SUB_RR);
emitByte(0xc0 + 8 * instr.dst + instr.src);
}
else {
emit(REX_81);
emitByte(0xe8 + instr.dst);
emit32(instr.getImm32());
}
}
// ISUB_M: dst -= [scratchpad address]; same addressing split as IADD_M.
void JitCompilerX86::h_ISUB_M(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr);
emit(REX_SUB_RM);
emitByte(0x04 + 8 * instr.dst);
emitByte(0x06);
}
else {
emit(REX_SUB_RM);
emitByte(0x86 + 8 * instr.dst);
genAddressImm(instr);
}
}
// IMUL_R: dst *= src, or dst = dst * imm32 (imul r64, r64, imm32).
void JitCompilerX86::h_IMUL_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
emit(REX_IMUL_RR);
emitByte(0xc0 + 8 * instr.dst + instr.src);
}
else {
emit(REX_IMUL_RRI);
emitByte(0xc0 + 9 * instr.dst);
emit32(instr.getImm32());
}
}
// IMUL_M: dst *= [scratchpad address].
void JitCompilerX86::h_IMUL_M(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr);
emit(REX_IMUL_RM);
emitByte(0x04 + 8 * instr.dst);
emitByte(0x06);
}
else {
emit(REX_IMUL_RM);
emitByte(0x86 + 8 * instr.dst);
genAddressImm(instr);
}
}
// IMULH_R: dst = high 64 bits of unsigned dst*src
// (mov rax, dst ; mul src ; mov dst, rdx).
void JitCompilerX86::h_IMULH_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
emit(REX_MOV_RR64);
emitByte(0xc0 + instr.dst);
emit(REX_MUL_R);
emitByte(0xe0 + instr.src);
emit(REX_MOV_R64R);
emitByte(0xc2 + 8 * instr.dst);
}
// IMULH_M: dst = high 64 bits of unsigned dst * [scratchpad address].
// The address goes through ecx (genAddressReg(instr, false)) because rax holds
// the multiplicand.
void JitCompilerX86::h_IMULH_M(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr, false);
emit(REX_MOV_RR64);
emitByte(0xc0 + instr.dst);
emit(REX_MUL_MEM);
}
else {
emit(REX_MOV_RR64);
emitByte(0xc0 + instr.dst);
emit(REX_MUL_M);
emitByte(0xa6);
genAddressImm(instr);
}
emit(REX_MOV_R64R);
emitByte(0xc2 + 8 * instr.dst);
}
// ISMULH_R: signed variant of IMULH_R (imul, ModRM extension /5).
void JitCompilerX86::h_ISMULH_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
emit(REX_MOV_RR64);
emitByte(0xc0 + instr.dst);
emit(REX_MUL_R);
emitByte(0xe8 + instr.src);
emit(REX_MOV_R64R);
emitByte(0xc2 + 8 * instr.dst);
}
// ISMULH_M: signed variant of IMULH_M.
void JitCompilerX86::h_ISMULH_M(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr, false);
emit(REX_MOV_RR64);
emitByte(0xc0 + instr.dst);
emit(REX_IMUL_MEM);
}
else {
emit(REX_MOV_RR64);
emitByte(0xc0 + instr.dst);
emit(REX_MUL_M);
emitByte(0xae);
genAddressImm(instr);
}
emit(REX_MOV_R64R);
emitByte(0xc2 + 8 * instr.dst);
}
// IMUL_RCP: dst *= reciprocal(imm32). Skipped entirely (emits nothing) when
// the divisor is 0 or a power of 2, per the RandomX specification.
void JitCompilerX86::h_IMUL_RCP(Instruction& instr, int i) {
uint64_t divisor = instr.getImm32();
if (!isZeroOrPowerOf2(divisor)) {
registerUsage[instr.dst] = i;
emit(MOV_RAX_I);
emit64(randomx_reciprocal_fast(divisor));
emit(REX_IMUL_RM);
emitByte(0xc0 + 8 * instr.dst);
}
}
// INEG_R: dst = -dst.
void JitCompilerX86::h_INEG_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
emit(REX_NEG);
emitByte(0xd8 + instr.dst);
}
// IXOR_R: dst ^= src, or dst ^= imm32 when src == dst.
void JitCompilerX86::h_IXOR_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
emit(REX_XOR_RR);
emitByte(0xc0 + 8 * instr.dst + instr.src);
}
else {
emit(REX_XOR_RI);
emitByte(0xf0 + instr.dst);
emit32(instr.getImm32());
}
}
// IXOR_M: dst ^= [scratchpad address].
void JitCompilerX86::h_IXOR_M(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
genAddressReg(instr);
emit(REX_XOR_RM);
emitByte(0x04 + 8 * instr.dst);
emitByte(0x06);
}
else {
emit(REX_XOR_RM);
emitByte(0x86 + 8 * instr.dst);
genAddressImm(instr);
}
}
// IROR_R: rotate dst right by src (via cl) or by imm32 & 63.
void JitCompilerX86::h_IROR_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
emit(REX_MOV_RR);
emitByte(0xc8 + instr.src);
emit(REX_ROT_CL);
emitByte(0xc8 + instr.dst);
}
else {
emit(REX_ROT_I8);
emitByte(0xc8 + instr.dst);
emitByte(instr.getImm32() & 63);
}
}
// IROL_R: rotate dst left; same pattern, rol ModRM (0xc0) instead of ror.
void JitCompilerX86::h_IROL_R(Instruction& instr, int i) {
registerUsage[instr.dst] = i;
if (instr.src != instr.dst) {
emit(REX_MOV_RR);
emitByte(0xc8 + instr.src);
emit(REX_ROT_CL);
emitByte(0xc0 + instr.dst);
}
else {
emit(REX_ROT_I8);
emitByte(0xc0 + instr.dst);
emitByte(instr.getImm32() & 63);
}
}
// ISWAP_R: exchange two integer registers; no-op when src == dst.
// Both registers count as modified for CBRANCH purposes.
void JitCompilerX86::h_ISWAP_R(Instruction& instr, int i) {
if (instr.src != instr.dst) {
registerUsage[instr.dst] = i;
registerUsage[instr.src] = i;
emit(REX_XCHG);
emitByte(0xc0 + instr.src + 8 * instr.dst);
}
}
// FSWAP_R: swap the two 64-bit lanes of an xmm register (shufpd xmm, xmm, 1).
void JitCompilerX86::h_FSWAP_R(Instruction& instr, int i) {
emit(SHUFPD);
emitByte(0xc0 + 9 * instr.dst);
emitByte(1);
}
// Floating-point handlers. Register fields are reduced modulo
// RegisterCountFlt; F group registers are xmm0-3, E group xmm4-7, A group
// xmm8-11 (see the register-allocation comment near the top of the file).
// FADD_R: f(dst) += a(src) (addpd).
void JitCompilerX86::h_FADD_R(Instruction& instr, int i) {
instr.dst %= RegisterCountFlt;
instr.src %= RegisterCountFlt;
emit(REX_ADDPD);
emitByte(0xc0 + instr.src + 8 * instr.dst);
}
// FADD_M: convert a scratchpad qword pair to doubles (cvtdq2pd into xmm12),
// then f(dst) += xmm12.
void JitCompilerX86::h_FADD_M(Instruction& instr, int i) {
instr.dst %= RegisterCountFlt;
genAddressReg(instr);
emit(REX_CVTDQ2PD_XMM12);
emit(REX_ADDPD);
emitByte(0xc4 + 8 * instr.dst);
}
// FSUB_R: f(dst) -= a(src).
void JitCompilerX86::h_FSUB_R(Instruction& instr, int i) {
instr.dst %= RegisterCountFlt;
instr.src %= RegisterCountFlt;
emit(REX_SUBPD);
emitByte(0xc0 + instr.src + 8 * instr.dst);
}
// FSUB_M: f(dst) -= converted scratchpad value (via xmm12).
void JitCompilerX86::h_FSUB_M(Instruction& instr, int i) {
instr.dst %= RegisterCountFlt;
genAddressReg(instr);
emit(REX_CVTDQ2PD_XMM12);
emit(REX_SUBPD);
emitByte(0xc4 + 8 * instr.dst);
}
// FSCAL_R: xor f(dst) with the scale mask in xmm15 (flips exponent bits).
void JitCompilerX86::h_FSCAL_R(Instruction& instr, int i) {
instr.dst %= RegisterCountFlt;
emit(REX_XORPS);
emitByte(0xc7 + 8 * instr.dst);
}
// FMUL_R: e(dst) *= a(src).
void JitCompilerX86::h_FMUL_R(Instruction& instr, int i) {
instr.dst %= RegisterCountFlt;
instr.src %= RegisterCountFlt;
emit(REX_MULPD);
emitByte(0xe0 + instr.src + 8 * instr.dst);
}
// FDIV_M: e(dst) /= converted+masked scratchpad value (xmm12 is clamped with
// the E-group and/or masks so the divisor is never zero or subnormal).
void JitCompilerX86::h_FDIV_M(Instruction& instr, int i) {
instr.dst %= RegisterCountFlt;
genAddressReg(instr);
emit(REX_CVTDQ2PD_XMM12);
emit(REX_ANDPS_XMM12);
emit(REX_DIVPD);
emitByte(0xe4 + 8 * instr.dst);
}
// FSQRT_R: e(dst) = sqrt(e(dst)).
void JitCompilerX86::h_FSQRT_R(Instruction& instr, int i) {
instr.dst %= RegisterCountFlt;
emit(SQRTPD);
emitByte(0xe4 + 9 * instr.dst);
}
// CFROUND: set the x87/SSE rounding mode from two bits of the source
// register, rotated into place, then masked/or'ed and loaded via ldmxcsr
// (the AND_OR_MOV_LDMXCSR byte sequence).
void JitCompilerX86::h_CFROUND(Instruction& instr, int i) {
emit(REX_MOV_RR64);
emitByte(0xc0 + instr.src);
int rotate = (13 - (instr.getImm32() & 63)) & 63;
if (rotate != 0) {
emit(ROL_RAX);
emitByte(rotate);
}
emit(AND_OR_MOV_LDMXCSR);
}
// CBRANCH: add a biased immediate to the destination register, test the
// condition bit window, and jump back to the instruction after the register's
// last modification when the window is zero.
void JitCompilerX86::h_CBRANCH(Instruction& instr, int i) {
int reg = instr.dst;
// Branch target: one past the instruction that last wrote `reg`
// (index -1 + 1 == 0, i.e. the start of the loop body, when never written).
int target = registerUsage[reg] + 1;
emit(REX_ADD_I);
emitByte(0xc0 + reg);
int shift = instr.getModCond() + ConditionOffset;
// Force the condition bit to 1 and clear the bit below it so the branch
// cannot be taken more than a bounded number of times.
uint32_t imm = instr.getImm32() | (1UL << shift);
if (ConditionOffset > 0 || shift > 0)
imm &= ~(1UL << (shift - 1));
emit32(imm);
emit(REX_TEST);
emitByte(0xc0 + reg);
emit32(ConditionMask << shift);
emit(JZ);
emit32(instructionOffsets[target] - (codePos + 4));
//mark all registers as used
for (unsigned j = 0; j < RegistersCount; ++j) {
registerUsage[j] = i;
}
}
// ISTORE: [scratchpad address from dst] = src.
void JitCompilerX86::h_ISTORE(Instruction& instr, int i) {
genAddressRegDst(instr);
emit(REX_MOV_MR);
emitByte(0x04 + 8 * instr.src);
emitByte(0x06);
}
// NOP: emits a single-byte nop to keep instruction counts consistent.
void JitCompilerX86::h_NOP(Instruction& instr, int i) {
emit(NOP1);
}
// Opcode dispatch table: instruction_weights.hpp defines WT(x)/REPN so that
// each handler is repeated according to its configured frequency, filling all
// 256 opcode slots.
#include "instruction_weights.hpp"
#define INST_HANDLE(x) REPN(&JitCompilerX86::h_##x, WT(x))
InstructionGeneratorX86 JitCompilerX86::engine[256] = {
INST_HANDLE(IADD_RS)
INST_HANDLE(IADD_M)
INST_HANDLE(ISUB_R)
INST_HANDLE(ISUB_M)
INST_HANDLE(IMUL_R)
INST_HANDLE(IMUL_M)
INST_HANDLE(IMULH_R)
INST_HANDLE(IMULH_M)
INST_HANDLE(ISMULH_R)
INST_HANDLE(ISMULH_M)
INST_HANDLE(IMUL_RCP)
INST_HANDLE(INEG_R)
INST_HANDLE(IXOR_R)
INST_HANDLE(IXOR_M)
INST_HANDLE(IROR_R)
INST_HANDLE(IROL_R)
INST_HANDLE(ISWAP_R)
INST_HANDLE(FSWAP_R)
INST_HANDLE(FADD_R)
INST_HANDLE(FADD_M)
INST_HANDLE(FSUB_R)
INST_HANDLE(FSUB_M)
INST_HANDLE(FSCAL_R)
INST_HANDLE(FMUL_R)
INST_HANDLE(FDIV_M)
INST_HANDLE(FSQRT_R)
INST_HANDLE(CBRANCH)
INST_HANDLE(CFROUND)
INST_HANDLE(ISTORE)
INST_HANDLE(NOP)
};
}

View File

@ -0,0 +1,142 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cstdint>
#include <cstring>
#include <vector>
#include "common.hpp"
namespace randomx {
// Forward declarations keep this header light; only pointers/references to
// these types appear below.
class Program;
class ProgramConfiguration;
class SuperscalarProgram;
class JitCompilerX86;
class Instruction;
// Pointer-to-member type for the per-opcode code generators.
typedef void(JitCompilerX86::*InstructionGeneratorX86)(Instruction&, int);
// JIT compiler that translates RandomX programs (and superscalar programs)
// into x86-64 machine code held in a single page-allocated buffer.
class JitCompilerX86 {
public:
JitCompilerX86();
~JitCompilerX86();
// Compile a program for fast (dataset) mode.
void generateProgram(Program&, ProgramConfiguration&);
// Compile a program for light (cache) mode; datasetOffset selects the block.
void generateProgramLight(Program&, ProgramConfiguration&, uint32_t);
// Compile N chained superscalar programs into the upper buffer region.
template<size_t N>
void generateSuperscalarHash(SuperscalarProgram (&programs)[N], std::vector<uint64_t> &);
// Copy the pre-assembled dataset-init routine to the buffer start.
void generateDatasetInitCode();
// The compiled program / dataset-init entry points alias the buffer start.
ProgramFunc* getProgramFunc() {
return (ProgramFunc*)code;
}
DatasetInitFunc* getDatasetInitFunc() {
return (DatasetInitFunc*)code;
}
uint8_t* getCode() {
return code;
}
size_t getCodeSize();
// Page-protection control (W^X support).
void enableWriting();
void enableExecution();
void enableAll();
private:
static InstructionGeneratorX86 engine[256]; // opcode -> handler dispatch table
std::vector<int32_t> instructionOffsets; // code offset of each compiled instruction (CBRANCH targets)
int registerUsage[RegistersCount]; // index of last instruction writing each register
uint8_t* code; // JIT code buffer
int32_t codePos; // current emit offset within the buffer
void generateProgramPrologue(Program&, ProgramConfiguration&);
void generateProgramEpilogue(Program&, ProgramConfiguration&);
void genAddressReg(Instruction&, bool);
void genAddressRegDst(Instruction&);
void genAddressImm(Instruction&);
void genSIB(int scale, int index, int base);
void generateCode(Instruction&, int);
void generateSuperscalarCode(Instruction &, std::vector<uint64_t> &);
// Low-level emitters: append raw bytes at codePos.
void emitByte(uint8_t val) {
code[codePos] = val;
codePos++;
}
void emit32(uint32_t val) {
memcpy(code + codePos, &val, sizeof val);
codePos += sizeof val;
}
void emit64(uint64_t val) {
memcpy(code + codePos, &val, sizeof val);
codePos += sizeof val;
}
template<size_t N>
void emit(const uint8_t (&src)[N]) {
emit(src, N);
}
void emit(const uint8_t* src, size_t count) {
memcpy(code + codePos, src, count);
codePos += count;
}
// Per-opcode handlers (one per RandomX instruction type).
void h_IADD_RS(Instruction&, int);
void h_IADD_M(Instruction&, int);
void h_ISUB_R(Instruction&, int);
void h_ISUB_M(Instruction&, int);
void h_IMUL_R(Instruction&, int);
void h_IMUL_M(Instruction&, int);
void h_IMULH_R(Instruction&, int);
void h_IMULH_M(Instruction&, int);
void h_ISMULH_R(Instruction&, int);
void h_ISMULH_M(Instruction&, int);
void h_IMUL_RCP(Instruction&, int);
void h_INEG_R(Instruction&, int);
void h_IXOR_R(Instruction&, int);
void h_IXOR_M(Instruction&, int);
void h_IROR_R(Instruction&, int);
void h_IROL_R(Instruction&, int);
void h_ISWAP_R(Instruction&, int);
void h_FSWAP_R(Instruction&, int);
void h_FADD_R(Instruction&, int);
void h_FADD_M(Instruction&, int);
void h_FSUB_R(Instruction&, int);
void h_FSUB_M(Instruction&, int);
void h_FSCAL_R(Instruction&, int);
void h_FMUL_R(Instruction&, int);
void h_FDIV_M(Instruction&, int);
void h_FSQRT_R(Instruction&, int);
void h_CBRANCH(Instruction&, int);
void h_CFROUND(Instruction&, int);
void h_ISTORE(Instruction&, int);
void h_NOP(Instruction&, int);
};
}

View File

@ -0,0 +1,232 @@
# Copyright (c) 2018-2019, tevador <tevador@gmail.com>
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the copyright holder nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;# GNU assembler source (Intel syntax). DECL() handles the leading-underscore
;# symbol convention on macOS; WINABI selects the Windows x64 calling
;# convention variants of the prologue/epilogue includes.
.intel_syntax noprefix
#if defined(__APPLE__)
.text
#define DECL(x) _##x
#else
.section .text
#define DECL(x) x
#endif
#if defined(__WIN32__) || defined(__CYGWIN__)
#define WINABI
#endif
.global DECL(randomx_prefetch_scratchpad)
.global DECL(randomx_prefetch_scratchpad_end)
.global DECL(randomx_program_prologue)
.global DECL(randomx_program_prologue_first_load)
.global DECL(randomx_program_loop_begin)
.global DECL(randomx_program_loop_load)
.global DECL(randomx_program_start)
.global DECL(randomx_program_read_dataset)
.global DECL(randomx_program_read_dataset_sshash_init)
.global DECL(randomx_program_read_dataset_sshash_fin)
.global DECL(randomx_program_loop_store)
.global DECL(randomx_program_loop_end)
.global DECL(randomx_dataset_init)
.global DECL(randomx_program_epilogue)
.global DECL(randomx_sshash_load)
.global DECL(randomx_sshash_prefetch)
.global DECL(randomx_sshash_end)
.global DECL(randomx_sshash_init)
.global DECL(randomx_program_end)
.global DECL(randomx_reciprocal_fast)
#include "configuration.h"
;# Address masks derived from the RandomX configuration (all sizes are
;# multiples of 64, so masking aligns to cache lines).
#define RANDOMX_SCRATCHPAD_MASK (RANDOMX_SCRATCHPAD_L3-64)
#define RANDOMX_DATASET_BASE_MASK (RANDOMX_DATASET_BASE_SIZE-64)
#define RANDOMX_CACHE_MASK (RANDOMX_ARGON_MEMORY*16-1)
#define RANDOMX_ALIGN 4096
#define SUPERSCALAR_OFFSET ((((RANDOMX_ALIGN + 32 * RANDOMX_PROGRAM_SIZE) - 1) / (RANDOMX_ALIGN) + 1) * (RANDOMX_ALIGN))
#define db .byte
;# Prefetch the two scratchpad cache lines addressed by the low and high
;# 32 bits of rax (masked to the scratchpad range). Clobbers rdx.
DECL(randomx_prefetch_scratchpad):
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
prefetcht0 [rsi+rax]
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
prefetcht0 [rsi+rdx]
DECL(randomx_prefetch_scratchpad_end):
.balign 64
DECL(randomx_program_prologue):
#if defined(WINABI)
#include "asm/program_prologue_win64.inc"
#else
#include "asm/program_prologue_linux.inc"
#endif
movapd xmm13, xmmword ptr [mantissaMask+rip]
movapd xmm14, xmmword ptr [exp240+rip]
movapd xmm15, xmmword ptr [scaleMask+rip]
DECL(randomx_program_prologue_first_load):
xor rax, r8
xor rax, r8
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
jmp DECL(randomx_program_loop_begin)
.balign 64
#include "asm/program_xmm_constants.inc"
.balign 64
/* Marker labels delimiting the pieces of the per-iteration program loop.
   The JIT copies the machine code between consecutive markers when it
   assembles a generated program; the nop instructions only keep adjacent
   labels at distinct addresses. */
DECL(randomx_program_loop_begin):
nop
DECL(randomx_program_loop_load):
#include "asm/program_loop_load.inc"
DECL(randomx_program_start):
nop
DECL(randomx_program_read_dataset):
#include "asm/program_read_dataset.inc"
DECL(randomx_program_read_dataset_sshash_init):
#include "asm/program_read_dataset_sshash_init.inc"
DECL(randomx_program_read_dataset_sshash_fin):
#include "asm/program_read_dataset_sshash_fin.inc"
DECL(randomx_program_loop_store):
#include "asm/program_loop_store.inc"
DECL(randomx_program_loop_end):
nop
.balign 64
/* randomx_dataset_init(cache, dataset, startBlock, endBlock)
   Fills dataset items [startBlock, endBlock). For each item, a hand-encoded
   relative call jumps SUPERSCALAR_OFFSET bytes past the start of this
   routine, where the JIT places the generated SuperscalarHash code; the
   resulting registers r8-r15 are then stored as one 64-byte dataset item. */
DECL(randomx_dataset_init):
push rbx
push rbp
push r12
push r13
push r14
push r15
#if defined(WINABI)
push rdi
push rsi
mov rdi, qword ptr [rcx] ;# cache->memory
mov rsi, rdx ;# dataset
mov rbp, r8 ;# block index
push r9 ;# max. block index
#else
mov rdi, qword ptr [rdi] ;# cache->memory
;# dataset in rsi
mov rbp, rdx ;# block index
push rcx ;# max. block index
#endif
init_block_loop:
prefetchw byte ptr [rsi] ;# prefetch the output item for writing
mov rbx, rbp ;# rbx = current item number
/* call <randomx_dataset_init + SUPERSCALAR_OFFSET>: the rel32 displacement
   is measured from call_offset, the address following this 5-byte call. */
.byte 232 ;# 0xE8 = call
.int SUPERSCALAR_OFFSET - (call_offset - DECL(randomx_dataset_init))
call_offset:
mov qword ptr [rsi+0], r8
mov qword ptr [rsi+8], r9
mov qword ptr [rsi+16], r10
mov qword ptr [rsi+24], r11
mov qword ptr [rsi+32], r12
mov qword ptr [rsi+40], r13
mov qword ptr [rsi+48], r14
mov qword ptr [rsi+56], r15
add rbp, 1 ;# next item
add rsi, 64 ;# advance output pointer by one item
cmp rbp, qword ptr [rsp] ;# reached max. block index?
jb init_block_loop
pop rax ;# discard max. block index
#if defined(WINABI)
pop rsi
pop rdi
#endif
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
ret
.balign 64
/* Program epilogue: stores the final register state, then restores ABI
   registers and returns via the OS-specific include. */
DECL(randomx_program_epilogue):
#include "asm/program_epilogue_store.inc"
#if defined(WINABI)
#include "asm/program_epilogue_win64.inc"
#else
#include "asm/program_epilogue_linux.inc"
#endif
.balign 64
/* Marker-delimited SuperscalarHash code fragments (load and prefetch
   snippets) copied by the JIT; the nop keeps the end label distinct. */
DECL(randomx_sshash_load):
#include "asm/program_sshash_load.inc"
DECL(randomx_sshash_prefetch):
#include "asm/program_sshash_prefetch.inc"
DECL(randomx_sshash_end):
nop
.balign 64
/* Initializes SuperscalarHash registers for dataset item rbx:
   r8 = (rbx + 1) * r0_mul, then r9..r15 = r8 xor the per-register
   constants r1_add..r7_add (all defined in
   asm/program_sshash_constants.inc below). Also emits the prefetch
   snippet from the included file. */
DECL(randomx_sshash_init):
lea r8, [rbx+1] ;# item number + 1
#include "asm/program_sshash_prefetch.inc"
imul r8, qword ptr [r0_mul+rip]
mov r9, qword ptr [r1_add+rip]
xor r9, r8
mov r10, qword ptr [r2_add+rip]
xor r10, r8
mov r11, qword ptr [r3_add+rip]
xor r11, r8
mov r12, qword ptr [r4_add+rip]
xor r12, r8
mov r13, qword ptr [r5_add+rip]
xor r13, r8
mov r14, qword ptr [r6_add+rip]
xor r14, r8
mov r15, qword ptr [r7_add+rip]
xor r15, r8
jmp DECL(randomx_program_end)
.balign 64
#include "asm/program_sshash_constants.inc"
.balign 64
DECL(randomx_program_end):
nop
/* randomx_reciprocal_fast(divisor): thin ABI shim -- moves the System V
   argument register (rdi) into rcx so the included code can use the same
   register on both Windows and System V calling conventions. */
DECL(randomx_reciprocal_fast):
#if !defined(WINABI)
mov rcx, rdi
#endif
#include "asm/randomx_reciprocal.inc"

View File

@ -0,0 +1,227 @@
; Copyright (c) 2018-2019, tevador <tevador@gmail.com>
;
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
; * Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
; * Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
; * Neither the name of the copyright holder nor the
; names of its contributors may be used to endorse or promote products
; derived from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
; CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
; OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
IFDEF RAX
_RANDOMX_JITX86_STATIC SEGMENT PAGE READ EXECUTE
PUBLIC randomx_prefetch_scratchpad
PUBLIC randomx_prefetch_scratchpad_end
PUBLIC randomx_program_prologue
PUBLIC randomx_program_prologue_first_load
PUBLIC randomx_program_loop_begin
PUBLIC randomx_program_loop_load
PUBLIC randomx_program_start
PUBLIC randomx_program_read_dataset
PUBLIC randomx_program_read_dataset_sshash_init
PUBLIC randomx_program_read_dataset_sshash_fin
PUBLIC randomx_dataset_init
PUBLIC randomx_program_loop_store
PUBLIC randomx_program_loop_end
PUBLIC randomx_program_epilogue
PUBLIC randomx_sshash_load
PUBLIC randomx_sshash_prefetch
PUBLIC randomx_sshash_end
PUBLIC randomx_sshash_init
PUBLIC randomx_program_end
PUBLIC randomx_reciprocal_fast
include asm/configuration.asm
RANDOMX_SCRATCHPAD_MASK EQU (RANDOMX_SCRATCHPAD_L3-64)
RANDOMX_DATASET_BASE_MASK EQU (RANDOMX_DATASET_BASE_SIZE-64)
RANDOMX_CACHE_MASK EQU (RANDOMX_ARGON_MEMORY*16-1)
RANDOMX_ALIGN EQU 4096
SUPERSCALAR_OFFSET EQU ((((RANDOMX_ALIGN + 32 * RANDOMX_PROGRAM_SIZE) - 1) / (RANDOMX_ALIGN) + 1) * (RANDOMX_ALIGN))
; Prefetch the two scratchpad cache lines addressed by the 64-bit value in
; rax: low 32 bits give the first offset, high 32 bits (after ror) the
; second. rsi holds the scratchpad base. Clobbers eax and rdx.
randomx_prefetch_scratchpad PROC
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
prefetcht0 [rsi+rax]
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
prefetcht0 [rsi+rdx]
randomx_prefetch_scratchpad ENDP
; Empty PROC used only as an end marker: the JIT copies the bytes between
; randomx_prefetch_scratchpad and this label.
randomx_prefetch_scratchpad_end PROC
randomx_prefetch_scratchpad_end ENDP
ALIGN 64
; Program prologue (Windows-only MASM build): ABI setup comes from the
; include, then the floating point constant masks are loaded into
; xmm13-xmm15 (constants defined in asm/program_xmm_constants.inc).
randomx_program_prologue PROC
include asm/program_prologue_win64.inc
movapd xmm13, xmmword ptr [mantissaMask]
movapd xmm14, xmmword ptr [exp240]
movapd xmm15, xmmword ptr [scaleMask]
randomx_program_prologue ENDP
; Derives the two initial scratchpad offsets (eax/edx) from one 64-bit
; value, then enters the main program loop.
; NOTE(review): the two identical "xor rax, r8" instructions are redundant
; as written; presumably placeholders whose register bytes are patched at
; run time by the JIT compiler -- confirm before changing.
randomx_program_prologue_first_load PROC
xor rax, r8
xor rax, r8
mov rdx, rax
and eax, RANDOMX_SCRATCHPAD_MASK
ror rdx, 32
and edx, RANDOMX_SCRATCHPAD_MASK
jmp randomx_program_loop_begin
randomx_program_prologue_first_load ENDP
ALIGN 64
include asm/program_xmm_constants.inc
ALIGN 64
; Marker PROCs delimiting the pieces of the per-iteration program loop.
; The JIT copies the machine code between consecutive markers when it
; assembles a generated program; the nops only keep labels distinct.
randomx_program_loop_begin PROC
nop
randomx_program_loop_begin ENDP
randomx_program_loop_load PROC
include asm/program_loop_load.inc
randomx_program_loop_load ENDP
randomx_program_start PROC
nop
randomx_program_start ENDP
randomx_program_read_dataset PROC
include asm/program_read_dataset.inc
randomx_program_read_dataset ENDP
randomx_program_read_dataset_sshash_init PROC
include asm/program_read_dataset_sshash_init.inc
randomx_program_read_dataset_sshash_init ENDP
randomx_program_read_dataset_sshash_fin PROC
include asm/program_read_dataset_sshash_fin.inc
randomx_program_read_dataset_sshash_fin ENDP
randomx_program_loop_store PROC
include asm/program_loop_store.inc
randomx_program_loop_store ENDP
randomx_program_loop_end PROC
nop
randomx_program_loop_end ENDP
ALIGN 64
; randomx_dataset_init(cache, dataset, startBlock, endBlock) -- Win64 ABI.
; Fills dataset items [startBlock, endBlock). For each item, a hand-encoded
; relative call jumps SUPERSCALAR_OFFSET bytes past the start of this PROC,
; where the JIT places the generated SuperscalarHash code; the resulting
; registers r8-r15 are stored as one 64-byte dataset item.
randomx_dataset_init PROC
push rbx
push rbp
push rdi
push rsi
push r12
push r13
push r14
push r15
mov rdi, qword ptr [rcx] ;# cache->memory
mov rsi, rdx ;# dataset
mov rbp, r8 ;# block index
push r9 ;# max. block index
init_block_loop:
prefetchw byte ptr [rsi] ;# prefetch the output item for writing
mov rbx, rbp ;# rbx = current item number
; call <randomx_dataset_init + SUPERSCALAR_OFFSET>: "distance" is the
; offset of the end of the dd field from the start of the PROC, so the
; rel32 encoded below lands SUPERSCALAR_OFFSET bytes past the PROC start.
db 232 ;# 0xE8 = call
dd SUPERSCALAR_OFFSET - distance
distance equ $ - offset randomx_dataset_init
mov qword ptr [rsi+0], r8
mov qword ptr [rsi+8], r9
mov qword ptr [rsi+16], r10
mov qword ptr [rsi+24], r11
mov qword ptr [rsi+32], r12
mov qword ptr [rsi+40], r13
mov qword ptr [rsi+48], r14
mov qword ptr [rsi+56], r15
add rbp, 1 ;# next item
add rsi, 64 ;# advance output pointer by one item
cmp rbp, qword ptr [rsp] ;# reached max. block index?
jb init_block_loop
pop r9 ;# discard max. block index
pop r15
pop r14
pop r13
pop r12
pop rsi
pop rdi
pop rbp
pop rbx
ret
randomx_dataset_init ENDP
ALIGN 64
; Program epilogue: stores the final register state, then restores ABI
; registers and returns (Win64-only in the MASM build).
randomx_program_epilogue PROC
include asm/program_epilogue_store.inc
include asm/program_epilogue_win64.inc
randomx_program_epilogue ENDP
ALIGN 64
; Marker-delimited SuperscalarHash code fragments (load and prefetch
; snippets) copied by the JIT; the nop keeps the end label distinct.
randomx_sshash_load PROC
include asm/program_sshash_load.inc
randomx_sshash_load ENDP
randomx_sshash_prefetch PROC
include asm/program_sshash_prefetch.inc
randomx_sshash_prefetch ENDP
randomx_sshash_end PROC
nop
randomx_sshash_end ENDP
ALIGN 64
; Initializes SuperscalarHash registers for dataset item rbx:
; r8 = (rbx + 1) * r0_mul, then r9..r15 = r8 xor the per-register
; constants r1_add..r7_add (defined in asm/program_sshash_constants.inc).
randomx_sshash_init PROC
lea r8, [rbx+1] ;# item number + 1
include asm/program_sshash_prefetch.inc
imul r8, qword ptr [r0_mul]
mov r9, qword ptr [r1_add]
xor r9, r8
mov r10, qword ptr [r2_add]
xor r10, r8
mov r11, qword ptr [r3_add]
xor r11, r8
mov r12, qword ptr [r4_add]
xor r12, r8
mov r13, qword ptr [r5_add]
xor r13, r8
mov r14, qword ptr [r6_add]
xor r14, r8
mov r15, qword ptr [r7_add]
xor r15, r8
jmp randomx_program_end
randomx_sshash_init ENDP
ALIGN 64
include asm/program_sshash_constants.inc
ALIGN 64
; End marker for generated programs; nop keeps the label distinct.
randomx_program_end PROC
nop
randomx_program_end ENDP
; randomx_reciprocal_fast(divisor): the Win64 ABI already passes the
; argument in rcx, so the shared include is used directly.
randomx_reciprocal_fast PROC
include asm/randomx_reciprocal.inc
randomx_reciprocal_fast ENDP
_RANDOMX_JITX86_STATIC ENDS
ENDIF
END

View File

@ -0,0 +1,51 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
// Symbols defined in the hand-written x86-64 assembly (jit_x86_static).
// They are declared as functions so their addresses can be taken, but most
// are merely labels delimiting machine-code fragments that the JIT compiler
// copies when assembling RandomX programs -- they are not all independently
// callable.
extern "C" {
void randomx_prefetch_scratchpad();
void randomx_prefetch_scratchpad_end();
void randomx_program_prologue();
void randomx_program_prologue_first_load();
void randomx_program_loop_begin();
void randomx_program_loop_load();
void randomx_program_start();
void randomx_program_read_dataset();
void randomx_program_read_dataset_sshash_init();
void randomx_program_read_dataset_sshash_fin();
void randomx_program_loop_store();
void randomx_program_loop_end();
void randomx_dataset_init();
void randomx_program_epilogue();
void randomx_sshash_load();
void randomx_sshash_prefetch();
void randomx_sshash_end();
void randomx_sshash_init();
void randomx_program_end();
}

View File

@ -0,0 +1,35 @@
# mingw-stdthreads: header-only std::thread/std::mutex/... replacements for
# MinGW toolchains that lack them.
# Fix: cmake_minimum_required() must be called BEFORE project() so that the
# version check and policy settings are in effect when the project is
# configured (see CMake documentation); the original file had the two
# commands in the reverse order.
cmake_minimum_required(VERSION 3.0)
project(mingw_stdthreads)
option(MINGW_STDTHREADS_BUILD_TEST "Build tests")
option(MINGW_STDTHREADS_GENERATE_STDHEADERS "Generate std-like headers")
string(CONCAT mingw_stdthreads_dir_docstring
    "Optional. When generating std-like headers , this variable can be set"
    "to manually specify the path to mingw-stdthreads directory containing"
    "original library headers.")
set(MINGW_STDTHREADS_DIR "${PROJECT_SOURCE_DIR}"
    CACHE PATH ${mingw_stdthreads_dir_docstring})
# mingw-stdthreads is a header-only library, so make it a INTERFACE target
add_library(${PROJECT_NAME} INTERFACE)
target_include_directories(${PROJECT_NAME} INTERFACE "${PROJECT_SOURCE_DIR}")
if(MINGW_STDTHREADS_GENERATE_STDHEADERS)
    # Check if we are using gcc or clang
    if (CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU")
        # Add as dependency and generate std headers
        add_subdirectory(cmake_stdheaders_generator)
        target_link_libraries(${PROJECT_NAME} INTERFACE
            cmake_stdheaders_generator)
    else()
        message(WARNING "Cannot generate std headers with this compiler: "
                ${CMAKE_CXX_COMPILER_ID} ". "
                "Please fall back to #include <mingw.xxx.h>")
    endif()
endif()
# Build tests.exe
if(MINGW_STDTHREADS_BUILD_TEST)
    add_subdirectory(tests)
endif()

View File

@ -0,0 +1,24 @@
Copyright (c) 2016, Mega Limited
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,58 @@
mingw-std-threads
=================
Implementation of standard C++11 threading classes, which are currently still missing on MinGW GCC.
Target Windows version
----------------------
This implementation should work with Windows XP (regardless of service pack), or newer.
The library automatically detects the version of Windows that is being targeted (at compile time), and selects an implementation that takes advantage of available Windows features.
In MinGW GCC, the target Windows version may optionally be selected by the command-line option `-D _WIN32_WINNT=...`.
Use `0x0600` for Windows Vista, or `0x0601` for Windows 7.
See "[Modifying `WINVER` and `_WIN32_WINNT`](https://docs.microsoft.com/en-us/cpp/porting/modifying-winver-and-win32-winnt)" for more details.
Usage
-----
This is a header-only library. To use, just include the corresponding `mingw.xxx.h file`, where `xxx` would be the name of the standard header that you would normally include.
For example, `#include "mingw.thread.h"` replaces `#include <thread>`.
A `CMakeLists.txt` has also been provided. You can add it to your project by using `add_subdirectory()`, and then this library can be added as your targets' dependency by using `target_link_libraries(YOUR_TARGET PRIVATE mingw_stdthreads)`. By default it just adds a include path, allowing you to include headers using angle brackets (for example `#include <mingw.thread.h>`). But you can also provide options to let it generate "std-like" headers (see next paragraph).
Using "std-like" headers
------------------------
Probably you don't really want to replace all your includes from `#include <header>` to `#include "mingw.header.h"`. So if you are using GCC or clang, here are some ways to make you happy :)
With CMake, you just need to turn on the option `MINGW_STDTHREADS_GENERATE_STDHEADERS` before adding mingw-stdthreads, something like this:
```CMake
option(MINGW_STDTHREADS_GENERATE_STDHEADERS "" ON)
add_subdirectory(mingw_stdthreads)
target_link_libraries(${TARGET} PRIVATE mingw_stdthreads)
```
When CMake generates project files, headers named in the "standard header" way will be generated and added to your include path. Then you can avoid includes like `mingw.thread.h`, and keep using `#include <thread>` as usual. In addition, `MINGW_STDTHREADS_GENERATED_STDHEADERS` will be defined; you can use this macro to check whether those generated headers are actually available.
If you aren't using CMake, you can use one of the three scripts inside [utility_scripts](utility_scripts) directory to manually generate those "std-like" headers. Note that this requires Microsoft Power Shell, so if you are cross-compiling, you would need to install Power Shell.
Compatibility
-------------
This code has been tested to work with MinGW-w64 5.3.0, but should work with any other MinGW version that has the `std` threading classes missing, has C++11 support for lambda functions, variadic templates, and has working mutex helper classes in `<mutex>`.
Switching from the win32-pthread based implementation
-----------------------------------------------------
It seems that recent versions of MinGW-w64 include a Win32 port of pthreads, and have the `std::thread`, `std::mutex`, etc. classes implemented and working based on that compatibility
layer.
That is a somewhat heavier implementation, as it relies on an abstraction layer, so you may still want to use this implementation for efficiency purposes.
Unfortunately you can't use this library standalone and independent of the system `<mutex>` headers, as it relies on those headers for `std::unique_lock` and other non-trivial utility classes.
In that case you will need to edit the `c++config.h` file of your MinGW setup and comment out the definition of `_GLIBCXX_HAS_GTHREADS`.
This will cause the system headers not to define the actual `thread`, `mutex`, etc. classes, but still define the necessary utility classes.
Why MinGW has no threading classes
----------------------------------
It seems that for cross-platform threading implementation, the GCC standard library relies on the gthreads/pthreads library.
If this library is not available, as is the case with MinGW, the classes `std::thread`, `std::mutex`, `std::condition_variable` are not defined.
However, various usable helper classes are still defined in the system headers.
Hence, this implementation does not re-define them, and instead includes those headers.

View File

@ -0,0 +1,78 @@
cmake_minimum_required(VERSION 3.0)
project(cmake_stdheaders_generator)
set(output_include_path "${PROJECT_BINARY_DIR}/${PROJECT_NAME}")
message("${PROJECT_NAME}: output_include_path set to ${output_include_path}")
# Generates one std-like wrapper header (e.g. "thread") in
# ${output_include_path}. The wrapper includes both the compiler's real
# system header (discovered via "g++ -H") and the matching
# mingw.<header_file_name>.h from mingw_stdthreads_folder.
#   header_file_name       - standard header name, without angle brackets
#   mingw_stdthreads_folder - directory containing the mingw.*.h headers
function(generate_mingw_stdthreads_header header_file_name
mingw_stdthreads_folder)
set(template_file_path "${PROJECT_SOURCE_DIR}/template.cpp")
set(destination_file_path "${output_include_path}/${header_file_name}")
# Check if compiler is gcc or clang
if (NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU")
# Actually this should never happen because it should have already
# been checked in the parent CMakeLists.txt
message(FATAL_ERROR "Unsupported compiler")
endif()
# Call g++ to retrieve header path
# The -H option will let g++ outputs header dependencies to stderr
set(compiler_arguments
${template_file_path}
-H
"-DMINGW_STDTHREADS_DETECTING_SYSTEM_HEADER=<${header_file_name}>")
# And content of stderr is saved to variable compiler_output
execute_process(COMMAND "${CMAKE_CXX_COMPILER}" ${compiler_arguments}
ERROR_VARIABLE compiler_output
OUTPUT_QUIET)
# Get full path to system header
# In -H output a line starting with ". " is a first-level include, i.e.
# the system header we asked template.cpp to include.
string(REGEX MATCH "[.] ([^\r\n]*)" _ "${compiler_output}")
set(mingw_stdthreads_headers_generator_system_header "${CMAKE_MATCH_1}")
message("Matched: <${mingw_stdthreads_headers_generator_system_header}>")
# Ensure file exists
if(NOT EXISTS "${mingw_stdthreads_headers_generator_system_header}")
message(FATAL_ERROR "<${header_file_name}>'s path not found, "
"compiler output was:\n${compiler_output}")
endif()
# Get full path to mingw-stdthreads header
set(mingw_stdthreads_headers_generator_library_header
"${mingw_stdthreads_folder}/mingw.${header_file_name}.h")
# Normalize paths
file(TO_CMAKE_PATH "${mingw_stdthreads_headers_generator_system_header}"
mingw_stdthreads_headers_generator_system_header)
file(TO_CMAKE_PATH "${mingw_stdthreads_headers_generator_library_header}"
mingw_stdthreads_headers_generator_library_header)
# Substitute both discovered paths into template.cpp to produce the wrapper.
configure_file("${template_file_path}" "${destination_file_path}")
endfunction()
# Sanity-check that the mingw-stdthreads source directory exists before
# generating anything.
if(EXISTS "${MINGW_STDTHREADS_DIR}")
    message("${PROJECT_NAME}: MINGW_STDTHREADS_DIR: "
            "${MINGW_STDTHREADS_DIR}")
else()
    # Fix: the original wrote ${PROECT_NAME} (typo), which expanded to an
    # empty string in the fatal-error message.
    message(FATAL_ERROR "${PROJECT_NAME}: MINGW_STDTHREADS_DIR does not "
            "exist: ${MINGW_STDTHREADS_DIR}")
endif()
# Generate one std-like wrapper per supported standard header.
# <condition_variable>
generate_mingw_stdthreads_header(condition_variable "${MINGW_STDTHREADS_DIR}")
# <future>
generate_mingw_stdthreads_header(future "${MINGW_STDTHREADS_DIR}")
# <mutex>
generate_mingw_stdthreads_header(mutex "${MINGW_STDTHREADS_DIR}")
# <shared_mutex>
generate_mingw_stdthreads_header(shared_mutex "${MINGW_STDTHREADS_DIR}")
# <thread>
generate_mingw_stdthreads_header(thread "${MINGW_STDTHREADS_DIR}")
# the generated headers are to be considered as a header only library
# so we create an interface target
add_library(${PROJECT_NAME} INTERFACE)
target_compile_definitions(${PROJECT_NAME} INTERFACE
    MINGW_STDTHREADS_GENERATED_STDHEADERS)
target_include_directories(${PROJECT_NAME} INTERFACE "${output_include_path}")

View File

@ -0,0 +1,11 @@
// Dual-purpose template. During header-path detection the generator compiles
// this file with -H and MINGW_STDTHREADS_DETECTING_SYSTEM_HEADER defined:
// the include below pulls in the system header (so -H prints its path) and
// the static_assert then deliberately aborts the compilation.
#ifdef MINGW_STDTHREADS_DETECTING_SYSTEM_HEADER
#include MINGW_STDTHREADS_DETECTING_SYSTEM_HEADER
static_assert(false, "Prevent compilation")
#else
// Otherwise (after configure_file substitutes the two paths) this is the
// generated std-like wrapper header.
#pragma once
// both system header and mingw-stdthreads header should already have include
// guards. But we still add a #pragma once just to be safe.
#include "${mingw_stdthreads_headers_generator_system_header}"
#include "${mingw_stdthreads_headers_generator_library_header}"
#endif

View File

@ -0,0 +1,564 @@
/**
* @file condition_variable.h
* @brief std::condition_variable implementation for MinGW
*
* (c) 2013-2016 by Mega Limited, Auckland, New Zealand
* @author Alexander Vassilev
*
* @copyright Simplified (2-clause) BSD License.
* You should have received a copy of the license along with this
* program.
*
* This code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* @note
* This file may become part of the mingw-w64 runtime package. If/when this happens,
* the appropriate license will be added, i.e. this code will become dual-licensed,
* and the current BSD 2-clause license will stay.
*/
#ifndef MINGW_CONDITIONAL_VARIABLE_H
#define MINGW_CONDITIONAL_VARIABLE_H
#if !defined(__cplusplus) || (__cplusplus < 201103L)
#error A C++11 compiler is required!
#endif
// Use the standard classes for std::, if available.
#include <condition_variable>
#include <cassert>
#include <chrono>
#include <system_error>
#include <sdkddkver.h> // Detect Windows version.
#if (WINVER < _WIN32_WINNT_VISTA)
#include <atomic>
#endif
#if (defined(__MINGW32__) && !defined(__MINGW64_VERSION_MAJOR))
#pragma message "The Windows API that MinGW-w32 provides is not fully compatible\
with Microsoft's API. We'll try to work around this, but we can make no\
guarantees. This problem does not exist in MinGW-w64."
#include <windows.h> // No further granularity can be expected.
#else
#if (WINVER < _WIN32_WINNT_VISTA)
#include <windef.h>
#include <winbase.h> // For CreateSemaphore
#include <handleapi.h>
#endif
#include <synchapi.h>
#endif
#include "mingw.mutex.h"
#include "mingw.shared_mutex.h"
#if !defined(_WIN32_WINNT) || (_WIN32_WINNT < 0x0501)
#error To use the MinGW-std-threads library, you will need to define the macro _WIN32_WINNT to be 0x0501 (Windows XP) or higher.
#endif
namespace mingw_stdthread
{
#if defined(__MINGW32__ ) && !defined(_GLIBCXX_HAS_GTHREADS)
enum class cv_status { no_timeout, timeout };
#else
using std::cv_status;
#endif
namespace xp
{
// Include the XP-compatible condition_variable classes only if actually
// compiling for XP. The XP-compatible classes are slower than the newer
// versions, and depend on features not compatible with Windows Phone 8.
#if (WINVER < _WIN32_WINNT_VISTA)
// Condition variable usable with any lockable type ("_any"), for Windows XP
// where native condition variables (Vista+) do not exist. Built from a
// counting semaphore (waiters block on it), an auto-reset event (each woken
// waiter signals the notifier), and a recursive mutex that serializes
// notifications against newly arriving waiters. mNumWaiters tracks the
// number of threads currently blocked in wait_impl.
class condition_variable_any
{
recursive_mutex mMutex {};
std::atomic<int> mNumWaiters {0};
HANDLE mSemaphore;
HANDLE mWakeEvent {};
public:
using native_handle_type = HANDLE;
native_handle_type native_handle()
{
return mSemaphore;
}
condition_variable_any(const condition_variable_any&) = delete;
condition_variable_any& operator=(const condition_variable_any&) = delete;
// Creates the semaphore and the wake event; throws std::system_error on
// failure (the semaphore is closed if the event cannot be created).
condition_variable_any()
: mSemaphore(CreateSemaphoreA(NULL, 0, 0xFFFF, NULL))
{
if (mSemaphore == NULL)
throw std::system_error(GetLastError(), std::generic_category());
mWakeEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
if (mWakeEvent == NULL)
{
CloseHandle(mSemaphore);
throw std::system_error(GetLastError(), std::generic_category());
}
}
~condition_variable_any()
{
CloseHandle(mWakeEvent);
CloseHandle(mSemaphore);
}
private:
// Common wait path: registers this thread as a waiter, releases the
// caller's lock, blocks on the semaphore for up to `timeout` ms, signals
// the notifier via the wake event, then re-acquires the lock. Returns
// true if notified, false on timeout; throws on any other wait result.
template <class M>
bool wait_impl(M& lock, DWORD timeout)
{
{
lock_guard<recursive_mutex> guard(mMutex);
mNumWaiters++;
}
lock.unlock();
DWORD ret = WaitForSingleObject(mSemaphore, timeout);
mNumWaiters--;
SetEvent(mWakeEvent);
lock.lock();
if (ret == WAIT_OBJECT_0)
return true;
else if (ret == WAIT_TIMEOUT)
return false;
//2 possible cases:
//1)The point in notify_all() where we determine the count to
//increment the semaphore with has not been reached yet:
//we just need to decrement mNumWaiters, but setting the event does not hurt
//
//2)Semaphore has just been released with mNumWaiters just before
//we decremented it. This means that the semaphore count
//after all waiters finish won't be 0 - because not all waiters
//woke up by acquiring the semaphore - we woke up by a timeout.
//The notify_all() must handle this gracefully
//
else
{
using namespace std;
throw system_error(make_error_code(errc::protocol_error));
}
}
public:
// Blocks until notified; spurious wakeups are possible, as with
// std::condition_variable_any.
template <class M>
void wait(M& lock)
{
wait_impl(lock, INFINITE);
}
// Blocks until pred() becomes true.
template <class M, class Predicate>
void wait(M& lock, Predicate pred)
{
while(!pred())
{
wait(lock);
};
}
// Wakes all current waiters and blocks until each of them has actually
// woken up, then drains any leftover semaphore count from timed-out
// waiters.
void notify_all() noexcept
{
lock_guard<recursive_mutex> lock(mMutex); //block any further wait requests until all current waiters are unblocked
if (mNumWaiters.load() <= 0)
return;
ReleaseSemaphore(mSemaphore, mNumWaiters, NULL);
while(mNumWaiters > 0)
{
auto ret = WaitForSingleObject(mWakeEvent, 1000);
if (ret == WAIT_FAILED || ret == WAIT_ABANDONED)
std::terminate();
}
assert(mNumWaiters == 0);
//in case some of the waiters timed out just after we released the
//semaphore by mNumWaiters, it won't be zero now, because not all waiters
//woke up by acquiring the semaphore. So we must zero the semaphore before
//we accept waiters for the next event
//See _wait_impl for details
while(WaitForSingleObject(mSemaphore, 0) == WAIT_OBJECT_0);
}
// Wakes one waiter (if any) and blocks until the waiter count has
// actually dropped by one.
void notify_one() noexcept
{
lock_guard<recursive_mutex> lock(mMutex);
int targetWaiters = mNumWaiters.load() - 1;
if (targetWaiters <= -1)
return;
ReleaseSemaphore(mSemaphore, 1, NULL);
while(mNumWaiters > targetWaiters)
{
auto ret = WaitForSingleObject(mWakeEvent, 1000);
if (ret == WAIT_FAILED || ret == WAIT_ABANDONED)
std::terminate();
}
assert(mNumWaiters == targetWaiters);
}
// Relative-timeout wait: rel_time is clamped to [0, INFINITE-1] ms, and
// a request of INFINITE ms or more is reported as no_timeout.
template <class M, class Rep, class Period>
cv_status wait_for(M& lock,
const std::chrono::duration<Rep, Period>& rel_time)
{
using namespace std::chrono;
auto timeout = duration_cast<milliseconds>(rel_time).count();
DWORD waittime = (timeout < INFINITE) ? ((timeout < 0) ? 0 : static_cast<DWORD>(timeout)) : (INFINITE - 1);
bool ret = wait_impl(lock, waittime) || (timeout >= INFINITE);
return ret?cv_status::no_timeout:cv_status::timeout;
}
template <class M, class Rep, class Period, class Predicate>
bool wait_for(M& lock,
const std::chrono::duration<Rep, Period>& rel_time, Predicate pred)
{
return wait_until(lock, std::chrono::steady_clock::now()+rel_time, pred);
}
template <class M, class Clock, class Duration>
cv_status wait_until (M& lock,
const std::chrono::time_point<Clock,Duration>& abs_time)
{
return wait_for(lock, abs_time - Clock::now());
}
// Predicate form: loops until pred() holds or the deadline passes;
// returns the final value of pred().
template <class M, class Clock, class Duration, class Predicate>
bool wait_until (M& lock,
const std::chrono::time_point<Clock, Duration>& abs_time,
Predicate pred)
{
while (!pred())
{
if (wait_until(lock, abs_time) == cv_status::timeout)
{
return pred();
}
}
return true;
}
};
// std::condition_variable replacement for XP. The standard interface only
// accepts unique_lock<mutex>, so this privately inherits the "_any" variant
// and re-exposes each operation with the narrower lock type.
class condition_variable: condition_variable_any
{
using base = condition_variable_any;
public:
using base::native_handle_type;
using base::native_handle;
using base::base;
using base::notify_all;
using base::notify_one;
void wait(unique_lock<mutex> &lock)
{
base::wait(lock);
}
template <class Predicate>
void wait(unique_lock<mutex>& lock, Predicate pred)
{
base::wait(lock, pred);
}
template <class Rep, class Period>
cv_status wait_for(unique_lock<mutex>& lock, const std::chrono::duration<Rep, Period>& rel_time)
{
return base::wait_for(lock, rel_time);
}
template <class Rep, class Period, class Predicate>
bool wait_for(unique_lock<mutex>& lock, const std::chrono::duration<Rep, Period>& rel_time, Predicate pred)
{
return base::wait_for(lock, rel_time, pred);
}
template <class Clock, class Duration>
cv_status wait_until (unique_lock<mutex>& lock, const std::chrono::time_point<Clock,Duration>& abs_time)
{
return base::wait_until(lock, abs_time);
}
template <class Clock, class Duration, class Predicate>
bool wait_until (unique_lock<mutex>& lock, const std::chrono::time_point<Clock, Duration>& abs_time, Predicate pred)
{
return base::wait_until(lock, abs_time, pred);
}
};
#endif // Compiling for XP
} // Namespace mingw_stdthread::xp
#if (WINVER >= _WIN32_WINNT_VISTA)
namespace vista
{
// If compiling for Vista or higher, use the native condition variable.
// Wrapper around the native Win32 CONDITION_VARIABLE (Vista and later).
// Works with unique_lock<mutex>, where the mutex may be either the
// CRITICAL_SECTION-based xp::mutex or the SRW-lock-based windows7::mutex;
// the two private wait_impl overloads dispatch to the matching Win32
// sleep function.
class condition_variable
{
// All-ones DWORD, same value as the Win32 INFINITE timeout constant.
static constexpr DWORD kInfinite = 0xffffffffl;
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
CONDITION_VARIABLE cvariable_ = CONDITION_VARIABLE_INIT;
#pragma GCC diagnostic pop
friend class condition_variable_any;
#if STDMUTEX_RECURSION_CHECKS
// With recursion checking enabled, keep the mutex's recorded owner
// consistent across the implicit unlock/relock performed by the wait.
template<typename MTX>
inline static void before_wait (MTX * pmutex)
{
pmutex->mOwnerThread.checkSetOwnerBeforeUnlock();
}
template<typename MTX>
inline static void after_wait (MTX * pmutex)
{
pmutex->mOwnerThread.setOwnerAfterLock(GetCurrentThreadId());
}
#else
inline static void before_wait (void *) { }
inline static void after_wait (void *) { }
#endif
// Wait path for the CRITICAL_SECTION-based xp::mutex. The lock is
// released from the unique_lock for the duration of the sleep and
// re-adopted afterwards. Returns the BOOL success of the Win32 call.
bool wait_impl (unique_lock<xp::mutex> & lock, DWORD time)
{
using mutex_handle_type = typename xp::mutex::native_handle_type;
static_assert(std::is_same<mutex_handle_type, PCRITICAL_SECTION>::value,
"Native Win32 condition variable requires std::mutex to \
use native Win32 critical section objects.");
xp::mutex * pmutex = lock.release();
before_wait(pmutex);
BOOL success = SleepConditionVariableCS(&cvariable_,
pmutex->native_handle(),
time);
after_wait(pmutex);
lock = unique_lock<xp::mutex>(*pmutex, adopt_lock);
return success;
}
// Wait path for the SRW-lock-based windows7::mutex (exclusive mode).
bool wait_unique (windows7::mutex * pmutex, DWORD time)
{
before_wait(pmutex);
BOOL success = SleepConditionVariableSRW( native_handle(),
pmutex->native_handle(),
time,
// CONDITION_VARIABLE_LOCKMODE_SHARED has a value not specified by
// Microsoft's Dev Center, but is known to be (convertible to) a ULONG. To
// ensure that the value passed to this function is not equal to Microsoft's
// constant, we can either use a static_assert, or simply generate an
// appropriate value.
!CONDITION_VARIABLE_LOCKMODE_SHARED);
after_wait(pmutex);
return success;
}
bool wait_impl (unique_lock<windows7::mutex> & lock, DWORD time)
{
windows7::mutex * pmutex = lock.release();
bool success = wait_unique(pmutex, time);
lock = unique_lock<windows7::mutex>(*pmutex, adopt_lock);
return success;
}
public:
using native_handle_type = PCONDITION_VARIABLE;
native_handle_type native_handle (void)
{
return &cvariable_;
}
condition_variable (void) = default;
~condition_variable (void) = default;
condition_variable (const condition_variable &) = delete;
condition_variable & operator= (const condition_variable &) = delete;
void notify_one (void) noexcept
{
WakeConditionVariable(&cvariable_);
}
void notify_all (void) noexcept
{
WakeAllConditionVariable(&cvariable_);
}
void wait (unique_lock<mutex> & lock)
{
wait_impl(lock, kInfinite);
}
template<class Predicate>
void wait (unique_lock<mutex> & lock, Predicate pred)
{
while (!pred())
wait(lock);
}
// Relative-timeout wait: rel_time is clamped to [0, kInfinite-1] ms,
// and a request of kInfinite ms or more is reported as no_timeout.
template <class Rep, class Period>
cv_status wait_for(unique_lock<mutex>& lock,
const std::chrono::duration<Rep, Period>& rel_time)
{
using namespace std::chrono;
auto timeout = duration_cast<milliseconds>(rel_time).count();
DWORD waittime = (timeout < kInfinite) ? ((timeout < 0) ? 0 : static_cast<DWORD>(timeout)) : (kInfinite - 1);
bool result = wait_impl(lock, waittime) || (timeout >= kInfinite);
return result ? cv_status::no_timeout : cv_status::timeout;
}
template <class Rep, class Period, class Predicate>
bool wait_for(unique_lock<mutex>& lock,
const std::chrono::duration<Rep, Period>& rel_time,
Predicate pred)
{
return wait_until(lock,
std::chrono::steady_clock::now() + rel_time,
std::move(pred));
}
template <class Clock, class Duration>
cv_status wait_until (unique_lock<mutex>& lock,
const std::chrono::time_point<Clock,Duration>& abs_time)
{
return wait_for(lock, abs_time - Clock::now());
}
// Predicate form: loops until pred() holds or the deadline passes;
// returns the final value of pred().
template <class Clock, class Duration, class Predicate>
bool wait_until (unique_lock<mutex>& lock,
const std::chrono::time_point<Clock, Duration>& abs_time,
Predicate pred)
{
while (!pred())
{
if (wait_until(lock, abs_time) == cv_status::timeout)
{
return pred();
}
}
return true;
}
};
// std::condition_variable_any replacement: waits on arbitrary lockable
// types by funneling every wait through an internal condition_variable
// guarded by an internal mutex. Specialized overloads bypass the internal
// mutex when the caller's lock already wraps a native Windows mutex.
class condition_variable_any
{
static constexpr DWORD kInfinite = 0xffffffffl;
using native_shared_mutex = windows7::shared_mutex;
condition_variable internal_cv_ {};
// When available, the SRW-based mutexes should be faster than the
// CriticalSection-based mutexes. Only try_lock will be unavailable in Vista,
// and try_lock is not used by condition_variable_any.
windows7::mutex internal_mutex_ {};
// Generic path: release the caller's lock, wait on the internal
// condition variable under the internal mutex, then re-lock. The caller's
// lock is unlocked only after the internal mutex is held, so a notify
// between unlock and wait cannot be lost.
template<class L>
bool wait_impl (L & lock, DWORD time)
{
unique_lock<decltype(internal_mutex_)> internal_lock(internal_mutex_);
lock.unlock();
bool success = internal_cv_.wait_impl(internal_lock, time);
lock.lock();
return success;
}
// If the lock happens to be called on a native Windows mutex, skip any extra
// contention.
inline bool wait_impl (unique_lock<mutex> & lock, DWORD time)
{
return internal_cv_.wait_impl(lock, time);
}
// Some shared_mutex functionality is available even in Vista, but it's not
// until Windows 7 that a full implementation is natively possible. The class
// itself is defined, with missing features, at the Vista feature level.
bool wait_impl (unique_lock<native_shared_mutex> & lock, DWORD time)
{
native_shared_mutex * pmutex = lock.release();
bool success = internal_cv_.wait_unique(pmutex, time);
lock = unique_lock<native_shared_mutex>(*pmutex, adopt_lock);
return success;
}
// Shared (reader) lock over a native SRW-backed shared_mutex: wait
// directly with CONDITION_VARIABLE_LOCKMODE_SHARED.
bool wait_impl (shared_lock<native_shared_mutex> & lock, DWORD time)
{
native_shared_mutex * pmutex = lock.release();
BOOL success = SleepConditionVariableSRW(native_handle(),
pmutex->native_handle(), time,
CONDITION_VARIABLE_LOCKMODE_SHARED);
lock = shared_lock<native_shared_mutex>(*pmutex, adopt_lock);
return success;
}
public:
using native_handle_type = typename condition_variable::native_handle_type;
native_handle_type native_handle (void)
{
return internal_cv_.native_handle();
}
void notify_one (void) noexcept
{
internal_cv_.notify_one();
}
void notify_all (void) noexcept
{
internal_cv_.notify_all();
}
condition_variable_any (void) = default;
~condition_variable_any (void) = default;
// Block until notified; spurious wakeups possible (use predicate form).
template<class L>
void wait (L & lock)
{
wait_impl(lock, kInfinite);
}
template<class L, class Predicate>
void wait (L & lock, Predicate pred)
{
while (!pred())
wait(lock);
}
// Timed wait; same clamping convention as condition_variable::wait_for.
template <class L, class Rep, class Period>
cv_status wait_for(L& lock, const std::chrono::duration<Rep,Period>& period)
{
using namespace std::chrono;
auto timeout = duration_cast<milliseconds>(period).count();
DWORD waittime = (timeout < kInfinite) ? ((timeout < 0) ? 0 : static_cast<DWORD>(timeout)) : (kInfinite - 1);
bool result = wait_impl(lock, waittime) || (timeout >= kInfinite);
return result ? cv_status::no_timeout : cv_status::timeout;
}
template <class L, class Rep, class Period, class Predicate>
bool wait_for(L& lock, const std::chrono::duration<Rep, Period>& period,
Predicate pred)
{
return wait_until(lock, std::chrono::steady_clock::now() + period,
std::move(pred));
}
template <class L, class Clock, class Duration>
cv_status wait_until (L& lock,
const std::chrono::time_point<Clock,Duration>& abs_time)
{
return wait_for(lock, abs_time - Clock::now());
}
// Predicate form: on timeout, return the predicate's final value.
template <class L, class Clock, class Duration, class Predicate>
bool wait_until (L& lock,
const std::chrono::time_point<Clock, Duration>& abs_time,
Predicate pred)
{
while (!pred())
{
if (wait_until(lock, abs_time) == cv_status::timeout)
{
return pred();
}
}
return true;
}
};
} // Namespace vista
#endif
#if WINVER < 0x0600
using xp::condition_variable;
using xp::condition_variable_any;
#else
using vista::condition_variable;
using vista::condition_variable_any;
#endif
} // Namespace mingw_stdthread
// Push objects into std, but only if they are not already there.
namespace std
{
// Because of quirks of the compiler, the common "using namespace std;"
// directive would flatten the namespaces and introduce ambiguity where there
// was none. Direct specification (std::), however, would be unaffected.
// Take the safe option, and include only in the presence of MinGW's win32
// implementation.
#if defined(__MINGW32__ ) && !defined(_GLIBCXX_HAS_GTHREADS)
using mingw_stdthread::cv_status;
using mingw_stdthread::condition_variable;
using mingw_stdthread::condition_variable_any;
#elif !defined(MINGW_STDTHREAD_REDUNDANCY_WARNING) // Skip repetition
#define MINGW_STDTHREAD_REDUNDANCY_WARNING
#pragma message "This version of MinGW seems to include a win32 port of\
pthreads, and probably already has C++11 std threading classes implemented,\
based on pthreads. These classes, found in namespace std, are not overridden\
by the mingw-std-thread library. If you would still like to use this\
implementation (as it is more lightweight), use the classes provided in\
namespace mingw_stdthread."
#endif
}
#endif // MINGW_CONDITIONAL_VARIABLE_H

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,109 @@
/// \file mingw.invoke.h
/// \brief Lightweight `invoke` implementation, for C++11 and C++14.
///
/// (c) 2018-2019 by Nathaniel J. McClatchey, San Jose, CA, United States
/// \author Nathaniel J. McClatchey, PhD
///
/// \copyright Simplified (2-clause) BSD License.
///
/// \note This file may become part of the mingw-w64 runtime package. If/when
/// this happens, the appropriate license will be added, i.e. this code will
/// become dual-licensed, and the current BSD 2-clause license will stay.
#ifndef MINGW_INVOKE_H_
#define MINGW_INVOKE_H_
#include <type_traits> // For std::result_of, etc.
#include <utility> // For std::forward
#include <functional> // For std::reference_wrapper
namespace mingw_stdthread
{
namespace detail
{
// For compatibility, implement std::invoke for C++11 and C++14
#if __cplusplus < 201703L
// Primary template of the C++11/14 std::invoke shim: handles plain callables
// (function objects, lambdas, function pointers). The two bool parameters
// select the pointer-to-member specializations below.
template<bool PMemFunc, bool PMemData>
struct Invoker
{
template<class F, class... Args>
inline static typename std::result_of<F(Args...)>::type invoke (F&& f, Args&&... args)
{
return std::forward<F>(f)(std::forward<Args>(args)...);
}
};
// Helper that produces the object on which a pointer-to-member is applied.
// The bool parameter is "T1 is (derived from) the member's class": true
// means use the reference directly; false means T1 is a pointer-like or
// reference_wrapper and must be dereferenced/unwrapped first.
template<bool>
struct InvokerHelper;
template<>
struct InvokerHelper<false>
{
// Pointer-like argument: dereference it.
template<class T1>
inline static auto get (T1&& t1) -> decltype(*std::forward<T1>(t1))
{
return *std::forward<T1>(t1);
}
// reference_wrapper argument: unwrap it.
template<class T1>
inline static auto get (const std::reference_wrapper<T1>& t1) -> decltype(t1.get())
{
return t1.get();
}
};
template<>
struct InvokerHelper<true>
{
// Already a (reference to the) object itself: forward unchanged.
template<class T1>
inline static auto get (T1&& t1) -> decltype(std::forward<T1>(t1))
{
return std::forward<T1>(t1);
}
};
// Pointer-to-member-FUNCTION case: resolve the object via InvokerHelper
// (dereferencing/unwrapping as needed), then call the member function.
template<>
struct Invoker<true, false>
{
template<class T, class F, class T1, class... Args>
inline static auto invoke (F T::* f, T1&& t1, Args&&... args) ->\
decltype((InvokerHelper<std::is_base_of<T,typename std::decay<T1>::type>::value>::get(std::forward<T1>(t1)).*f)(std::forward<Args>(args)...))
{
return (InvokerHelper<std::is_base_of<T,typename std::decay<T1>::type>::value>::get(std::forward<T1>(t1)).*f)(std::forward<Args>(args)...);
}
};
// Pointer-to-member-DATA case: resolve the object, then access the member.
template<>
struct Invoker<false, true>
{
template<class T, class F, class T1, class... Args>
inline static auto invoke (F T::* f, T1&& t1, Args&&... args) ->\
decltype(InvokerHelper<std::is_base_of<T,typename std::decay<T1>::type>::value>::get(t1).*f)
{
return InvokerHelper<std::is_base_of<T,typename std::decay<T1>::type>::value>::get(t1).*f;
}
};
// Classifies the callable (member function pointer / member data pointer /
// plain callable) and dispatches to the matching Invoker specialization.
// Member-data access is only valid with exactly one argument (the object).
template<class F, class... Args>
struct InvokeResult
{
typedef Invoker<std::is_member_function_pointer<typename std::remove_reference<F>::type>::value,
std::is_member_object_pointer<typename std::remove_reference<F>::type>::value &&
(sizeof...(Args) == 1)> invoker;
inline static auto invoke (F&& f, Args&&... args) -> decltype(invoker::invoke(std::forward<F>(f), std::forward<Args>(args)...))
{
return invoker::invoke(std::forward<F>(f), std::forward<Args>(args)...);
}
};
// Entry point: behaves like C++17 std::invoke for C++11/14 translation units.
template<class F, class...Args>
auto invoke (F&& f, Args&&... args) -> decltype(InvokeResult<F, Args...>::invoke(std::forward<F>(f), std::forward<Args>(args)...))
{
return InvokeResult<F, Args...>::invoke(std::forward<F>(f), std::forward<Args>(args)...);
}
#else
using std::invoke;
#endif
} // Namespace "detail"
} // Namespace "mingw_stdthread"
#endif

View File

@ -0,0 +1,491 @@
/**
* @file mingw.mutex.h
* @brief std::mutex et al implementation for MinGW
** (c) 2013-2016 by Mega Limited, Auckland, New Zealand
* @author Alexander Vassilev
*
* @copyright Simplified (2-clause) BSD License.
* You should have received a copy of the license along with this
* program.
*
* This code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* @note
* This file may become part of the mingw-w64 runtime package. If/when this happens,
* the appropriate license will be added, i.e. this code will become dual-licensed,
* and the current BSD 2-clause license will stay.
*/
#ifndef WIN32STDMUTEX_H
#define WIN32STDMUTEX_H
#if !defined(__cplusplus) || (__cplusplus < 201103L)
#error A C++11 compiler is required!
#endif
// Recursion checks on non-recursive locks have some performance penalty, and
// the C++ standard does not mandate them. The user might want to explicitly
// enable or disable such checks. If the user has no preference, enable such
// checks in debug builds, but not in release builds.
#ifdef STDMUTEX_RECURSION_CHECKS
#elif defined(NDEBUG)
#define STDMUTEX_RECURSION_CHECKS 0
#else
#define STDMUTEX_RECURSION_CHECKS 1
#endif
#include <chrono>
#include <system_error>
#include <atomic>
#include <mutex> //need for call_once()
#if STDMUTEX_RECURSION_CHECKS || !defined(NDEBUG)
#include <cstdio>
#endif
#include <sdkddkver.h> // Detect Windows version.
#if (defined(__MINGW32__) && !defined(__MINGW64_VERSION_MAJOR))
#pragma message "The Windows API that MinGW-w32 provides is not fully compatible\
with Microsoft's API. We'll try to work around this, but we can make no\
guarantees. This problem does not exist in MinGW-w64."
#include <windows.h> // No further granularity can be expected.
#else
#if STDMUTEX_RECURSION_CHECKS
#include <processthreadsapi.h> // For GetCurrentThreadId
#endif
#include <synchapi.h> // For InitializeCriticalSection, etc.
#include <errhandlingapi.h> // For GetLastError
#include <handleapi.h>
#endif
// Need for the implementation of invoke
#include "mingw.invoke.h"
#if !defined(_WIN32_WINNT) || (_WIN32_WINNT < 0x0501)
#error To use the MinGW-std-threads library, you will need to define the macro _WIN32_WINNT to be 0x0501 (Windows XP) or higher.
#endif
namespace mingw_stdthread
{
// The _NonRecursive class has mechanisms that do not play nice with direct
// manipulation of the native handle. This forward declaration is part of
// a friend class declaration.
#if STDMUTEX_RECURSION_CHECKS
namespace vista
{
class condition_variable;
}
#endif
// To make this namespace equivalent to the thread-related subset of std,
// pull in the classes and class templates supplied by std but not by this
// implementation.
using std::lock_guard;
using std::unique_lock;
using std::adopt_lock_t;
using std::defer_lock_t;
using std::try_to_lock_t;
using std::adopt_lock;
using std::defer_lock;
using std::try_to_lock;
class recursive_mutex
{
CRITICAL_SECTION mHandle;
public:
typedef LPCRITICAL_SECTION native_handle_type;
native_handle_type native_handle() {return &mHandle;}
recursive_mutex() noexcept : mHandle()
{
InitializeCriticalSection(&mHandle);
}
recursive_mutex (const recursive_mutex&) = delete;
recursive_mutex& operator=(const recursive_mutex&) = delete;
~recursive_mutex() noexcept
{
DeleteCriticalSection(&mHandle);
}
void lock()
{
EnterCriticalSection(&mHandle);
}
void unlock()
{
LeaveCriticalSection(&mHandle);
}
bool try_lock()
{
return (TryEnterCriticalSection(&mHandle)!=0);
}
};
#if STDMUTEX_RECURSION_CHECKS
// Debug-only owner-thread tracker used by the non-recursive mutexes to turn
// self-deadlock (recursive locking) and foreign-thread unlocks into loud
// errors. 0 means "unowned"; otherwise holds the owner's Win32 thread id.
struct _OwnerThread
{
// If this is to be read before locking, then the owner-thread variable must
// be atomic to prevent a torn read from spuriously causing errors.
std::atomic<DWORD> mOwnerThread;
constexpr _OwnerThread () noexcept : mOwnerThread(0) {}
// Report a detected deadlock/misuse: print, flush, then throw
// resource_deadlock_would_occur.
static void on_deadlock (void)
{
using namespace std;
fprintf(stderr, "FATAL: Recursive locking of non-recursive mutex\
detected. Throwing system exception\n");
fflush(stderr);
throw system_error(make_error_code(errc::resource_deadlock_would_occur));
}
// Called before locking: dies if this thread already owns the mutex.
// Returns the caller's thread id for use with setOwnerAfterLock.
DWORD checkOwnerBeforeLock() const
{
DWORD self = GetCurrentThreadId();
if (mOwnerThread.load(std::memory_order_relaxed) == self)
on_deadlock();
return self;
}
void setOwnerAfterLock(DWORD id)
{
mOwnerThread.store(id, std::memory_order_relaxed);
}
// Called before unlocking: dies if the caller is not the recorded owner,
// otherwise clears ownership.
void checkSetOwnerBeforeUnlock()
{
DWORD self = GetCurrentThreadId();
if (mOwnerThread.load(std::memory_order_relaxed) != self)
on_deadlock();
mOwnerThread.store(0, std::memory_order_relaxed);
}
};
#endif
// Though the Slim Reader-Writer (SRW) locks used here are not complete until
// Windows 7, implementing partial functionality in Vista will simplify the
// interaction with condition variables.
#if defined(_WIN32) && (WINVER >= _WIN32_WINNT_VISTA)
namespace windows7
{
// std::mutex replacement backed by a Slim Reader/Writer lock used in
// exclusive mode only. Constexpr-constructible (SRWLOCK_INIT), so no
// runtime initialization is needed. Non-recursive: recursive locking is
// undefined behavior unless STDMUTEX_RECURSION_CHECKS catches it.
class mutex
{
SRWLOCK mHandle;
// Track locking thread for error checking.
#if STDMUTEX_RECURSION_CHECKS
friend class vista::condition_variable;
_OwnerThread mOwnerThread {};
#endif
public:
typedef PSRWLOCK native_handle_type;
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
constexpr mutex () noexcept : mHandle(SRWLOCK_INIT) { }
#pragma GCC diagnostic pop
mutex (const mutex&) = delete;
mutex & operator= (const mutex&) = delete;
void lock (void)
{
// Note: Undefined behavior if called recursively.
#if STDMUTEX_RECURSION_CHECKS
DWORD self = mOwnerThread.checkOwnerBeforeLock();
#endif
AcquireSRWLockExclusive(&mHandle);
#if STDMUTEX_RECURSION_CHECKS
mOwnerThread.setOwnerAfterLock(self);
#endif
}
void unlock (void)
{
#if STDMUTEX_RECURSION_CHECKS
mOwnerThread.checkSetOwnerBeforeUnlock();
#endif
ReleaseSRWLockExclusive(&mHandle);
}
// TryAcquireSRW functions are a Windows 7 feature.
#if (WINVER >= _WIN32_WINNT_WIN7)
bool try_lock (void)
{
#if STDMUTEX_RECURSION_CHECKS
DWORD self = mOwnerThread.checkOwnerBeforeLock();
#endif
BOOL ret = TryAcquireSRWLockExclusive(&mHandle);
#if STDMUTEX_RECURSION_CHECKS
if (ret)
mOwnerThread.setOwnerAfterLock(self);
#endif
return ret;
}
#endif
native_handle_type native_handle (void)
{
return &mHandle;
}
};
} // Namespace windows7
#endif // Compiling for Vista
namespace xp
{
// std::mutex replacement for pre-Vista Windows, backed by a
// CRITICAL_SECTION. CRITICAL_SECTIONs cannot be initialized at compile
// time, yet std::mutex requires a constexpr constructor, so initialization
// is deferred to the first lock attempt and guarded by the atomic mState:
//   2 = critical section not yet initialized
//   1 = initialization in progress (another thread is doing it)
//   0 = initialized, ready for use
class mutex
{
CRITICAL_SECTION mHandle;
std::atomic_uchar mState;
// Track locking thread for error checking.
#if STDMUTEX_RECURSION_CHECKS
friend class vista::condition_variable;
_OwnerThread mOwnerThread {};
#endif
public:
typedef PCRITICAL_SECTION native_handle_type;
constexpr mutex () noexcept : mHandle(), mState(2) { }
mutex (const mutex&) = delete;
mutex & operator= (const mutex&) = delete;
~mutex() noexcept
{
// Undefined behavior if the mutex is held (locked) by any thread.
// Undefined behavior if a thread terminates while holding ownership of the
// mutex.
DeleteCriticalSection(&mHandle);
}
void lock (void)
{
// Lazy one-time initialization: the winner of the 2 -> 1 CAS initializes
// the critical section and publishes it (store 0); losers spin, yielding
// their time slice (Sleep(0)) until initialization completes.
unsigned char state = mState.load(std::memory_order_acquire);
while (state) {
if ((state == 2) && mState.compare_exchange_weak(state, 1, std::memory_order_acquire))
{
InitializeCriticalSection(&mHandle);
mState.store(0, std::memory_order_release);
break;
}
if (state == 1)
{
Sleep(0);
state = mState.load(std::memory_order_acquire);
}
}
#if STDMUTEX_RECURSION_CHECKS
DWORD self = mOwnerThread.checkOwnerBeforeLock();
#endif
EnterCriticalSection(&mHandle);
#if STDMUTEX_RECURSION_CHECKS
mOwnerThread.setOwnerAfterLock(self);
#endif
}
void unlock (void)
{
#if STDMUTEX_RECURSION_CHECKS
mOwnerThread.checkSetOwnerBeforeUnlock();
#endif
LeaveCriticalSection(&mHandle);
}
bool try_lock (void)
{
// Perform the same lazy initialization as lock(), but if another thread
// is mid-initialization (state 1), report failure instead of blocking.
unsigned char state = mState.load(std::memory_order_acquire);
if ((state == 2) && mState.compare_exchange_strong(state, 1, std::memory_order_acquire))
{
InitializeCriticalSection(&mHandle);
mState.store(0, std::memory_order_release);
}
if (state == 1)
return false;
#if STDMUTEX_RECURSION_CHECKS
DWORD self = mOwnerThread.checkOwnerBeforeLock();
#endif
BOOL ret = TryEnterCriticalSection(&mHandle);
#if STDMUTEX_RECURSION_CHECKS
if (ret)
mOwnerThread.setOwnerAfterLock(self);
#endif
return ret;
}
native_handle_type native_handle (void)
{
return &mHandle;
}
};
} // Namespace "xp"
#if (WINVER >= _WIN32_WINNT_WIN7)
using windows7::mutex;
#else
using xp::mutex;
#endif
// Timed, recursive mutex backed by a Win32 mutex HANDLE (which is
// recursive by nature). The non-recursive, error-checking timed_mutex is
// layered on top of this class when recursion checks are enabled.
class recursive_timed_mutex
{
static constexpr DWORD kWaitAbandoned = 0x00000080l;
static constexpr DWORD kWaitObject0 = 0x00000000l;
static constexpr DWORD kInfinite = 0xffffffffl;
// Wait up to `ms` milliseconds for ownership. Returns true when ownership
// was acquired — including the abandoned-mutex case, where the previous
// owner terminated while holding the lock (fatal in debug builds).
inline bool try_lock_internal (DWORD ms) noexcept
{
DWORD ret = WaitForSingleObject(mHandle, ms);
#ifndef NDEBUG
if (ret == kWaitAbandoned)
{
using namespace std;
fprintf(stderr, "FATAL: Thread terminated while holding a mutex.");
terminate();
}
#endif
return (ret == kWaitObject0) || (ret == kWaitAbandoned);
}
protected:
HANDLE mHandle;
// Track locking thread for error checking of non-recursive timed_mutex. For
// standard compliance, this must be defined in same class and at the same
// access-control level as every other variable in the timed_mutex.
#if STDMUTEX_RECURSION_CHECKS
friend class vista::condition_variable;
_OwnerThread mOwnerThread {};
#endif
public:
typedef HANDLE native_handle_type;
native_handle_type native_handle() const {return mHandle;}
recursive_timed_mutex(const recursive_timed_mutex&) = delete;
recursive_timed_mutex& operator=(const recursive_timed_mutex&) = delete;
recursive_timed_mutex(): mHandle(CreateMutex(NULL, FALSE, NULL)) {}
~recursive_timed_mutex()
{
CloseHandle(mHandle);
}
void lock()
{
DWORD ret = WaitForSingleObject(mHandle, kInfinite);
// If (ret == WAIT_ABANDONED), then the thread that held ownership was
// terminated. Behavior is undefined, but Windows will pass ownership to this
// thread.
#ifndef NDEBUG
if (ret == kWaitAbandoned)
{
using namespace std;
fprintf(stderr, "FATAL: Thread terminated while holding a mutex.");
terminate();
}
#endif
if ((ret != kWaitObject0) && (ret != kWaitAbandoned))
{
throw std::system_error(GetLastError(), std::system_category());
}
}
void unlock()
{
if (!ReleaseMutex(mHandle))
throw std::system_error(GetLastError(), std::system_category());
}
bool try_lock()
{
return try_lock_internal(0);
}
// Try to acquire the lock, waiting up to `dur`.
// Bug fix: the previous `while (timeout > 0)` loop was skipped entirely
// when the converted timeout was <= 0, so try_lock_for(0ms) — and
// try_lock_until with a deadline already in the past — returned false
// without ever attempting to acquire the mutex. The standard requires
// these calls to still attempt the lock, so the loop now always makes at
// least one attempt (with a zero wait when no time remains).
template <class Rep, class Period>
bool try_lock_for(const std::chrono::duration<Rep,Period>& dur)
{
using namespace std::chrono;
auto timeout = duration_cast<milliseconds>(dur).count();
// Each wait is clamped below Windows' INFINITE sentinel; a negative
// remainder must never reach the DWORD parameter.
constexpr auto kMaxStep = static_cast<decltype(timeout)>(kInfinite-1);
do
{
auto step = (timeout < kMaxStep) ? timeout : kMaxStep;
if (step < 0)
step = 0;
if (try_lock_internal(static_cast<DWORD>(step)))
return true;
timeout -= step;
} while (timeout > 0);
return false;
}
template <class Clock, class Duration>
bool try_lock_until(const std::chrono::time_point<Clock,Duration>& timeout_time)
{
return try_lock_for(timeout_time - Clock::now());
}
};
// Override if, and only if, it is necessary for error-checking.
#if STDMUTEX_RECURSION_CHECKS
// Error-checking, non-recursive timed mutex layered (via private
// inheritance) over recursive_timed_mutex. This class only exists when
// STDMUTEX_RECURSION_CHECKS is on; otherwise timed_mutex is a typedef of
// recursive_timed_mutex.
class timed_mutex: recursive_timed_mutex
{
public:
timed_mutex(const timed_mutex&) = delete;
timed_mutex& operator=(const timed_mutex&) = delete;
void lock()
{
DWORD self = mOwnerThread.checkOwnerBeforeLock();
recursive_timed_mutex::lock();
mOwnerThread.setOwnerAfterLock(self);
}
void unlock()
{
mOwnerThread.checkSetOwnerBeforeUnlock();
recursive_timed_mutex::unlock();
}
template <class Rep, class Period>
bool try_lock_for(const std::chrono::duration<Rep,Period>& dur)
{
DWORD self = mOwnerThread.checkOwnerBeforeLock();
bool ret = recursive_timed_mutex::try_lock_for(dur);
if (ret)
mOwnerThread.setOwnerAfterLock(self);
return ret;
}
template <class Clock, class Duration>
bool try_lock_until(const std::chrono::time_point<Clock,Duration>& timeout_time)
{
return try_lock_for(timeout_time - Clock::now());
}
// Bug fix: this previously forwarded to try_lock_for(0ms), whose wait loop
// in the base class is skipped for a zero timeout, so try_lock() always
// returned false without ever attempting to acquire the mutex. Call the
// base class's non-blocking try_lock directly instead, with the same
// owner-thread tracking as the other locking functions.
bool try_lock ()
{
DWORD self = mOwnerThread.checkOwnerBeforeLock();
bool ret = recursive_timed_mutex::try_lock();
if (ret)
mOwnerThread.setOwnerAfterLock(self);
return ret;
}
};
#else
typedef recursive_timed_mutex timed_mutex;
#endif
// One-shot flag for call_once: a mutex to serialize first-time callers plus
// an atomic "already ran" marker for the lock-free fast path. Only
// call_once (a friend) may touch the internals.
class once_flag
{
// When available, the SRW-based mutexes should be faster than the
// CriticalSection-based mutexes. Only try_lock will be unavailable in Vista,
// and try_lock is not used by once_flag.
#if (_WIN32_WINNT == _WIN32_WINNT_VISTA)
windows7::mutex mMutex;
#else
mutex mMutex;
#endif
std::atomic_bool mHasRun;
once_flag(const once_flag&) = delete;
once_flag& operator=(const once_flag&) = delete;
template<class Callable, class... Args>
friend void call_once(once_flag& once, Callable&& f, Args&&... args);
public:
constexpr once_flag() noexcept: mMutex(), mHasRun(false) {}
};
// Invoke func(args...) exactly once per flag, using double-checked locking:
// a lock-free acquire-load fast path, then a re-check under the mutex. The
// flag is set (release) only after func returns normally, so if func throws
// the call does not count and a later caller will retry — as std::call_once
// requires.
template<class Callable, class... Args>
void call_once(once_flag& flag, Callable&& func, Args&&... args)
{
if (flag.mHasRun.load(std::memory_order_acquire))
return;
lock_guard<decltype(flag.mMutex)> lock(flag.mMutex);
if (flag.mHasRun.load(std::memory_order_acquire))
return;
detail::invoke(std::forward<Callable>(func),std::forward<Args>(args)...);
flag.mHasRun.store(true, std::memory_order_release);
}
} // Namespace mingw_stdthread
// Push objects into std, but only if they are not already there.
namespace std
{
// Because of quirks of the compiler, the common "using namespace std;"
// directive would flatten the namespaces and introduce ambiguity where there
// was none. Direct specification (std::), however, would be unaffected.
// Take the safe option, and include only in the presence of MinGW's win32
// implementation.
#if defined(__MINGW32__ ) && !defined(_GLIBCXX_HAS_GTHREADS)
using mingw_stdthread::recursive_mutex;
using mingw_stdthread::mutex;
using mingw_stdthread::recursive_timed_mutex;
using mingw_stdthread::timed_mutex;
using mingw_stdthread::once_flag;
using mingw_stdthread::call_once;
#elif !defined(MINGW_STDTHREAD_REDUNDANCY_WARNING) // Skip repetition
#define MINGW_STDTHREAD_REDUNDANCY_WARNING
#pragma message "This version of MinGW seems to include a win32 port of\
pthreads, and probably already has C++11 std threading classes implemented,\
based on pthreads. These classes, found in namespace std, are not overridden\
by the mingw-std-thread library. If you would still like to use this\
implementation (as it is more lightweight), use the classes provided in\
namespace mingw_stdthread."
#endif
}
#endif // WIN32STDMUTEX_H

View File

@ -0,0 +1,503 @@
/// \file mingw.shared_mutex.h
/// \brief Standard-compliant shared_mutex for MinGW
///
/// (c) 2017 by Nathaniel J. McClatchey, Athens OH, United States
/// \author Nathaniel J. McClatchey
///
/// \copyright Simplified (2-clause) BSD License.
///
/// \note This file may become part of the mingw-w64 runtime package. If/when
/// this happens, the appropriate license will be added, i.e. this code will
/// become dual-licensed, and the current BSD 2-clause license will stay.
/// \note Target Windows version is determined by WINVER, which is determined in
/// <windows.h> from _WIN32_WINNT, which can itself be set by the user.
// Notes on the namespaces:
// - The implementation can be accessed directly in the namespace
// mingw_stdthread.
// - Objects will be brought into namespace std by a using directive. This
// will cause objects declared in std (such as MinGW's implementation) to
// hide this implementation's definitions.
// - To avoid poluting the namespace with implementation details, all objects
// to be pushed into std will be placed in mingw_stdthread::visible.
// The end result is that if MinGW supplies an object, it is automatically
// used. If MinGW does not supply an object, this implementation's version will
// instead be used.
#ifndef MINGW_SHARED_MUTEX_H_
#define MINGW_SHARED_MUTEX_H_
#if !defined(__cplusplus) || (__cplusplus < 201103L)
#error A C++11 compiler is required!
#endif
#include <cassert>
// For descriptive errors.
#include <system_error>
// Implementing a shared_mutex without OS support will require atomic read-
// modify-write capacity.
#include <atomic>
// For timing in shared_lock and shared_timed_mutex.
#include <chrono>
#include <limits>
// Use MinGW's shared_lock class template, if it's available. Requires C++14.
// If unavailable (eg. because this library is being used in C++11), then an
// implementation of shared_lock is provided by this header.
#if (__cplusplus >= 201402L)
#include <shared_mutex>
#endif
// For defer_lock_t, adopt_lock_t, and try_to_lock_t
#include "mingw.mutex.h"
// For this_thread::yield.
//#include "mingw.thread.h"
// Might be able to use native Slim Reader-Writer (SRW) locks.
#ifdef _WIN32
#include <sdkddkver.h> // Detect Windows version.
#if (defined(__MINGW32__) && !defined(__MINGW64_VERSION_MAJOR))
#pragma message "The Windows API that MinGW-w32 provides is not fully compatible\
with Microsoft's API. We'll try to work around this, but we can make no\
guarantees. This problem does not exist in MinGW-w64."
#include <windows.h> // No further granularity can be expected.
#else
#include <synchapi.h>
#endif
#endif
namespace mingw_stdthread
{
// Define a portable atomics-based shared_mutex
namespace portable
{
// Portable (atomics-only) shared_mutex. State is packed into one counter:
// the top bit (kWriteBit) marks an exclusive lock (held or being acquired),
// the remaining bits count active readers. No OS blocking — contended
// paths busy-spin (the this_thread::yield() calls are commented out to
// avoid a dependency on mingw.thread.h).
class shared_mutex
{
typedef uint_fast16_t counter_type;
std::atomic<counter_type> mCounter {0};
static constexpr counter_type kWriteBit = 1 << (std::numeric_limits<counter_type>::digits - 1);
#if STDMUTEX_RECURSION_CHECKS
// Runtime checker for verifying owner threads. Note: Exclusive mode only.
_OwnerThread mOwnerThread {};
#endif
public:
typedef shared_mutex * native_handle_type;
shared_mutex () = default;
// No form of copying or moving should be allowed.
shared_mutex (const shared_mutex&) = delete;
shared_mutex & operator= (const shared_mutex&) = delete;
~shared_mutex ()
{
// Terminate if someone tries to destroy an owned mutex.
assert(mCounter.load(std::memory_order_relaxed) == 0);
}
// Reader acquire: spin until neither the write bit is set nor the reader
// count is saturated, then CAS the incremented count in.
void lock_shared (void)
{
counter_type expected = mCounter.load(std::memory_order_relaxed);
do
{
// Delay if writing or if too many readers are attempting to read.
if (expected >= kWriteBit - 1)
{
using namespace std;
expected = mCounter.load(std::memory_order_relaxed);
continue;
}
if (mCounter.compare_exchange_weak(expected,
static_cast<counter_type>(expected + 1),
std::memory_order_acquire,
std::memory_order_relaxed))
break;
}
while (true);
}
// Single-shot reader acquire. The write bit is masked out of `expected`,
// so if a writer currently holds (or claims) the lock the CAS fails.
bool try_lock_shared (void)
{
counter_type expected = mCounter.load(std::memory_order_relaxed) & static_cast<counter_type>(~kWriteBit);
if (expected + 1 == kWriteBit)
return false;
else
return mCounter.compare_exchange_strong( expected,
static_cast<counter_type>(expected + 1),
std::memory_order_acquire,
std::memory_order_relaxed);
}
// Reader release; in debug builds, throws if no reader lock was held.
void unlock_shared (void)
{
using namespace std;
#ifndef NDEBUG
if (!(mCounter.fetch_sub(1, memory_order_release) & static_cast<counter_type>(~kWriteBit)))
throw system_error(make_error_code(errc::operation_not_permitted));
#else
mCounter.fetch_sub(1, memory_order_release);
#endif
}
// Behavior is undefined if a lock was previously acquired.
void lock (void)
{
#if STDMUTEX_RECURSION_CHECKS
DWORD self = mOwnerThread.checkOwnerBeforeLock();
#endif
using namespace std;
// Might be able to use relaxed memory order...
// Wait for the write-lock to be unlocked, then claim the write slot.
counter_type current;
while ((current = mCounter.fetch_or(kWriteBit, std::memory_order_acquire)) & kWriteBit);
//this_thread::yield();
// Wait for readers to finish up.
while (current != kWriteBit)
{
//this_thread::yield();
current = mCounter.load(std::memory_order_acquire);
}
#if STDMUTEX_RECURSION_CHECKS
mOwnerThread.setOwnerAfterLock(self);
#endif
}
// Writer single-shot acquire: succeeds only from the fully-unlocked state.
bool try_lock (void)
{
#if STDMUTEX_RECURSION_CHECKS
DWORD self = mOwnerThread.checkOwnerBeforeLock();
#endif
counter_type expected = 0;
bool ret = mCounter.compare_exchange_strong(expected, kWriteBit,
std::memory_order_acquire,
std::memory_order_relaxed);
#if STDMUTEX_RECURSION_CHECKS
if (ret)
mOwnerThread.setOwnerAfterLock(self);
#endif
return ret;
}
// Writer release; in debug builds, throws unless exactly the write bit is
// set (i.e. this thread actually held the exclusive lock, no readers).
void unlock (void)
{
#if STDMUTEX_RECURSION_CHECKS
mOwnerThread.checkSetOwnerBeforeUnlock();
#endif
using namespace std;
#ifndef NDEBUG
if (mCounter.load(memory_order_relaxed) != kWriteBit)
throw system_error(make_error_code(errc::operation_not_permitted));
#endif
mCounter.store(0, memory_order_release);
}
native_handle_type native_handle (void)
{
return this;
}
};
} // Namespace portable
// The native shared_mutex implementation primarily uses features of Windows
// Vista, but the features used for try_lock and try_lock_shared were not
// introduced until Windows 7. To allow limited use while compiling for Vista,
// I define the class without try_* functions in that case.
// Only fully-featured implementations will be placed into namespace std.
#if defined(_WIN32) && (WINVER >= _WIN32_WINNT_VISTA)
namespace vista
{
class condition_variable_any;
}
namespace windows7
{
// We already #include "mingw.mutex.h". May as well reduce redundancy.
// Native shared_mutex built on the SRW lock already wrapped by
// windows7::mutex: exclusive operations are inherited, shared (reader)
// operations call the SRW shared-mode API directly. try_* variants need
// Windows 7; at the Vista feature level the class exists without them.
class shared_mutex : windows7::mutex
{
// Allow condition_variable_any (and only condition_variable_any) to treat a
// shared_mutex as its base class.
friend class vista::condition_variable_any;
public:
using windows7::mutex::native_handle_type;
using windows7::mutex::lock;
using windows7::mutex::unlock;
using windows7::mutex::native_handle;
void lock_shared (void)
{
AcquireSRWLockShared(native_handle());
}
void unlock_shared (void)
{
ReleaseSRWLockShared(native_handle());
}
// TryAcquireSRW functions are a Windows 7 feature.
#if (WINVER >= _WIN32_WINNT_WIN7)
bool try_lock_shared (void)
{
return TryAcquireSRWLockShared(native_handle()) != 0;
}
using windows7::mutex::try_lock;
#endif
};
} // Namespace windows7
#endif // Compiling for Vista
#if (defined(_WIN32) && (WINVER >= _WIN32_WINNT_WIN7))
using windows7::shared_mutex;
#else
using portable::shared_mutex;
#endif
// shared_timed_mutex: adds the timed acquisition functions (exclusive and
// shared flavors) on top of shared_mutex by repeatedly polling the
// non-blocking try_lock functions until the deadline passes.
class shared_timed_mutex : shared_mutex
{
typedef shared_mutex Base;
public:
using Base::lock;
using Base::try_lock;
using Base::unlock;
using Base::lock_shared;
using Base::try_lock_shared;
using Base::unlock_shared;
// Poll try_lock() until it succeeds or steady_clock passes the cutoff.
// At least one attempt is always made, even for a past deadline.
template< class Clock, class Duration >
bool try_lock_until ( const std::chrono::time_point<Clock,Duration>& cutoff )
{
for (;;)
{
if (try_lock())
return true;
if (!(std::chrono::steady_clock::now() < cutoff))
return false;
}
}
template< class Rep, class Period >
bool try_lock_for (const std::chrono::duration<Rep,Period>& rel_time)
{
const auto deadline = std::chrono::steady_clock::now() + rel_time;
return try_lock_until(deadline);
}
// Shared-mode counterpart of try_lock_until.
template< class Clock, class Duration >
bool try_lock_shared_until ( const std::chrono::time_point<Clock,Duration>& cutoff )
{
for (;;)
{
if (try_lock_shared())
return true;
if (!(std::chrono::steady_clock::now() < cutoff))
return false;
}
}
template< class Rep, class Period >
bool try_lock_shared_for (const std::chrono::duration<Rep,Period>& rel_time)
{
const auto deadline = std::chrono::steady_clock::now() + rel_time;
return try_lock_shared_until(deadline);
}
};
#if __cplusplus >= 201402L
using std::shared_lock;
#else
// If not supplied by shared_mutex (eg. because C++14 is not supported), I
// supply the various helper classes that the header should have defined.
template<class Mutex>
class shared_lock
{
Mutex * mMutex;
bool mOwns;
// Reduce code redundancy
// Precondition check shared by lock/try_lock/try_lock_*: throws
// operation_not_permitted when no mutex is attached, and
// resource_deadlock_would_occur when the lock is already owned.
void verify_lockable (void)
{
using namespace std;
if (mMutex == nullptr)
throw system_error(make_error_code(errc::operation_not_permitted));
if (mOwns)
throw system_error(make_error_code(errc::resource_deadlock_would_occur));
}
public:
typedef Mutex mutex_type;
shared_lock (void) noexcept
: mMutex(nullptr), mOwns(false)
{
}
shared_lock (shared_lock<Mutex> && other) noexcept
: mMutex(other.mutex_), mOwns(other.owns_)
{
other.mMutex = nullptr;
other.mOwns = false;
}
explicit shared_lock (mutex_type & m)
: mMutex(&m), mOwns(true)
{
mMutex->lock_shared();
}
shared_lock (mutex_type & m, defer_lock_t) noexcept
: mMutex(&m), mOwns(false)
{
}
shared_lock (mutex_type & m, adopt_lock_t)
: mMutex(&m), mOwns(true)
{
}
shared_lock (mutex_type & m, try_to_lock_t)
: mMutex(&m), mOwns(m.try_lock_shared())
{
}
template< class Rep, class Period >
shared_lock( mutex_type& m, const std::chrono::duration<Rep,Period>& timeout_duration )
: mMutex(&m), mOwns(m.try_lock_shared_for(timeout_duration))
{
}
template< class Clock, class Duration >
shared_lock( mutex_type& m, const std::chrono::time_point<Clock,Duration>& timeout_time )
: mMutex(&m), mOwns(m.try_lock_shared_until(timeout_time))
{
}
shared_lock& operator= (shared_lock<Mutex> && other) noexcept
{
if (&other != this)
{
if (mOwns)
mMutex->unlock_shared();
mMutex = other.mMutex;
mOwns = other.mOwns;
other.mMutex = nullptr;
other.mOwns = false;
}
return *this;
}
~shared_lock (void)
{
if (mOwns)
mMutex->unlock_shared();
}
shared_lock (const shared_lock<Mutex> &) = delete;
shared_lock& operator= (const shared_lock<Mutex> &) = delete;
// Shared locking
void lock (void)
{
verify_lockable();
mMutex->lock_shared();
mOwns = true;
}
bool try_lock (void)
{
verify_lockable();
mOwns = mMutex->try_lock_shared();
return mOwns;
}
template< class Clock, class Duration >
bool try_lock_until( const std::chrono::time_point<Clock,Duration>& cutoff )
{
verify_lockable();
do
{
mOwns = mMutex->try_lock_shared();
if (mOwns)
return mOwns;
}
while (std::chrono::steady_clock::now() < cutoff);
return false;
}
template< class Rep, class Period >
bool try_lock_for (const std::chrono::duration<Rep,Period>& rel_time)
{
return try_lock_until(std::chrono::steady_clock::now() + rel_time);
}
void unlock (void)
{
using namespace std;
if (!mOwns)
throw system_error(make_error_code(errc::operation_not_permitted));
mMutex->unlock_shared();
mOwns = false;
}
// Modifiers
void swap (shared_lock<Mutex> & other) noexcept
{
using namespace std;
swap(mMutex, other.mMutex);
swap(mOwns, other.mOwns);
}
mutex_type * release (void) noexcept
{
mutex_type * ptr = mMutex;
mMutex = nullptr;
mOwns = false;
return ptr;
}
// Observers
mutex_type * mutex (void) const noexcept
{
return mMutex;
}
bool owns_lock (void) const noexcept
{
return mOwns;
}
explicit operator bool () const noexcept
{
return owns_lock();
}
};
//  Non-member swap overload for shared_lock, mirroring std::swap.
template< class Mutex >
void swap( shared_lock<Mutex>& lhs, shared_lock<Mutex>& rhs ) noexcept
{
    lhs.swap(rhs);
}
#endif // C++11
} // Namespace mingw_stdthread
namespace std
{
//  Because of quirks of the compiler, the common "using namespace std;"
//  directive would flatten the namespaces and introduce ambiguity where there
//  was none. Direct specification (std::), however, would be unaffected.
//  Take the safe option, and include only in the presence of MinGW's win32
//  implementation.
//  shared_mutex is standard only since C++17; inject ours below that level.
#if (__cplusplus < 201703L) || (defined(__MINGW32__ ) && !defined(_GLIBCXX_HAS_GTHREADS))
using mingw_stdthread::shared_mutex;
#endif
//  shared_timed_mutex and shared_lock are standard since C++14.
#if (__cplusplus < 201402L) || (defined(__MINGW32__ ) && !defined(_GLIBCXX_HAS_GTHREADS))
using mingw_stdthread::shared_timed_mutex;
using mingw_stdthread::shared_lock;
#elif !defined(MINGW_STDTHREAD_REDUNDANCY_WARNING) //  Skip repetition
#define MINGW_STDTHREAD_REDUNDANCY_WARNING
#pragma message "This version of MinGW seems to include a win32 port of\
pthreads, and probably already has C++ std threading classes implemented,\
based on pthreads. These classes, found in namespace std, are not overridden\
by the mingw-std-thread library. If you would still like to use this\
implementation (as it is more lightweight), use the classes provided in\
namespace mingw_stdthread."
#endif
} // Namespace std
#endif // MINGW_SHARED_MUTEX_H_

View File

@ -0,0 +1,360 @@
/**
* @file mingw.thread.h
* @brief std::thread implementation for MinGW
* (c) 2013-2016 by Mega Limited, Auckland, New Zealand
* @author Alexander Vassilev
*
* @copyright Simplified (2-clause) BSD License.
* You should have received a copy of the license along with this
* program.
*
* This code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* @note
* This file may become part of the mingw-w64 runtime package. If/when this happens,
* the appropriate license will be added, i.e. this code will become dual-licensed,
* and the current BSD 2-clause license will stay.
*/
#ifndef WIN32STDTHREAD_H
#define WIN32STDTHREAD_H
#if !defined(__cplusplus) || (__cplusplus < 201103L)
#error A C++11 compiler is required!
#endif
// Use the standard classes for std::, if available.
#include <thread>
#include <cstddef> // For std::size_t
#include <cerrno> // Detect error type.
#include <exception> // For std::terminate
#include <system_error> // For std::system_error
#include <functional> // For std::hash
#include <tuple> // For std::tuple
#include <chrono> // For sleep timing.
#include <memory> // For std::unique_ptr
#include <iosfwd> // Stream output for thread ids.
#include <utility> // For std::swap, std::forward
#include "mingw.invoke.h"
#if (defined(__MINGW32__) && !defined(__MINGW64_VERSION_MAJOR))
#pragma message "The Windows API that MinGW-w32 provides is not fully compatible\
with Microsoft's API. We'll try to work around this, but we can make no\
guarantees. This problem does not exist in MinGW-w64."
#include <windows.h> // No further granularity can be expected.
#else
#include <synchapi.h> // For WaitForSingleObject
#include <handleapi.h> // For CloseHandle, etc.
#include <sysinfoapi.h> // For GetNativeSystemInfo
#include <processthreadsapi.h> // For GetCurrentThreadId
#endif
#include <process.h> // For _beginthreadex
#ifndef NDEBUG
#include <cstdio>
#endif
#if !defined(_WIN32_WINNT) || (_WIN32_WINNT < 0x0501)
#error To use the MinGW-std-threads library, you will need to define the macro _WIN32_WINNT to be 0x0501 (Windows XP) or higher.
#endif
// Instead of INVALID_HANDLE_VALUE, _beginthreadex returns 0.
namespace mingw_stdthread
{
namespace detail
{
//  Compile-time integer sequence (a pre-C++14 substitute for
//  std::index_sequence), used to unpack the stored argument tuple when the
//  new thread invokes its callable.
    template<std::size_t...>
    struct IntSeq {};
    template<std::size_t N, std::size_t... S>
    struct GenIntSeq : GenIntSeq<N-1, N-1, S...> { };
    template<std::size_t... S>
    struct GenIntSeq<0, S...> { typedef IntSeq<S...> type; };
// Use a template specialization to avoid relying on compiler optimization
// when determining the parameter integer sequence.
    template<class Func, class T, typename... Args>
    class ThreadFuncCall;
// We can't define the Call struct in the function - the standard forbids template methods in that case
    template<class Func, std::size_t... S, typename... Args>
    class ThreadFuncCall<Func, detail::IntSeq<S...>, Args...>
    {
        static_assert(sizeof...(S) == sizeof...(Args), "Args must match.");
        using Tuple = std::tuple<typename std::decay<Args>::type...>;
        typename std::decay<Func>::type mFunc; //  Decayed copy of the callable.
        Tuple mArgs;                           //  Decayed copies of the arguments.
    public:
//  Store decayed copies of the callable and its arguments, matching the
//  copying behavior required of the std::thread constructor.
        ThreadFuncCall(Func&& aFunc, Args&&... aArgs)
            : mFunc(std::forward<Func>(aFunc)),
            mArgs(std::forward<Args>(aArgs)...)
        {
        }
//  Invoke the stored callable with the stored arguments (all moved);
//  runs once, on the new thread.
        void callFunc()
        {
            detail::invoke(std::move(mFunc), std::move(std::get<S>(mArgs)) ...);
        }
    };
// Allow construction of threads without exposing implementation.
    class ThreadIdTool;
} // Namespace "detail"
//  Drop-in replacement for std::thread, implemented on top of
//  _beginthreadex and Win32 thread HANDLEs.
class thread
{
public:
//  Value type identifying a thread; wraps the Win32 thread id (DWORD).
//  A default-constructed id (mId == 0) represents "not a thread".
    class id
    {
        DWORD mId = 0;
        friend class thread;
        friend class std::hash<id>;
        friend class detail::ThreadIdTool;
        explicit id(DWORD aId) noexcept : mId(aId){}
    public:
        id (void) noexcept = default;
        friend bool operator==(id x, id y) noexcept {return x.mId == y.mId; }
        friend bool operator!=(id x, id y) noexcept {return x.mId != y.mId; }
        friend bool operator< (id x, id y) noexcept {return x.mId < y.mId; }
        friend bool operator<=(id x, id y) noexcept {return x.mId <= y.mId; }
        friend bool operator> (id x, id y) noexcept {return x.mId > y.mId; }
        friend bool operator>=(id x, id y) noexcept {return x.mId >= y.mId; }
//  Stream output: prints the raw id, or a placeholder for "no thread".
        template<class _CharT, class _Traits>
        friend std::basic_ostream<_CharT, _Traits>&
        operator<<(std::basic_ostream<_CharT, _Traits>& __out, id __id)
        {
            if (__id.mId == 0)
            {
                return __out << "(invalid std::thread::id)";
            }
            else
            {
                return __out << __id.mId;
            }
        }
    };
private:
    static constexpr HANDLE kInvalidHandle = nullptr;
    static constexpr DWORD kInfinite = 0xffffffffl;
    HANDLE mHandle;  //  Win32 thread handle; kInvalidHandle when not joinable.
    id mThreadId;    //  Cached thread id; id{} when no thread is owned.
//  Trampoline passed to _beginthreadex: takes ownership of the heap-allocated
//  Call object, invokes it, and frees it when the thread function returns.
    template <class Call>
    static unsigned __stdcall threadfunc(void* arg)
    {
        std::unique_ptr<Call> call(static_cast<Call*>(arg));
        call->callFunc();
        return 0;
    }
//  Query the processor count once; result is cached by hardware_concurrency.
    static unsigned int _hardware_concurrency_helper() noexcept
    {
        SYSTEM_INFO sysinfo;
//    This is one of the few functions used by the library which has a nearly-
//    equivalent function defined in earlier versions of Windows. Include the
//    workaround, just as a reminder that it does exist.
#if defined(_WIN32_WINNT) && (_WIN32_WINNT >= 0x0501)
        ::GetNativeSystemInfo(&sysinfo);
#else
        ::GetSystemInfo(&sysinfo);
#endif
        return sysinfo.dwNumberOfProcessors;
    }
public:
    typedef HANDLE native_handle_type;
    id get_id() const noexcept {return mThreadId;}
    native_handle_type native_handle() const {return mHandle;}
    thread(): mHandle(kInvalidHandle), mThreadId(){}
//  Move constructor: steal the handle and id; the source becomes empty.
    thread(thread&& other)
    :mHandle(other.mHandle), mThreadId(other.mThreadId)
    {
        other.mHandle = kInvalidHandle;
        other.mThreadId = id{};
    }
    thread(const thread &other)=delete;
//  Start a new thread running func(args...).  The callable and arguments
//  are copied/moved into a heap-allocated Call object which the trampoline
//  deletes; on failure to start, the Call is deleted here and a
//  system_error carrying errno is thrown.
    template<class Func, typename... Args>
    explicit thread(Func&& func, Args&&... args) : mHandle(), mThreadId()
    {
        using ArgSequence = typename detail::GenIntSeq<sizeof...(Args)>::type;
        using Call = detail::ThreadFuncCall<Func, ArgSequence, Args...>;
        auto call = new Call(
            std::forward<Func>(func), std::forward<Args>(args)...);
        unsigned id_receiver;
        auto int_handle = _beginthreadex(NULL, 0, threadfunc<Call>,
            static_cast<LPVOID>(call), 0, &id_receiver);
        if (int_handle == 0)
        {
            mHandle = kInvalidHandle;
            int errnum = errno;
            delete call;
//    Note: Should only throw EINVAL, EAGAIN, EACCES
            throw std::system_error(errnum, std::generic_category());
        } else {
            mThreadId.mId = id_receiver;
            mHandle = reinterpret_cast<HANDLE>(int_handle);
        }
    }
    bool joinable() const {return mHandle != kInvalidHandle;}
//    Note: Due to lack of synchronization, this function has a race condition
//    if called concurrently, which leads to undefined behavior. The same applies
//    to all other member functions of this class, but this one is mentioned
//    explicitly.
//  Wait for the thread to finish, then release its handle.  Throws
//  system_error for self-join, no thread, or a non-joinable thread.
    void join()
    {
        using namespace std;
        if (get_id() == id(GetCurrentThreadId()))
            throw system_error(make_error_code(errc::resource_deadlock_would_occur));
        if (mHandle == kInvalidHandle)
            throw system_error(make_error_code(errc::no_such_process));
        if (!joinable())
            throw system_error(make_error_code(errc::invalid_argument));
        WaitForSingleObject(mHandle, kInfinite);
        CloseHandle(mHandle);
        mHandle = kInvalidHandle;
        mThreadId = id{};
    }
//  As specified for std::thread: destroying a joinable thread terminates.
    ~thread()
    {
        if (joinable())
        {
#ifndef NDEBUG
            std::printf("Error: Must join() or detach() a thread before \
destroying it.\n");
#endif
            std::terminate();
        }
    }
    thread& operator=(const thread&) = delete;
//  Move assignment: terminates if this thread is still joinable (matching
//  std::thread), otherwise adopts the other thread's state.
    thread& operator=(thread&& other) noexcept
    {
        if (joinable())
        {
#ifndef NDEBUG
            std::printf("Error: Must join() or detach() a thread before \
moving another thread to it.\n");
#endif
            std::terminate();
        }
        swap(std::forward<thread>(other));
        return *this;
    }
    void swap(thread&& other) noexcept
    {
        std::swap(mHandle, other.mHandle);
        std::swap(mThreadId.mId, other.mThreadId.mId);
    }
//  Number of hardware threads; computed once and cached.
    static unsigned int hardware_concurrency() noexcept
    {
        static unsigned int cached = _hardware_concurrency_helper();
        return cached;
    }
//  Relinquish ownership: close the handle and let the thread run free.
    void detach()
    {
        if (!joinable())
        {
            using namespace std;
            throw system_error(make_error_code(errc::invalid_argument));
        }
        if (mHandle != kInvalidHandle)
        {
            CloseHandle(mHandle);
            mHandle = kInvalidHandle;
        }
        mThreadId = id{};
    }
};
namespace detail
{
//  Friend of thread::id: converts a raw Win32 thread id (DWORD) into a
//  thread::id without exposing the private converting constructor.
    class ThreadIdTool
    {
    public:
        static thread::id make_id (DWORD base_id) noexcept
        {
            return thread::id(base_id);
        }
    };
} // Namespace "detail"
namespace this_thread
{
//  Id of the calling thread, from the Win32 thread identifier.
    inline thread::id get_id() noexcept
    {
        return detail::ThreadIdTool::make_id(GetCurrentThreadId());
    }
//  Sleep(0) relinquishes the remainder of the time slice.
    inline void yield() noexcept {Sleep(0);}
//  Sleep for at least the given duration.  Sleep() takes a DWORD of
//  milliseconds, so long durations are sliced into chunks below kInfinite
//  (0xffffffff, which Win32 interprets as INFINITE and must be avoided).
    template< class Rep, class Period >
    void sleep_for( const std::chrono::duration<Rep,Period>& sleep_duration)
    {
        static constexpr DWORD kInfinite = 0xffffffffl;
        using namespace std::chrono;
        using rep = milliseconds::rep;
        rep ms = duration_cast<milliseconds>(sleep_duration).count();
        while (ms > 0)
        {
            constexpr rep kMaxRep = static_cast<rep>(kInfinite - 1);
            auto sleepTime = (ms < kMaxRep) ? ms : kMaxRep;
            Sleep(static_cast<DWORD>(sleepTime));
            ms -= sleepTime;
        }
    }
//  Sleep until an absolute time point, expressed as a relative sleep.
    template <class Clock, class Duration>
    void sleep_until(const std::chrono::time_point<Clock,Duration>& sleep_time)
    {
        sleep_for(sleep_time-Clock::now());
    }
}
} // Namespace mingw_stdthread
namespace std
{
//    Because of quirks of the compiler, the common "using namespace std;"
//    directive would flatten the namespaces and introduce ambiguity where there
//    was none. Direct specification (std::), however, would be unaffected.
//    Take the safe option, and include only in the presence of MinGW's win32
//    implementation.
#if defined(__MINGW32__ ) && !defined(_GLIBCXX_HAS_GTHREADS)
    using mingw_stdthread::thread;
//    Remove ambiguity immediately, to avoid problems arising from the above.
    //using std::thread;
    namespace this_thread
    {
        using namespace mingw_stdthread::this_thread;
    }
#elif !defined(MINGW_STDTHREAD_REDUNDANCY_WARNING) //  Skip repetition
#define MINGW_STDTHREAD_REDUNDANCY_WARNING
#pragma message "This version of MinGW seems to include a win32 port of\
pthreads, and probably already has C++11 std threading classes implemented,\
based on pthreads. These classes, found in namespace std, are not overridden\
by the mingw-std-thread library. If you would still like to use this\
implementation (as it is more lightweight), use the classes provided in\
namespace mingw_stdthread."
#endif
//    Specialize hash for this implementation's thread::id, even if the
//    std::thread::id already has a hash.
//  The raw DWORD id is already a well-distributed integer, so it is used
//  directly as the hash value.
    template<>
    struct hash<mingw_stdthread::thread::id>
    {
        typedef mingw_stdthread::thread::id argument_type;
        typedef size_t result_type;
        size_t operator() (const argument_type & i) const noexcept
        {
            return i.mId;
        }
    };
}
#endif // WIN32STDTHREAD_H

View File

@ -0,0 +1,18 @@
# Test harness for mingw-stdthreads.
# Fix: cmake_minimum_required() must be called before project() so that
# policy settings are established before the project is configured.
cmake_minimum_required(VERSION 3.0)
project(stdthreadtest)
string(CONCAT mingw_stdthreads_tests_compile_options_docstring
       "Compiler flags used to compile mingw-stdthreads's tests. By default "
       "it's -std=c++11 -Wall -Wextra")
set(MINGW_STDTHREADS_TESTS_COMPILE_OPTIONS "-std=c++11;-Wall;-Wextra"
    CACHE STRING ${mingw_stdthreads_tests_compile_options_docstring})
set(MINGW_STDTHREADS_TESTS_ADDITIONAL_LINKER_FLAGS "" CACHE STRING
    "Optional linker flags to be passed when linking mingw-stdthreads's tests")
add_executable(${PROJECT_NAME} tests.cpp)
target_compile_options(${PROJECT_NAME} PRIVATE
                       ${MINGW_STDTHREADS_TESTS_COMPILE_OPTIONS})
target_link_libraries(${PROJECT_NAME} PRIVATE mingw_stdthreads)
target_link_libraries(${PROJECT_NAME} PRIVATE
                      ${MINGW_STDTHREADS_TESTS_ADDITIONAL_LINKER_FLAGS})

View File

@ -0,0 +1,450 @@
#ifndef MINGW_STDTHREADS_GENERATED_STDHEADERS
#include <mingw.thread.h>
#include <mingw.mutex.h>
#include <mingw.condition_variable.h>
#include <mingw.shared_mutex.h>
#include <mingw.future.h>
#else
#include <thread>
#include <mutex>
#include <condition_variable>
#include <shared_mutex>
#include <future>
#endif
#include <atomic>
#include <cassert>
#include <cstdio>   // printf/fputs/fflush used by log()
#include <cstdlib>  // std::malloc / std::free used by CustomAllocator
#include <cstring>  // strcmp used by the worker-thread assertions
#include <iostream>
#include <new>      // std::bad_alloc thrown by CustomAllocator::allocate
#include <string>
#include <typeinfo>
#include <vector>   // std::vector used in the thread-argument regression test
using namespace std;
//  Canonical value passed through promises/futures in the <future> tests.
int test_int = 42;
// Pre-declaration to suppress some warnings.
void test_call_once(int, char const *);
//  Progress counter (0..3) and the synchronization objects shared between
//  main() and its worker thread.
int cond = 0;
std::mutex m;
std::shared_mutex sm;
std::condition_variable cv;
std::condition_variable_any cv_any;
//  printf-style logging helper: writes the formatted message, appends a
//  newline, and flushes stdout so that output is visible even if the
//  process later aborts or terminates abnormally.
template<class ... Args>
void log (char const * fmtString, Args ...args) {
    printf(fmtString, args...);
    fputs("\n", stdout);
    fflush(stdout);
}
//  Callback for the call_once test: logs its arguments, then sleeps long
//  enough for a concurrent (incorrect) second invocation to overlap.
void test_call_once(int a, const char* str)
{
    log("test_call_once called with a=%d, str=%s", a, str);
    this_thread::sleep_for(std::chrono::milliseconds(500));
}
//  Helper type that asserts if it is ever copied: used to verify that
//  thread arguments are moved, not copied, into the worker thread.
struct TestMove
{
    std::string mStr;
    TestMove(const std::string& aStr): mStr(aStr){}
//  Each move appends " moved", so the test can see how many moves occurred.
    TestMove(TestMove&& other): mStr(other.mStr+" moved")
    { printf("%s: Object moved\n", mStr.c_str()); }
//  Copying is a test failure.
    TestMove(const TestMove&) : mStr()
    {
        assert(false && "TestMove: Object COPIED instead of moved");
    }
};
//  Fulfil a promise with the canonical test value.  The void specialization
//  has no value to set.
template<class T>
void test_future_set_value (promise<T> & promise)
{
    promise.set_value(T(test_int));
}
template<>
void test_future_set_value (promise<void> & promise)
{
    promise.set_value();
}
//  Retrieve a future's value and report whether it matches the canonical
//  test value.  The void specialization only propagates exceptions.
template<class T>
bool test_future_get_value (future<T> & future)
{
    return (future.get() == T(test_int));
}
template<>
bool test_future_get_value (future<void> & future)
{
    future.get();
    return true;
}
//  Minimal stateless allocator used to verify that std::promise honours
//  std::allocator_arg.  Logs every allocation/deallocation so the test
//  output shows that the custom allocator was actually used.
template<class T>
struct CustomAllocator
{
    CustomAllocator (void) noexcept
    {
    }
//  Allocators must be constructible/assignable from related allocators.
    template<class U>
    CustomAllocator (CustomAllocator<U> const &) noexcept
    {
    }
    template<class U>
    CustomAllocator<T> & operator= (CustomAllocator<U> const &) noexcept
    {
        return *this;
    }
    typedef T value_type;
//  Allocate raw storage for n objects.
//  Fix: the Allocator requirements say a failed allocation must throw
//  std::bad_alloc — returning the unchecked result of std::malloc could
//  hand a null pointer to the container.
    T * allocate (size_t n)
    {
        log("Used custom allocator to allocate %zu object(s).", n);
        void * ptr = std::malloc(n * sizeof(T));
        if (ptr == nullptr)
            throw std::bad_alloc();
        return static_cast<T*>(ptr);
    }
//  Release storage previously obtained from allocate().
    void deallocate (T * ptr, size_t n)
    {
        log("Used custom allocator to deallocate %zu object(s).", n);
        std::free(ptr);
    }
};
//  Exercise the <future> implementation for result type T: static interface
//  checks, value/exception/broken/late promises serviced by a detached
//  thread, async() in every launch mode, and async() on a
//  pointer-to-member-function.
template<class T>
void test_future ()
{
//  promise/future are move-only; shared_future is also copyable.
    static_assert(is_move_constructible<promise<T> >::value,
        "std::promise must be move-constructible.");
    static_assert(is_move_assignable<promise<T> >::value,
        "std::promise must be move-assignable.");
    static_assert(!is_copy_constructible<promise<T> >::value,
        "std::promise must not be copy-constructible.");
    static_assert(!is_copy_assignable<promise<T> >::value,
        "std::promise must not be copy-assignable.");
    static_assert(is_move_constructible<future<T> >::value,
        "std::future must be move-constructible.");
    static_assert(is_move_assignable<future<T> >::value,
        "std::future must be move-assignable.");
    static_assert(!is_copy_constructible<future<T> >::value,
        "std::future must not be copy-constructible.");
    static_assert(!is_copy_assignable<future<T> >::value,
        "std::future must not be copy-assignable.");
    static_assert(is_move_constructible<shared_future<T> >::value,
        "std::shared_future must be move-constructible.");
    static_assert(is_move_assignable<shared_future<T> >::value,
        "std::shared_future must be move-assignable.");
    static_assert(is_copy_constructible<shared_future<T> >::value,
        "std::shared_future must be copy-constructible.");
    static_assert(is_copy_assignable<shared_future<T> >::value,
        "std::shared_future must be copy-assignable.");
    log("\tMaking a few promises, and getting their futures...");
    promise<T> promise_value, promise_exception, promise_broken, promise_late;
    future<T> future_value = promise_value.get_future();
    future<T> future_exception = promise_exception.get_future();
    future<T> future_broken = promise_broken.get_future();
    future<T> future_late = promise_late.get_future();
//  A future may be retrieved from a given promise only once.
    try {
        future<T> impossible_future = promise_value.get_future();
        log("WARNING: Promise failed to detect that its future was already retrieved.");
    } catch(...) {
        log("\tPromise successfully prevented redundant future retrieval.");
    }
    log("\tPassing promises to a new thread...");
//  The unnamed third promise (promise_broken) is destroyed unfulfilled
//  inside the thread, which must surface as a broken_promise future_error.
    thread t ([](promise<T> p_value, promise<T> p_exception, promise<T>, promise<T> p_late)
    {
        this_thread::sleep_for(std::chrono::seconds(1));
        try {
            throw std::runtime_error("Thrown during the thread.");
        } catch (...) {
            p_late.set_exception_at_thread_exit(std::current_exception());
        }
        test_future_set_value(p_value);
        try {
            throw std::runtime_error("Things happened as expected.");
        } catch (...) {
            p_exception.set_exception(std::current_exception());
        }
        this_thread::sleep_for(std::chrono::seconds(2));
    },
        std::move(promise_value),
        std::move(promise_exception),
        std::move(promise_broken),
        std::move(promise_late));
    t.detach();
    try {
        bool was_expected = test_future_get_value(future_value);
        log("\tReceived %sexpected value.", (was_expected ? "" : "un"));
    } catch (...) {
        log("WARNING: Exception where there should be none!");
        throw;
    }
    try {
        test_future_get_value(future_exception);
        log("WARNING: Got a value where there should be an exception!");
    } catch (std::exception & e) {
        log("\tReceived an exception (\"%s\") as expected.", e.what());
    }
    log("\tWaiting for the thread to exit...");
//  Set via set_exception_at_thread_exit: only visible once the thread ends.
    try {
        test_future_get_value(future_late);
        log("WARNING: Got a value where there should be an exception!");
    } catch (std::exception & e) {
        log("\tReceived an exception (\"%s\") as expected.", e.what());
    }
    try {
        test_future_get_value(future_broken);
        log("WARNING: Got a value where there should be an exception!");
    } catch (std::future_error & e) {
        log("\tReceived a future_error (\"%s\") as expected.", e.what());
    }
    log("\tDeferring a function...");
    auto async_deferred = async(launch::deferred, [] (void) -> T
    {
        std::hash<std::thread::id> hasher;
        log("\t\tDeferred function called on thread %zu", hasher(std::this_thread::get_id()));
        if (!is_void<T>::value)
            return T(test_int);
    });
    log("\tCalling a function asynchronously...");
    auto async_async = async(launch::async, [] (void) -> T
    {
        std::hash<std::thread::id> hasher;
        log("\t\tAsynchronous function called on thread %zu", hasher(std::this_thread::get_id()));
        if (!is_void<T>::value)
            return T(test_int);
    });
    log("\tLetting the implementation decide...");
    auto async_either = async([] (thread::id other_id) -> T
    {
        std::hash<thread::id> hasher;
        log("\t\tFunction called on thread %zu. Implementation chose %s execution.", hasher(this_thread::get_id()), (this_thread::get_id() == other_id) ? "deferred" : "asynchronous");
        if (!is_void<T>::value)
            return T(test_int);
    }, this_thread::get_id());
    log("\tFetching asynchronous result.");
    test_future_get_value(async_async);
    log("\tFetching deferred result.");
    test_future_get_value(async_deferred);
    log("\tFetching implementation-defined result.");
    test_future_get_value(async_either);
    log("\tTesting async on pointer-to-member-function.");
    struct Helper
    {
        thread::id other_id;
        T call (void) const
        {
            std::hash<thread::id> hasher;
            log("\t\tFunction called on thread %zu. Implementation chose %s execution.", hasher(this_thread::get_id()), (this_thread::get_id() == other_id) ? "deferred" : "asynchronous");
            if (!is_void<T>::value)
                return T(test_int);
        }
    } test_class { this_thread::get_id() };
//  Fix: a pointer to a non-static member function must be formed with '&';
//  "Helper::call" alone is ill-formed.
    auto async_member = async(&Helper::call, test_class);
    log("\tFetching result.");
    test_future_get_value(async_member);
}
//  Assert at compile time that ClassName is standard-layout and neither
//  movable nor copyable, as required of the mutex/condvar types.
#define TEST_SL_MV_CPY(ClassName) \
    static_assert(std::is_standard_layout<ClassName>::value, \
        "ClassName does not satisfy concept StandardLayoutType."); \
    static_assert(!std::is_move_constructible<ClassName>::value, \
        "ClassName must not be move-constructible."); \
    static_assert(!std::is_move_assignable<ClassName>::value, \
        "ClassName must not be move-assignable."); \
    static_assert(!std::is_copy_constructible<ClassName>::value, \
        "ClassName must not be copy-constructible."); \
    static_assert(!std::is_copy_assignable<ClassName>::value, \
        "ClassName must not be copy-assignable.");
//  Entry point: interface static checks, thread-argument copy/move
//  regression tests, condition-variable handshakes with a worker thread,
//  call_once, and the <future> test suite.  Returns 1 on the argument-copy
//  regression failure, 0 otherwise.
int main()
{
#ifdef MINGW_STDTHREADS_GENERATED_STDHEADERS
    std::cout << "Using cmake-generated stdheaders, ";
#endif
//  thread::id must behave as a trivially-copyable value type.
    static_assert(std::is_trivially_copyable<thread::id>::value,
        "thread::id must be trivially copyable.");
//  None of the mutex / condition-variable types may be movable or copyable.
    TEST_SL_MV_CPY(mutex)
    TEST_SL_MV_CPY(recursive_mutex)
    TEST_SL_MV_CPY(timed_mutex)
    TEST_SL_MV_CPY(recursive_timed_mutex)
    TEST_SL_MV_CPY(shared_mutex)
    TEST_SL_MV_CPY(shared_timed_mutex)
    TEST_SL_MV_CPY(condition_variable)
    TEST_SL_MV_CPY(condition_variable_any)
    static_assert(!std::is_move_constructible<once_flag>::value,
        "once_flag must not be move-constructible.");
    static_assert(!std::is_move_assignable<once_flag>::value,
        "once_flag must not be move-assignable.");
    static_assert(!std::is_copy_constructible<once_flag>::value,
        "once_flag must not be copy-constructible.");
    static_assert(!std::is_copy_assignable<once_flag>::value,
        "once_flag must not be copy-assignable.");
    // With C++ feature level and target Windows version potentially affecting
    // behavior, make this information visible.
    {
        switch (__cplusplus)
        {
        case 201103L: std::cout << "Compiled in C++11"; break;
        case 201402L: std::cout << "Compiled in C++14"; break;
        case 201703L: std::cout << "Compiled in C++17"; break;
        default: std::cout << "Compiled in a non-conforming C++ compiler";
        }
        std::cout << ", targeting Windows ";
        static_assert(WINVER > 0x0500, "Windows NT and earlier are not supported.");
        switch (WINVER)
        {
        case 0x0501: std::cout << "XP"; break;
        case 0x0502: std::cout << "Server 2003"; break;
        case 0x0600: std::cout << "Vista"; break;
        case 0x0601: std::cout << "7"; break;
        case 0x0602: std::cout << "8"; break;
        case 0x0603: std::cout << "8.1"; break;
        case 0x0A00: std::cout << "10"; break;
        default: std::cout << "10+";
        }
        std::cout << "\n";
    }
    {
        log("Testing serialization and hashing for thread::id...");
        std::cout << "Serialization:\t" << this_thread::get_id() << "\n";
        std::hash<thread::id> hasher;
        std::cout << "Hash:\t" << hasher(this_thread::get_id()) << "\n";
    }
    // Regression test: Thread must copy any argument that is passed by value.
    {
        std::vector<std::thread> loop_threads;
        std::atomic<int> i_vals_touched [4];// { 0, 0, 0, 0 };
        for (int i = 0; i < 4; ++i)
            i_vals_touched[i].store(0, std::memory_order_relaxed);
        for (int i = 0; i < 4; ++i)
        {
            loop_threads.push_back(std::thread([&](int c)
            {
                log("For-loop test thread got value: %i", c);
                i_vals_touched[c].fetch_add(1, std::memory_order_relaxed);
            }, i));
        }
        for (std::thread & thr : loop_threads)
            thr.join();
//      Each value 0..3 must have been seen exactly once.
        for (int i = 0; i < 4; ++i)
        {
            if (i_vals_touched[i] != 1)
            {
                log("FATAL: Threads are not copying arguments!");
                return 1;
            }
        }
    }
//  Worker thread: verifies move-only argument passing, then advances `cond`
//  through 1..3, notifying the matching condition variable at each step.
    std::thread t([](TestMove&& a, const char* b, int c) mutable
    {
        try
        {
            log("Worker thread started, sleeping for a while...");
            // Thread might move the string more than once.
            assert(a.mStr.substr(0, 15) == "move test moved");
            assert(!strcmp(b, "test message"));
            assert(c == -20);
            auto move2nd = std::move(a); //test move to final destination
            this_thread::sleep_for(std::chrono::milliseconds(1000));
            {
                lock_guard<mutex> lock(m);
                cond = 1;
                log("Notifying condvar");
                cv.notify_all();
            }
            this_thread::sleep_for(std::chrono::milliseconds(500));
            {
                lock_guard<decltype(sm)> lock(sm);
                cond = 2;
                log("Notifying condvar");
                cv_any.notify_all();
            }
            this_thread::sleep_for(std::chrono::milliseconds(500));
            {
                lock_guard<decltype(sm)> lock(sm);
                cond = 3;
                log("Notifying condvar");
                cv_any.notify_all();
            }
            log("Worker thread finishing");
        }
        catch(std::exception& e)
        {
            printf("EXCEPTION in worker thread: %s\n", e.what());
        }
    },
    TestMove("move test"), "test message", -20);
//  Main thread: wait for each of the worker's three notifications, using a
//  plain mutex, an exclusive shared_mutex, and a shared shared_mutex in turn.
    try
    {
        log("Main thread: Locking mutex, waiting on condvar...");
        {
            std::unique_lock<decltype(m)> lk(m);
            cv.wait(lk, []{ return cond >= 1;} );
            log("condvar notified, cond = %d", cond);
            assert(lk.owns_lock());
        }
        log("Main thread: Locking shared_mutex, waiting on condvar...");
        {
            std::unique_lock<decltype(sm)> lk(sm);
            cv_any.wait(lk, []{ return cond >= 2;} );
            log("condvar notified, cond = %d", cond);
            assert(lk.owns_lock());
        }
        log("Main thread: Locking shared_mutex in shared mode, waiting on condvar...");
        {
            std::shared_lock<decltype(sm)> lk(sm);
            cv_any.wait(lk, []{ return cond >= 3;} );
            log("condvar notified, cond = %d", cond);
            assert(lk.owns_lock());
        }
        log("Main thread: Waiting on worker join...");
        t.join();
        log("Main thread: Worker thread joined");
        fflush(stdout);
    }
    catch(std::exception& e)
    {
        log("EXCEPTION in main thread: %s", e.what());
    }
//  call_once must invoke the callback exactly once per once_flag.
    once_flag of;
    call_once(of, test_call_once, 1, "test");
    call_once(of, test_call_once, 1, "ERROR! Should not be called second time");
    log("Test complete");
    {
        log("Testing implementation of <future>...");
        test_future<int>();
        test_future<void>();
        test_future<int &>();
        test_future<int const &>();
        test_future<int volatile &>();
        test_future<int const volatile &>();
        log("Testing <future>'s use of allocators. Should allocate, then deallocate.");
        promise<int> allocated_promise (std::allocator_arg, CustomAllocator<unsigned>());
        allocated_promise.set_value(7);
    }
    return 0;
}

View File

@ -0,0 +1,226 @@
<#
.SYNOPSIS
Generate std-like headers which you can use just like standard c++'s ones.
For example include <thread>.
.PARAMETER GccPath
Path to GCC. Will try to use the default one from $env:Path if not
specified.
.PARAMETER MinGWStdThreadsPath
Path to mingw-std-threads folder. Will try to use $PSScriptRoot/.. if not
specified.
.PARAMETER DestinationFolder
Destination folder where generated headers will be saved to
.PARAMETER GenerateCompilerWrapperWithFileName
If specified, a wrapper batch script for g++ will be generated which
automatically adds $DestinationFolder as an include path
.PARAMETER Interactive
Use this switch if you want to pass parameters interactively
#>
[CmdletBinding(PositionalBinding = $false)]
param (
    # Path of GCC
    [Parameter(Mandatory = $false,
        ValueFromPipelineByPropertyName = $true,
        ParameterSetName = "NonInteractive",
        HelpMessage = "Path to GCC. Will try to use the default one from `$env:Path if not specified.")]
    [string]
    $GccPath,
    # Path of mingw-std-threads
    [Parameter(Mandatory = $false,
        ValueFromPipelineByPropertyName = $true,
        ParameterSetName = "NonInteractive",
        HelpMessage = "Path to mingw-std-threads folder. Will try to use `$PSScriptRoot/.. if not specified.")]
    [string]
    $MinGWStdThreadsPath,
    # Destination folder path
    [Parameter(Mandatory = $true,
        ValueFromPipelineByPropertyName = $true,
        ParameterSetName = "NonInteractive",
        HelpMessage = "Destination folder where generated headers will be saved to")]
    [ValidateNotNullOrEmpty()]
    [string]
    $DestinationFolder,
    # Compiler wrapper path
    [Parameter(Mandatory = $false,
        ValueFromPipelineByPropertyName = $true,
        ParameterSetName = "NonInteractive",
        HelpMessage = "If specified, will generate a wrapper batch script for g++ which automatically adds `$DestinationFolder as an include path")]
    [string]
    $GenerateCompilerWrapperWithFileName,
    # Interactive Switch
    [Parameter(ParameterSetName = "Interactive")]
    [switch]
    $Interactive = $false
)
# Stop execution when encountering any error (including Write-Error command)
$ErrorActionPreference = "Stop";
# headers to be generated
$headers = @("condition_variable", "future", "mutex", "shared_mutex", "thread")
# ask for user input in interactive mode
if ($Interactive) {
    Write-Host "Generate std-like headers which you can use just like standard c++'s ones."
    Write-Host "Something like `"include <thread>`"."
    $DestinationFolder = Read-Host -Prompt "Destination folder into which headers will be generated"
    $GccPath = Read-Host -Prompt "Path to GCC, optional. Press Enter to let it be retrieved from PATH"
    $MinGWStdThreadsPath = Read-Host -Prompt "Path to mingw-std-threads folder, optional. Press Enter to use default value"
    $GenerateCompilerWrapperWithFileName = Read-Host "Optional path to which a wrapper batch script for g++ will be created. It will automatically use $DestinationFolder as an include path. Press Enter to skip"
}
# Fall back to resolving "gcc" from $env:Path when no explicit path was given.
if (-not $GccPath) {
    $GccPath = "gcc"
}
# set default value of $MinGWStdThreadsPath
if (-not $MinGWStdThreadsPath) {
    # Locate this script file so the default can be the parent of its folder
    # (the mingw-std-threads repository root).
    $scriptFilePath = $null
    if ($MyInvocation.MyCommand.CommandType -eq "ExternalScript") {
        $scriptFilePath = $MyInvocation.MyCommand.Definition
    }
    else {
        # Script was not invoked as an external script; fall back to argv[0].
        $scriptFilePath = [Environment]::GetCommandLineArgs()[0]
    }
    $MinGWStdThreadsPath = (Get-Item -LiteralPath $scriptFilePath).Directory.Parent.FullName
}
# Normalize paths (also creates the destination folder if missing)
$GccPath = (Get-Command -Name $GccPath).Source
$MinGWStdThreadsPath = Resolve-Path -LiteralPath $MinGWStdThreadsPath
$DestinationFolder = New-Item -Path $DestinationFolder -ItemType "Directory" -Force
Write-Output "GccPath: $GccPath"
Write-Output "MinGWStdThreadsPath: $MinGWStdThreadsPath"
Write-Output "DestinationFolder: $DestinationFolder"
if ($GenerateCompilerWrapperWithFileName) {
    Write-Output "GenerateCompilerWrapperWithFileName: $GenerateCompilerWrapperWithFileName"
}
# Find path of real headers
Write-Output "Retrieving system header search paths..."
$readingIncludePath = $false
# Empty array which will later store include paths
$includePaths = @()
# Launch GCC with "-xc++ -E -v -": with stdin closed this makes GCC print
# its #include <...> search directories on stderr without compiling anything.
$processStartInfo = New-Object -TypeName "System.Diagnostics.ProcessStartInfo"
$processStartInfo.FileName = $GccPath
$processStartInfo.Arguments = "-xc++ -E -v -"
$processStartInfo.RedirectStandardInput = $true
$processStartInfo.RedirectStandardOutput = $true
$processStartInfo.RedirectStandardError = $true
$processStartInfo.UseShellExecute = $false
$outputLines = @()
$gcc = New-Object -TypeName "System.Diagnostics.Process"
try {
    $gcc.StartInfo = $processStartInfo
    $gcc.Start() | Out-Null
    $gcc.StandardInput.Close()
    $gcc.WaitForExit()
    $output = $gcc.StandardError.ReadToEnd()
    # Split into trimmed, non-empty lines.
    $outputLines = $output -split "[\r\n]" |
        ForEach-Object { return $_.Trim() } |
        Where-Object { return $_.Length -gt 0 }
}
finally {
    $gcc.StandardInput.Dispose()
    $gcc.StandardOutput.Dispose()
    $gcc.StandardError.Dispose()
    $gcc.Dispose()
}
# Parse output: search paths are listed between the
# "#include <...> search starts here:" and "End of search list." markers.
foreach ($line in $outputLines) {
    if (-not $readingIncludePath) {
        if ($line -match "#include <...> search starts here:") {
            $readingIncludePath = $true
        }
        continue
    }
    if ($line -match "End of search list.") {
        break
    }
    Write-Output "Retrieved search path: $line"
    $includePaths += $line
}
if ($includePaths.Count -eq 0) {
    Write-Error "Error: didn't find any #include <...> search paths"
}
# look for std header paths
Write-Output "Searching for standard headers..."
$stdHeaders = @()
# set a label called "nextHeader" to allow continue with outer loop
:nextHeader foreach ($header in $headers) {
    # check if mingw-std-threads headers exist
    $myHeader = "mingw.$header.h"
    $myHeader = Join-Path -Path $MinGWStdThreadsPath -ChildPath $myHeader
    if (-not (Test-Path -LiteralPath $myHeader -PathType "Leaf")) {
        Write-Error "Error: mingw-std-threads header not found: $myHeader"
    }
    # Try each compiler search path until the real std header is found.
    foreach ($includePath in $includePaths) {
        $fullPath = Join-Path -Path $includePath -ChildPath $header
        if (Test-Path -LiteralPath $fullPath -PathType "Leaf") {
            $fullPath = (Get-Item -LiteralPath $fullPath).FullName
            $stdHeaders += $fullPath
            Write-Output "Found std header: $fullPath"
            # if found matching header, continue with outer loop
            continue nextHeader
        }
    }
    Write-Error "Error: didn't find $header in any search paths"
}
# generate headers
Write-Output "Generating headers..."
foreach ($realHeader in $stdHeaders) {
    $headerFileName = (Get-Item -LiteralPath $realHeader).Name
    $wrapperHeader = Join-Path -Path $MinGWStdThreadsPath -ChildPath "mingw.$headerFileName.h"
    Write-Output "Generating <$headerFileName> from $wrapperHeader and $realHeader..."
    # both two headers should already have include guards
    # but we still add a #pragma once just to be safe
    $contentLines = @(
        "#pragma once",
        "#include `"$realHeader`"",
        "#include `"$wrapperHeader`""
    )
    $content = ($contentLines -join "`r`n") + "`r`n"
    $outputFileName = Join-Path -Path $DestinationFolder -ChildPath $headerFileName
    Write-Output "Writing file: $outputFileName"
    # use .NET's method to output lines to avoid UTF-8 BOM
    $noBomEncoding = New-Object -TypeName "System.Text.UTF8Encoding" -ArgumentList $false
    [IO.File]::WriteAllText($outputFileName, $content, $noBomEncoding)
}
$message = "Successfully generated std-like headers. Use them by adding " +
    "`"-I$DestinationFolder`" to your compiler command line parameters"
Write-Output $message
if ($GenerateCompilerWrapperWithFileName) {
    # The wrapper forwards all arguments to g++ (located next to gcc) and
    # appends the generated include directory.
    $compilerFolder = Split-Path -LiteralPath $GccPath
    $compiler = Join-Path -Path $compilerFolder -ChildPath "g++"
    $command = "@echo off`r`n" + "$compiler %* `"-I$DestinationFolder`""
    $wrapper = New-Item -Path $GenerateCompilerWrapperWithFileName -ItemType "File" -Force
    # use .NET's method to output lines to avoid UTF-8 BOM
    $noBomEncoding = New-Object -TypeName "System.Text.UTF8Encoding" -ArgumentList $false
    [IO.File]::WriteAllText($wrapper, $command, $noBomEncoding)
    Write-Output "Wrapper batch script successfully generated to $wrapper"
}

View File

@ -0,0 +1 @@
@rem Run the std-like header generator non-interactively, forwarding all arguments.
powershell -NonInteractive -ExecutionPolicy ByPass -File %~dp0Generate-StdLikeHeaders.ps1 %*

View File

@ -0,0 +1 @@
@rem Launch the std-like header generator in interactive mode (prompts for parameters).
powershell -ExecutionPolicy ByPass -File %~dp0Generate-StdLikeHeaders.ps1 -Interactive

71
randomx/program.hpp Normal file
View File

@ -0,0 +1,71 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cstdint>
#include <ostream>
#include "common.hpp"
#include "instruction.hpp"
#include "blake2/endian.h"
namespace randomx {

	// Per-program configuration decoded from entropy; presumably eMask is
	// applied to the E-group registers and readReg0..3 select the integer
	// registers used for address generation — TODO confirm against the VM code.
	struct ProgramConfiguration {
		uint64_t eMask[2];
		uint32_t readReg0, readReg1, readReg2, readReg3;
	};

	// A RandomX program: 16 entropy words followed by RANDOMX_PROGRAM_SIZE
	// instructions (each Instruction is 64 bits wide).
	class Program {
	public:
		// Mutable access to the instruction at program counter pc.
		// No bounds checking: pc must be in [0, RANDOMX_PROGRAM_SIZE).
		Instruction& operator()(int pc) {
			return programBuffer[pc];
		}
		// Stream the textual form of every instruction.
		friend std::ostream& operator<<(std::ostream& os, const Program& p) {
			p.print(os);
			return os;
		}
		// Little-endian load of entropy word i; i must be in [0, 16).
		// (const added: the accessor does not modify the program)
		uint64_t getEntropy(int i) const {
			return load64(&entropyBuffer[i]);
		}
		// Number of instructions in the program.
		uint32_t getSize() const {
			return RANDOMX_PROGRAM_SIZE;
		}
	private:
		// Print every instruction in order to os.
		void print(std::ostream& os) const {
			for (int i = 0; i < RANDOMX_PROGRAM_SIZE; ++i) {
				auto instr = programBuffer[i];
				os << instr;
			}
		}
		uint64_t entropyBuffer[16];
		Instruction programBuffer[RANDOMX_PROGRAM_SIZE]; // e.g. 256 instructions, 64 bits each
	};

	// Program must be a whole number of 64-byte cache lines.
	static_assert(sizeof(Program) % 64 == 0, "Invalid size of class randomx::Program");
}

492
randomx/randomx.cpp Normal file
View File

@ -0,0 +1,492 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "mingw-std-threads-master/mingw.thread.h" //这个时因为使用#include <thread>会报错mingw中thread不支持std如果时其他编译器如vs等就不需要
#include "randomx.h"
#include "dataset.hpp"
#include "vm_interpreted.hpp"
#include "vm_interpreted_light.hpp"
#include "vm_compiled.hpp"
#include "vm_compiled_light.hpp"
#include "blake2/blake2.h"
#include "cpu.hpp"
#include <cassert>
#include <limits>
// Detect the recommended flag set for the current machine: JIT when a
// compiler backend is available (with SECURE additionally forced on
// OpenBSD), hardware AES when the CPU supports it, and the fastest
// available Argon2 implementation (AVX2 > SSSE3).
randomx_flags randomx_get_flags() {
	randomx::Cpu cpuInfo;
	randomx_flags flags = RANDOMX_HAVE_COMPILER ? RANDOMX_FLAG_JIT : RANDOMX_FLAG_DEFAULT;
#ifdef __OpenBSD__
	if (flags == RANDOMX_FLAG_JIT) {
		flags |= RANDOMX_FLAG_SECURE;
	}
#endif
	if (HAVE_AES && cpuInfo.hasAes()) {
		flags |= RANDOMX_FLAG_HARD_AES;
	}
	if (randomx_argon2_impl_avx2() != nullptr && cpuInfo.hasAvx2()) {
		flags |= RANDOMX_FLAG_ARGON2_AVX2;
	}
	if (randomx_argon2_impl_ssse3() != nullptr && cpuInfo.hasSsse3()) {
		flags |= RANDOMX_FLAG_ARGON2_SSSE3;
	}
	return flags;
}
// Allocate (but do not initialize) a RandomX cache.
// Returns nullptr if no suitable Argon2 implementation is available for
// the given flags or if allocation fails. On success the caller owns the
// cache and must release it with randomx_release_cache().
randomx_cache *randomx_alloc_cache(randomx_flags flags) {
	randomx_cache *cache = nullptr;
	// Choose the Argon2 fill implementation (reference/SSSE3/AVX2) from flags.
	auto impl = randomx::selectArgonImpl(flags);
	if (impl == nullptr) {
		return cache;
	}
	try {
		cache = new randomx_cache();
		cache->argonImpl = impl;
		// Only the JIT and LARGE_PAGES bits affect how the cache is set up:
		// JIT swaps in the compiled cache/dataset init routines, LARGE_PAGES
		// swaps in the large-page allocator.
		switch ((int)(flags & (RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES))) {
		case RANDOMX_FLAG_DEFAULT:
			cache->dealloc = &randomx::deallocCache<randomx::DefaultAllocator>;
			cache->jit = nullptr;
			cache->initialize = &randomx::initCache;
			cache->datasetInit = &randomx::initDataset;
			cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::CacheSize); //randomx::CacheSize =256MB
			break;
		case RANDOMX_FLAG_JIT:
			cache->dealloc = &randomx::deallocCache<randomx::DefaultAllocator>;
			cache->jit = new randomx::JitCompiler();
			cache->initialize = &randomx::initCacheCompile;
			cache->datasetInit = cache->jit->getDatasetInitFunc();
			cache->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::CacheSize);
			break;
		case RANDOMX_FLAG_LARGE_PAGES:
			cache->dealloc = &randomx::deallocCache<randomx::LargePageAllocator>;
			cache->jit = nullptr;
			cache->initialize = &randomx::initCache;
			cache->datasetInit = &randomx::initDataset;
			cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::CacheSize);
			break;
		case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES:
			cache->dealloc = &randomx::deallocCache<randomx::LargePageAllocator>;
			cache->jit = new randomx::JitCompiler();
			cache->initialize = &randomx::initCacheCompile;
			cache->datasetInit = cache->jit->getDatasetInitFunc();
			cache->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::CacheSize);
			break;
		default:
			UNREACHABLE;
		}
	}
	catch (std::exception &ex) {
		// Allocation failed partway: release anything partially built and
		// report failure with nullptr.
		if (cache != nullptr) {
			randomx_release_cache(cache);
			cache = nullptr;
		}
	}
	return cache;
}
// Initialize (or re-initialize) the cache with the given key.
// Also records the key in cache->cacheKey: randomx_create_vm copies it into
// the VM and randomx_vm_set_cache compares against it, but the original code
// (see the commented-out variant it replaced) never set it, leaving those
// comparisons against an empty string.
void randomx_init_cache(randomx_cache *cache, const void *key, size_t keySize) {
	assert(cache != nullptr);
	assert(keySize == 0 || key != nullptr);
	cache->initialize(cache, key, keySize);
	if (keySize != 0) {
		cache->cacheKey.assign(static_cast<const char *>(key), keySize);
	}
	else {
		cache->cacheKey.clear();
	}
}
// Free a cache allocated by randomx_alloc_cache.
void randomx_release_cache(randomx_cache* cache) {
	assert(cache != nullptr);
	// memory may be null if allocation failed partway through alloc_cache.
	if (cache->memory != nullptr) {
		cache->dealloc(cache);
	}
	delete cache;
}
// Allocate an uninitialized dataset (randomx::DatasetSize bytes).
// Returns nullptr on failure; on success the caller owns the dataset and
// must release it with randomx_release_dataset().
// The try/catch had been commented out (likely for debugging), which let an
// allocation failure propagate as an exception instead of returning nullptr
// as this function's contract implies; it is restored here.
randomx_dataset *randomx_alloc_dataset(randomx_flags flags) {
	//fail on 32-bit systems if DatasetSize is >= 4 GiB
	if (randomx::DatasetSize > std::numeric_limits<size_t>::max()) {
		return nullptr;
	}
	randomx_dataset *dataset = nullptr;
	try {
		dataset = new randomx_dataset();
		if (flags & RANDOMX_FLAG_LARGE_PAGES) {
			dataset->dealloc = &randomx::deallocDataset<randomx::LargePageAllocator>;
			dataset->memory = (uint8_t*)randomx::LargePageAllocator::allocMemory(randomx::DatasetSize);
		}
		else {
			dataset->dealloc = &randomx::deallocDataset<randomx::DefaultAllocator>;
			dataset->memory = (uint8_t*)randomx::DefaultAllocator::allocMemory(randomx::DatasetSize);
		}
	}
	catch (std::exception &ex) {
		if (dataset != nullptr) {
			randomx_release_dataset(dataset);
			dataset = nullptr;
		}
	}
	return dataset;
}
// Total number of dataset items (DatasetSize / item size).
constexpr unsigned long DatasetItemCount = randomx::DatasetSize / RANDOMX_DATASET_ITEM_SIZE;

// Returns the number of items in the dataset, so callers can split
// initialization ranges (e.g. across threads).
unsigned long randomx_dataset_item_count() {
	return DatasetItemCount;
}
// Initialize itemCount dataset items starting at startItem from the given
// (already initialized) cache. The commented-out sample main below calls
// this from several threads with disjoint item ranges.
void randomx_init_dataset(randomx_dataset *dataset, randomx_cache *cache, unsigned long startItem, unsigned long itemCount) {
	assert(dataset != nullptr);
	assert(cache != nullptr);
	// The requested range must lie entirely inside the dataset.
	assert(startItem < DatasetItemCount && itemCount <= DatasetItemCount);
	assert(startItem + itemCount <= DatasetItemCount);
	cache->datasetInit(cache, dataset->memory + startItem * randomx::CacheLineSize, startItem, startItem + itemCount);
}
// Expose the raw dataset memory buffer (owned by the dataset; do not free).
void *randomx_get_dataset_memory(randomx_dataset *dataset) {
	assert(dataset != nullptr);
	return dataset->memory;
}
// Free a dataset allocated by randomx_alloc_dataset.
void randomx_release_dataset(randomx_dataset *dataset) {
	assert(dataset != nullptr);
	dataset->dealloc(dataset);
	delete dataset;
}
// Create a virtual machine for the given flag combination.
// Light VMs (no RANDOMX_FLAG_FULL_MEM) require an initialized cache;
// full-memory VMs require a dataset. Returns nullptr on failure.
randomx_vm *randomx_create_vm(randomx_flags flags, randomx_cache *cache, randomx_dataset *dataset) {
	assert(cache != nullptr || (flags & RANDOMX_FLAG_FULL_MEM));
	assert(cache == nullptr || cache->isInitialized());
	assert(dataset != nullptr || !(flags & RANDOMX_FLAG_FULL_MEM));
	randomx_vm *vm = nullptr;
	try {
		// Each combination of FULL_MEM / JIT / HARD_AES / LARGE_PAGES maps to a
		// dedicated VM class: "Light" = cache-backed interpreter/compiler,
		// otherwise dataset-backed; "Secure" variants when RANDOMX_FLAG_SECURE
		// is also set with JIT.
		switch ((int)(flags & (RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES))) {
		case RANDOMX_FLAG_DEFAULT:
			vm = new randomx::InterpretedLightVmDefault();
			break;
		case RANDOMX_FLAG_FULL_MEM:
			vm = new randomx::InterpretedVmDefault();
			break;
		case RANDOMX_FLAG_JIT:
			if (flags & RANDOMX_FLAG_SECURE) {
				vm = new randomx::CompiledLightVmDefaultSecure();
			}
			else {
				vm = new randomx::CompiledLightVmDefault();
			}
			break;
		case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT:
			if (flags & RANDOMX_FLAG_SECURE) {
				vm = new randomx::CompiledVmDefaultSecure();
			}
			else {
				vm = new randomx::CompiledVmDefault();
			}
			break;
		case RANDOMX_FLAG_HARD_AES:
			vm = new randomx::InterpretedLightVmHardAes();
			break;
		case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES:
			vm = new randomx::InterpretedVmHardAes();
			break;
		case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES:
			if (flags & RANDOMX_FLAG_SECURE) {
				vm = new randomx::CompiledLightVmHardAesSecure();
			}
			else {
				vm = new randomx::CompiledLightVmHardAes();
			}
			break;
		case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES:
			if (flags & RANDOMX_FLAG_SECURE) {
				vm = new randomx::CompiledVmHardAesSecure();
			}
			else {
				vm = new randomx::CompiledVmHardAes();
			}
			break;
		case RANDOMX_FLAG_LARGE_PAGES:
			vm = new randomx::InterpretedLightVmLargePage();
			break;
		case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_LARGE_PAGES:
			vm = new randomx::InterpretedVmLargePage();
			break;
		case RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES:
			if (flags & RANDOMX_FLAG_SECURE) {
				vm = new randomx::CompiledLightVmLargePageSecure();
			}
			else {
				vm = new randomx::CompiledLightVmLargePage();
			}
			break;
		case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_LARGE_PAGES:
			if (flags & RANDOMX_FLAG_SECURE) {
				vm = new randomx::CompiledVmLargePageSecure();
			}
			else {
				vm = new randomx::CompiledVmLargePage();
			}
			break;
		case RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES:
			vm = new randomx::InterpretedLightVmLargePageHardAes();
			break;
		case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES:
			vm = new randomx::InterpretedVmLargePageHardAes();
			break;
		case RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES:
			if (flags & RANDOMX_FLAG_SECURE) {
				vm = new randomx::CompiledLightVmLargePageHardAesSecure();
			}
			else {
				vm = new randomx::CompiledLightVmLargePageHardAes();
			}
			break;
		case RANDOMX_FLAG_FULL_MEM | RANDOMX_FLAG_JIT | RANDOMX_FLAG_HARD_AES | RANDOMX_FLAG_LARGE_PAGES:
			if (flags & RANDOMX_FLAG_SECURE) {
				vm = new randomx::CompiledVmLargePageHardAesSecure();
			}
			else {
				vm = new randomx::CompiledVmLargePageHardAes();
			}
			break;
		default:
			UNREACHABLE;
		}
		// Wire up the memory source(s) the VM will read from.
		if(cache != nullptr) {
			// printf("cachedddddddddddd\n"); yes
			vm->setCache(cache);
			vm->cacheKey = cache->cacheKey;
		}
		if(dataset != nullptr){
			// printf("datasetdddddddddddd\n"); no
			vm->setDataset(dataset);
		}
		vm->allocate(); //allocate the scratchpad
	}
	catch (std::exception &ex) {
		// Construction or allocation failed: report failure with nullptr.
		delete vm;
		vm = nullptr;
	}
	return vm;
}
// Attach a (re)initialized cache to an existing VM. Skips the work when
// the VM already holds a cache with the same key.
void randomx_vm_set_cache(randomx_vm *machine, randomx_cache* cache) {
	assert(machine != nullptr);
	assert(cache != nullptr && cache->isInitialized());
	if (machine->cacheKey == cache->cacheKey) {
		return; // same key: nothing to do
	}
	machine->setCache(cache);
	machine->cacheKey = cache->cacheKey;
}
// Attach a dataset to an existing VM.
void randomx_vm_set_dataset(randomx_vm *machine, randomx_dataset *dataset) {
	assert(machine != nullptr);
	assert(dataset != nullptr);
	machine->setDataset(dataset);
}
// Destroy a VM created by randomx_create_vm.
void randomx_destroy_vm(randomx_vm *machine) {
	assert(machine != nullptr);
	delete machine;
}
// Compute the RandomX hash of (input, inputSize) into output
// (RANDOMX_HASH_SIZE bytes). The VM must have an initialized cache
// (light mode) or dataset (full-memory mode) attached.
void randomx_calculate_hash(randomx_vm *machine, const void *input, size_t inputSize, void *output) {
	assert(machine != nullptr);
	assert(inputSize == 0 || input != nullptr);
	assert(output != nullptr);
	alignas(16) uint64_t tempHash[8];
	// Seed the chain: tempHash = Blake2b-512(input).
	int blakeResult = blake2b(tempHash, sizeof(tempHash), input, inputSize, nullptr, 0);
	assert(blakeResult == 0);
	machine->initScratchpad(&tempHash);
	machine->resetRoundingMode();
	// Run RANDOMX_PROGRAM_COUNT chained programs; after each of the first
	// N-1 runs, hash the register file to seed the next program.
	for (int chain = 0; chain < RANDOMX_PROGRAM_COUNT - 1; ++chain) { //RANDOMX_PROGRAM_COUNT =8
		machine->run(&tempHash);
		blakeResult = blake2b(tempHash, sizeof(tempHash), machine->getRegisterFile(), sizeof(randomx::RegisterFile), nullptr, 0);
		assert(blakeResult == 0);
	}
	machine->run(&tempHash);
	// Condense the final VM state into the output hash.
	machine->getFinalResult(output, RANDOMX_HASH_SIZE);
}
/*
int main(int argc, char** argv) {
const uint8_t myKey[] ={ 0x67,0x0f,0x0b,0x99,0x1d,0xc3,0xfe,0x80,0x56,0x04,0xea,0xc3,0x79,0x35,0x1d,0x9a,0xb5,0x21,0xef,0xac,0x60,0x95,0xf2,0x6b,0xca,0xa3,0xa8,0x56,0x83,0x89,0x77,0x99};
const uint8_t myInput[] = { 0x0e,0x0e,0xe4,0xf3,0xf4,0xff,0x05,0x81,0xd4,0x1a,0x87,0xb3,0xa3,0xd7,0xb6,0x40,0x24,0x8d,0x9f,0x34,0x86,0x07,0xe9,0x67,0x55,0x5c,0xce,0x2f,0x40,0xe6,0x5e,0x5e,0x40,0x45,0x46,0x56,0x9e,0xb3,0xe6,
//0xee,0x85,0x00,0x00,
0xe6,0x23,0x00,0x00,
0xcb,0x84,0x81,0x0b,0xc0,0x28,0xa5,0x76,0xec,0xd2,0x0b,0xf9,0xee,0xee,0x43,0x78,0x9c,0x3d,0x55,0xe5,0x54,0xe3,0x05,0xb3,0x46,0x02,0x09,0x64,0x43,0x21,0xd2,0x9e,0x28};
uint8_t hash[RANDOMX_HASH_SIZE];
int initThreadCount =16;
randomx_cache* cache = randomx_alloc_cache(RANDOMX_FLAG_DEFAULT);
randomx_init_cache(cache, myKey, sizeof myKey);
uint32_t datasetItemCount = randomx_dataset_item_count();
printf("datasetItemCount=%d\n", datasetItemCount);
randomx_dataset* dataset = randomx_alloc_dataset(RANDOMX_FLAG_DEFAULT);
std::vector<std::thread> threads;
auto perThread = datasetItemCount / initThreadCount;
auto remainder = datasetItemCount % initThreadCount;
uint32_t startItem = 0;
for (int i = 0; i < initThreadCount; ++i) {
auto count = perThread + (i == initThreadCount - 1 ? remainder : 0);
threads.push_back(std::thread(&randomx_init_dataset, dataset, cache, startItem, count));
startItem += count;
}
for (unsigned i = 0; i < threads.size(); ++i) {
threads[i].join();
}
randomx_release_cache(cache);
randomx_vm* vm = randomx_create_vm(RANDOMX_FLAG_FULL_MEM,nullptr, dataset);
randomx_calculate_hash(vm, &myInput, sizeof myInput, hash);
randomx_destroy_vm(vm);
randomx_release_dataset(dataset);
for (unsigned i = 0; i < RANDOMX_HASH_SIZE; ++i)
printf("%02x", hash[i] & 0xff);
return 0;
}
//8a48e5f9db45ab79d98574c4d81954fe6ac63842214aff73c244b26330b7c9
*/
/*
int main() {
const uint8_t myKey[] ={ 0x67,0x0f,0x0b,0x99,0x1d,0xc3,0xfe,0x80,0x56,0x04,0xea,0xc3,0x79,0x35,0x1d,0x9a,0xb5,0x21,0xef,0xac,0x60,0x95,0xf2,0x6b,0xca,0xa3,0xa8,0x56,0x83,0x89,0x77,0x99};
const uint8_t myInput[] = { 0x0e,0x0e,0xe4,0xf3,0xf4,0xff,0x05,0x81,0xd4,0x1a,0x87,0xb3,0xa3,0xd7,0xb6,0x40,0x24,0x8d,0x9f,0x34,0x86,0x07,0xe9,0x67,0x55,0x5c,0xce,0x2f,0x40,0xe6,0x5e,0x5e,0x40,0x45,0x46,0x56,0x9e,0xb3,0xe6,
//0xee,0x85,0x00,0x00,
0xe6,0x23,0x00,0x00,
0xcb,0x84,0x81,0x0b,0xc0,0x28,0xa5,0x76,0xec,0xd2,0x0b,0xf9,0xee,0xee,0x43,0x78,0x9c,0x3d,0x55,0xe5,0x54,0xe3,0x05,0xb3,0x46,0x02,0x09,0x64,0x43,0x21,0xd2,0x9e,0x28};
char hash[RANDOMX_HASH_SIZE];
//randomx_flags flags = randomx_get_flags();
randomx_flags flags = RANDOMX_FLAG_DEFAULT;
randomx_cache *myCache = randomx_alloc_cache(flags);
randomx_init_cache(myCache, &myKey, sizeof myKey);
randomx_vm *myMachine = randomx_create_vm(flags, myCache, NULL);
randomx_calculate_hash(myMachine, &myInput, sizeof myInput, hash);
randomx_destroy_vm(myMachine);
randomx_release_cache(myCache);
for (unsigned i = 0; i < RANDOMX_HASH_SIZE; ++i)
printf("%02x", hash[i] & 0xff);
printf("\n");
return 0;
}
*/
randomx_cache* cache;
randomx_vm* vm = nullptr;
int main(){
const uint8_t myKey[] ={ 0x67,0x0f,0x0b,0x99,0x1d,0xc3,0xfe,0x80,0x56,0x04,0xea,0xc3,0x79,0x35,0x1d,0x9a,0xb5,0x21,0xef,0xac,0x60,0x95,0xf2,0x6b,0xca,0xa3,0xa8,0x56,0x83,0x89,0x77,0x99};
const uint8_t myInput[] = { 0x0e,0x0e,0xe4,0xf3,0xf4,0xff,0x05,0x81,0xd4,0x1a,0x87,0xb3,0xa3,0xd7,0xb6,0x40,0x24,0x8d,0x9f,0x34,0x86,0x07,0xe9,0x67,0x55,0x5c,0xce,0x2f,0x40,0xe6,0x5e,0x5e,0x40,0x45,0x46,0x56,0x9e,0xb3,0xe6,
0xe6,0x23,0x00,0x00,
0xcb,0x84,0x81,0x0b,0xc0,0x28,0xa5,0x76,0xec,0xd2,0x0b,0xf9,0xee,0xee,0x43,0x78,0x9c,0x3d,0x55,0xe5,0x54,0xe3,0x05,0xb3,0x46,0x02,0x09,0x64,0x43,0x21,0xd2,0x9e,0x28};
// const uint8_t myKey[] ={146, 6, 71, 248, 241, 11, 139, 72, 70, 73, 173, 248, 53, 153, 197, 184, 107, 186, 19, 126, 126, 178, 46, 149, 221, 135, 57, 217, 133, 40, 246, 119};
// const uint8_t myInput[] = {0, 0, 0, 14, 246, 237, 44, 156, 4, 131, 10, 137, 157, 56, 143, 188, 94, 194, 80, 172, 219, 123, 75, 112, 250, 36, 34, 195, 214, 232, 2, 195, 72, 210, 201, 0, 0, 0, 0, 0, 128, 7, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
uint8_t hash[RANDOMX_HASH_SIZE];
cache = randomx_alloc_cache(RANDOMX_FLAG_DEFAULT);
randomx_init_cache(cache, myKey, sizeof(myKey));
vm = randomx_create_vm(RANDOMX_FLAG_DEFAULT, cache, nullptr);
// randomx_vm_set_cache(vm, cache);
randomx_calculate_hash(vm, myInput, sizeof(myInput), hash);
for (unsigned i = 0; i < RANDOMX_HASH_SIZE; ++i)
// printf("%02d ", hash[i] & 0xff);
printf("%02x", hash[i] & 0xff);
//assert(equalsHex(hash, "1a7151b1367507ded1e9af0b97da8ae23ec84e9f352eb731eab8f0f060710300"));
}
//g++ aes_hash.cpp allocator.cpp argon2_avx2.c argon2_core.c argon2_ref.c argon2_ssse3.c assembly_generator_x86.cpp blake2_generator.cpp bytecode_machine.cpp cpu.cpp dataset.cpp instruction.cpp instructions_portable.cpp randomx.cpp reciprocal.c soft_aes.cpp superscalar.cpp virtual_machine.cpp virtual_memory.cpp vm_compiled.cpp vm_compiled_light.cpp vm_interpreted.cpp vm_interpreted_light.cpp ./blake2/blake2b.c jit_compiler_x86.cpp jit_compiler_x86_static.S

Some files were not shown because too many files have changed in this diff Show More