/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
	* Redistributions of source code must retain the above copyright
	  notice, this list of conditions and the following disclaimer.
	* Redistributions in binary form must reproduce the above copyright
	  notice, this list of conditions and the following disclaimer in the
	  documentation and/or other materials provided with the distribution.
	* Neither the name of the copyright holder nor the
	  names of its contributors may be used to endorse or promote products
	  derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
|
|||
|
|
|||
|
#include <iostream>
#include <iomanip>
#include <stdexcept>
#include <sstream>
#include <cmath>
#include <cfloat>
#include <cstdint>
#include <cstdio>
#include <utility>
#include "vm_interpreted.hpp"
#include "dataset.hpp"
#include "intrin_portable.h"
#include "reciprocal.h"
|
|||
|
|
|||
|
namespace randomx {
|
|||
|
|
|||
|
template<class Allocator, bool softAes>
|
|||
|
void InterpretedVm<Allocator, softAes>::setDataset(randomx_dataset* dataset) {
|
|||
|
printf("Interpreted setDataset\n");
|
|||
|
datasetPtr = dataset;
|
|||
|
mem.memory = dataset->memory;
|
|||
|
}
|
|||
|
|
|||
|
template<class Allocator, bool softAes>
|
|||
|
void InterpretedVm<Allocator, softAes>::run(void* seed) {
|
|||
|
printf("Interpreted run\n");
|
|||
|
VmBase<Allocator, softAes>::generateProgram(seed);
|
|||
|
randomx_vm::initialize();
|
|||
|
execute();
|
|||
|
}
|
|||
|
|
|||
|
template<class Allocator, bool softAes>
|
|||
|
void InterpretedVm<Allocator, softAes>::execute() {
|
|||
|
|
|||
|
NativeRegisterFile nreg;
|
|||
|
printf("InterpretedVm execute\n");
|
|||
|
for(unsigned i = 0; i < RegisterCountFlt; ++i) //RegisterCountFlt = 4
|
|||
|
nreg.a[i] = rx_load_vec_f128(®.a[i].lo);
|
|||
|
|
|||
|
//printf("nreg.r[0]=%016llx\n",nreg.r[0]);
|
|||
|
//printf("nreg.r[1]=%016llx\n",nreg.r[1]);
|
|||
|
//printf("nreg.r[2]=%016llx\n",nreg.r[2]);
|
|||
|
//printf("nreg.r[3]=%016llx\n",nreg.r[3]);
|
|||
|
//printf("nreg.r[4]=%016llx\n",nreg.r[4]);
|
|||
|
//printf("nreg.r[5]=%016llx\n",nreg.r[5]);
|
|||
|
//printf("nreg.r[6]=%016llx\n",nreg.r[6]);
|
|||
|
//printf("nreg.r[7]=%016llx\n",nreg.r[7]);
|
|||
|
|
|||
|
//nreg.r 初始8个寄存器都是0;
|
|||
|
|
|||
|
compileProgram(program, bytecode, nreg);
|
|||
|
|
|||
|
uint32_t spAddr0 = mem.mx;
|
|||
|
uint32_t spAddr1 = mem.ma;
|
|||
|
|
|||
|
for(unsigned ic = 0; ic < RANDOMX_PROGRAM_ITERATIONS; ++ic) { //2048 RANDOMX_PROGRAM_ITERATIONS
|
|||
|
uint64_t spMix = nreg.r[config.readReg0] ^ nreg.r[config.readReg1];
|
|||
|
//printf("ic= %0d,spAddr0= %0d,spAddr1=%0d,spMix=%0d,config.readReg0= %0d,config.readReg1=%0d\n",ic,spAddr0,spAddr1,spMix,config.readReg0,config.readReg1);
|
|||
|
spAddr0 ^= spMix;
|
|||
|
spAddr0 &= ScratchpadL3Mask64;
|
|||
|
spAddr1 ^= spMix >> 32;
|
|||
|
spAddr1 &= ScratchpadL3Mask64; //1cycle
|
|||
|
|
|||
|
//printf("ic= %0d,spAddr0= %08lx,spAddr1=%08lx",ic,spAddr0,spAddr1);
|
|||
|
for (unsigned i = 0; i < RegistersCount; ++i) //并行执行,读需要一个cycle
|
|||
|
nreg.r[i] ^= load64(scratchpad + spAddr0 + 8 * i);
|
|||
|
|
|||
|
for (unsigned i = 0; i < RegisterCountFlt; ++i) //并行执行,读需要一个cycle,这里ram可以设置为读写总线为128bit
|
|||
|
nreg.f[i] = rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * i);
|
|||
|
|
|||
|
for (unsigned i = 0; i < RegisterCountFlt; ++i) //并行执行,需要两个cycle
|
|||
|
nreg.e[i] = maskRegisterExponentMantissa(config, rx_cvt_packed_int_vec_f128(scratchpad + spAddr1 + 8 * (RegisterCountFlt + i)));
|
|||
|
|
|||
|
executeBytecode(bytecode, scratchpad, config);
|
|||
|
|
|||
|
mem.mx ^= nreg.r[config.readReg2] ^ nreg.r[config.readReg3];
|
|||
|
mem.mx &= CacheLineAlignMask;
|
|||
|
|
|||
|
// printf("ic= %0d,datasetOffset= %08lx,mem.ma=%08lx\n",ic,datasetOffset,mem.ma);
|
|||
|
//datasetPrefetch(datasetOffset + mem.mx);
|
|||
|
datasetRead(datasetOffset + mem.ma, nreg.r); //从memory中提取数据用于填充nreg.r 8个通用寄存器; 150cycle,连续读数据量为64byte;
|
|||
|
std::swap(mem.mx, mem.ma);
|
|||
|
|
|||
|
//if(ic== (RANDOMX_PROGRAM_ITERATIONS-1)){for (int i = 0; i < RegistersCount; ++i) printf("nreg.r[%d]= %016llx\n",i,nreg.r[i]);}
|
|||
|
|
|||
|
for (unsigned i = 0; i < RegistersCount; ++i)
|
|||
|
store64(scratchpad + spAddr1 + 8 * i, nreg.r[i]);
|
|||
|
|
|||
|
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
|||
|
nreg.f[i] = rx_xor_vec_f128(nreg.f[i], nreg.e[i]);
|
|||
|
|
|||
|
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
|||
|
rx_store_vec_f128((double*)(scratchpad + spAddr0 + 16 * i), nreg.f[i]); //ram写不需要cycle,但考虑到通道有限,这里上面两步写算一个cycle
|
|||
|
|
|||
|
spAddr0 = 0;
|
|||
|
spAddr1 = 0;
|
|||
|
}
|
|||
|
|
|||
|
for (unsigned i = 0; i < RegistersCount; ++i)
|
|||
|
store64(®.r[i], nreg.r[i]);
|
|||
|
|
|||
|
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
|||
|
rx_store_vec_f128(®.f[i].lo, nreg.f[i]);
|
|||
|
|
|||
|
for (unsigned i = 0; i < RegisterCountFlt; ++i)
|
|||
|
rx_store_vec_f128(®.e[i].lo, nreg.e[i]);
|
|||
|
}
|
|||
|
|
|||
|
template<class Allocator, bool softAes>
|
|||
|
void InterpretedVm<Allocator, softAes>::datasetRead(uint64_t address, int_reg_t(&r)[RegistersCount]) {
|
|||
|
//printf("vm_interpreted datasetRead\n");
|
|||
|
uint64_t* datasetLine = (uint64_t*)(mem.memory + address);
|
|||
|
for (int i = 0; i < RegistersCount; ++i)
|
|||
|
r[i] ^= datasetLine[i];
|
|||
|
|
|||
|
//printf("address= %016llx\n",address);
|
|||
|
//for (int i = 0; i < RegistersCount; ++i) printf("datasetLine[%d]= %016llx\n",i,datasetLine[i]);
|
|||
|
//for (int i = 0; i < RegistersCount; ++i) printf("r[%d]= %016llx\n",i,r[i]);
|
|||
|
|
|||
|
//uint64_t* datasetLine = (uint64_t*)(mem.memory + 1);
|
|||
|
//for (int i = 0; i < RegistersCount; ++i){
|
|||
|
// printf("datasetLine[%d]=%0llx\n",i,datasetLine[i]);
|
|||
|
// r[i] ^= datasetLine[i];
|
|||
|
//}
|
|||
|
}
|
|||
|
|
|||
|
template<class Allocator, bool softAes>
|
|||
|
void InterpretedVm<Allocator, softAes>::datasetPrefetch(uint64_t address) {
|
|||
|
//printf("vm_interpreted datasetPrefetch\n");
|
|||
|
rx_prefetch_nta(mem.memory + address);
|
|||
|
}
|
|||
|
|
|||
|
template class InterpretedVm<AlignedAllocator<CacheLineSize>, false>;
|
|||
|
template class InterpretedVm<AlignedAllocator<CacheLineSize>, true>;
|
|||
|
template class InterpretedVm<LargePageAllocator, false>;
|
|||
|
template class InterpretedVm<LargePageAllocator, true>;
|
|||
|
}
|