diff --git a/ci/toolchain_install.sh b/ci/toolchain_install.sh index 36e8ca084..8de6fff9b 100755 --- a/ci/toolchain_install.sh +++ b/ci/toolchain_install.sh @@ -5,6 +5,8 @@ set -e REPOSITORY=https://github.com/vortexgpgpu/vortex-toolchain-prebuilt/raw/master +DESTDIR="${DESTDIR:=/opt}" + riscv() { for x in {a..o} @@ -14,7 +16,7 @@ riscv() cat riscv-gnu-toolchain.tar.bz2.parta* > riscv-gnu-toolchain.tar.bz2 tar -xvf riscv-gnu-toolchain.tar.bz2 rm -f riscv-gnu-toolchain.tar.bz2* - sudo cp -r riscv-gnu-toolchain /opt/ + cp -r riscv-gnu-toolchain $DESTDIR rm -rf riscv-gnu-toolchain } @@ -27,7 +29,7 @@ llvm() cat llvm-riscv.tar.bz2.parta* > llvm-riscv.tar.bz2 tar -xvf llvm-riscv.tar.bz2 rm -f llvm-riscv.tar.bz2* - sudo cp -r llvm-riscv /opt/ + cp -r llvm-riscv $DESTDIR rm -rf llvm-riscv } @@ -36,7 +38,7 @@ pocl() wget $REPOSITORY/pocl/ubuntu/bionic/pocl.tar.bz2 tar -xvf pocl.tar.bz2 rm -f pocl.tar.bz2 - sudo cp -r pocl /opt/ + cp -r pocl $DESTDIR rm -rf pocl } @@ -45,7 +47,7 @@ verilator() wget $REPOSITORY/verilator/ubuntu/bionic/verilator.tar.bz2 tar -xvf verilator.tar.bz2 rm -f verilator.tar.bz2 - sudo cp -r verilator /opt/ + cp -r verilator $DESTDIR rm -rf verilator } diff --git a/driver/opae/vlsim/opae_sim.cpp b/driver/opae/vlsim/opae_sim.cpp index d591ea01d..0b9513576 100644 --- a/driver/opae/vlsim/opae_sim.cpp +++ b/driver/opae/vlsim/opae_sim.cpp @@ -9,7 +9,7 @@ #define CCI_WQ_SIZE 16 #define ENABLE_DRAM_STALLS -#define DRAM_LATENCY 4 +#define DRAM_LATENCY 100 #define DRAM_RQ_SIZE 16 #define DRAM_STALLS_MODULO 16 @@ -261,14 +261,14 @@ void opae_sim::avs_bus() { if (dram_rd_it != dram_reads_.end()) { vortex_afu_->avs_readdatavalid = 1; memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE); - uint32_t tag = dram_rd_it->tag; + uint32_t addr = dram_rd_it->addr; dram_reads_.erase(dram_rd_it); - /*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, tag); + /*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, addr * CACHE_BLOCK_SIZE); for (auto& req : dram_reads_) { if (req.cycles_left != 0) - printf(" !%0x", req.tag); + printf(" !%0x", req.addr * CACHE_BLOCK_SIZE); else - printf(" %0x", req.tag); + printf(" %0x", req.addr * CACHE_BLOCK_SIZE); } printf("}\n");*/ } @@ -300,18 +300,27 @@ void opae_sim::avs_bus() { } if (vortex_afu_->avs_read) { assert(0 == vortex_afu_->mem_bank_select); - dram_rd_req_t dram_req; - dram_req.cycles_left = DRAM_LATENCY; - unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE); - ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data()); - dram_req.tag = base_addr; + dram_rd_req_t dram_req; + + dram_req.addr = vortex_afu_->avs_address; + + ram_.read(vortex_afu_->avs_address * CACHE_BLOCK_SIZE, CACHE_BLOCK_SIZE, dram_req.block.data()); + + dram_req.cycles_left = DRAM_LATENCY; + for (auto& req : dram_reads_) { + if (req.addr == dram_req.addr) { + dram_req.cycles_left = req.cycles_left; + break; + } + } + dram_reads_.emplace_back(dram_req); - /*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, base_addr); + /*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, dram_req.addr * CACHE_BLOCK_SIZE); for (auto& req : dram_reads_) { if (req.cycles_left != 0) - printf(" !%0x", req.tag); + printf(" !%0x", req.addr * CACHE_BLOCK_SIZE); else - printf(" %0x", req.tag); + printf(" %0x", req.addr * CACHE_BLOCK_SIZE); } printf("}\n");*/ } diff --git a/driver/opae/vlsim/opae_sim.h b/driver/opae/vlsim/opae_sim.h index 54421d261..cfc750dc9 100644 --- a/driver/opae/vlsim/opae_sim.h +++ b/driver/opae/vlsim/opae_sim.h @@ -41,7 +41,7 @@ class opae_sim { typedef struct { int cycles_left; std::array block; - uint32_t tag; + uint32_t addr; } dram_rd_req_t; typedef struct { diff --git a/hw/rtl/fp_cores/VX_fp_noncomp.v b/hw/rtl/fp_cores/VX_fp_noncomp.v index 107cac5f1..4959ace4f 100644 --- a/hw/rtl/fp_cores/VX_fp_noncomp.v +++ b/hw/rtl/fp_cores/VX_fp_noncomp.v @@ -147,7 +147,7 @@ module VX_fp_noncomp #( case (frm_r) // use LSB to distinguish MIN and MAX 3: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i]; 4: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i]; - default: fminmax_res[i] = 32'hdeadbeaf; // don't care value + default: fminmax_res[i] = 'x; // don't care value endcase end end @@ -160,7 +160,7 @@ module VX_fp_noncomp #( 0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]}; 1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]}; 2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]}; - default: fsgnj_res[i] = 32'hdeadbeaf; // don't care value + default: fsgnj_res[i] = 'x; // don't care value endcase end end @@ -192,8 +192,8 @@ module VX_fp_noncomp #( `FRM_RDN: begin if (a_type[i].is_nan || b_type[i].is_nan) begin fcmp_res[i] = 32'h0; // result is 0 when either operand is NaN - // ** FEQS only raise NV flag when either operand is signaling NaN - fcmp_excp[i] = {(a_type[i].is_signaling | b_type[i].is_signaling), 4'h0}; + // FEQS only raise NV flag when either operand is signaling NaN + fcmp_excp[i] = {(a_type[i].is_signaling | b_type[i].is_signaling), 4'h0}; end else begin fcmp_res[i] = {31'h0, ab_equal[i]}; @@ -201,7 +201,7 @@ module VX_fp_noncomp #( end end default: begin - fcmp_res[i] = 32'hdeadbeaf; // don't care value + fcmp_res[i] = 'x; // don't care value fcmp_excp[i] = 5'h0; end endcase @@ -226,7 +226,7 @@ module VX_fp_noncomp #( end //`FPU_MISC: default: begin - case (frm) + case (frm_r) 0,1,2: begin tmp_result[i] = fsgnj_res[i]; {tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0; diff --git a/hw/simulate/simulator.cpp b/hw/simulate/simulator.cpp index 206e0eff3..09a8e4c5c 100644 --- a/hw/simulate/simulator.cpp +++ b/hw/simulate/simulator.cpp @@ -4,7 +4,7 @@ #include #define ENABLE_DRAM_STALLS -#define DRAM_LATENCY 4 +#define DRAM_LATENCY 100 #define DRAM_RQ_SIZE 16 #define DRAM_STALLS_MODULO 16 @@ -180,9 +180,19 @@ void Simulator::eval_dram_bus() { } } else { dram_req_t dram_req; - dram_req.cycles_left = DRAM_LATENCY; - dram_req.tag = vortex_->dram_req_tag; + + dram_req.tag = vortex_->dram_req_tag; + dram_req.addr = vortex_->dram_req_addr; + ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data()); + + dram_req.cycles_left = DRAM_LATENCY; + for (auto& req : dram_rsp_vec_) { + if (req.addr == dram_req.addr) { + dram_req.cycles_left = req.cycles_left; + break; + } + } dram_rsp_vec_.emplace_back(dram_req); } } diff --git a/hw/simulate/simulator.h b/hw/simulate/simulator.h index d688ef885..0b683aeaf 100644 --- a/hw/simulate/simulator.h +++ b/hw/simulate/simulator.h @@ -51,7 +51,8 @@ class Simulator { typedef struct { int cycles_left; std::array block; - unsigned tag; + uint32_t tag; + uint32_t addr; } dram_req_t; std::unordered_map print_bufs_;