Skip to content

Commit

Permalink
fixed fp_noncomp bug, ci toolchain script update, increased DRAM latency to 100 cycles
Browse files Browse the repository at this point in the history
  • Loading branch information
Blaise Tine committed Nov 23, 2020
1 parent e281d32 commit 2d4fef6
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 28 deletions.
10 changes: 6 additions & 4 deletions ci/toolchain_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ set -e

REPOSITORY=https://github.com/vortexgpgpu/vortex-toolchain-prebuilt/raw/master

DESTDIR="${DESTDIR:=/opt}"

riscv()
{
for x in {a..o}
Expand All @@ -14,7 +16,7 @@ riscv()
cat riscv-gnu-toolchain.tar.bz2.parta* > riscv-gnu-toolchain.tar.bz2
tar -xvf riscv-gnu-toolchain.tar.bz2
rm -f riscv-gnu-toolchain.tar.bz2*
sudo cp -r riscv-gnu-toolchain /opt/
cp -r riscv-gnu-toolchain $DESTDIR
rm -rf riscv-gnu-toolchain
}

Expand All @@ -27,7 +29,7 @@ llvm()
cat llvm-riscv.tar.bz2.parta* > llvm-riscv.tar.bz2
tar -xvf llvm-riscv.tar.bz2
rm -f llvm-riscv.tar.bz2*
sudo cp -r llvm-riscv /opt/
cp -r llvm-riscv $DESTDIR
rm -rf llvm-riscv
}

Expand All @@ -36,7 +38,7 @@ pocl()
wget $REPOSITORY/pocl/ubuntu/bionic/pocl.tar.bz2
tar -xvf pocl.tar.bz2
rm -f pocl.tar.bz2
sudo cp -r pocl /opt/
cp -r pocl $DESTDIR
rm -rf pocl
}

Expand All @@ -45,7 +47,7 @@ verilator()
wget $REPOSITORY/verilator/ubuntu/bionic/verilator.tar.bz2
tar -xvf verilator.tar.bz2
rm -f verilator.tar.bz2
sudo cp -r verilator /opt/
cp -r verilator $DESTDIR
rm -rf verilator
}

Expand Down
35 changes: 22 additions & 13 deletions driver/opae/vlsim/opae_sim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#define CCI_WQ_SIZE 16

#define ENABLE_DRAM_STALLS
#define DRAM_LATENCY 4
#define DRAM_LATENCY 100
#define DRAM_RQ_SIZE 16
#define DRAM_STALLS_MODULO 16

Expand Down Expand Up @@ -261,14 +261,14 @@ void opae_sim::avs_bus() {
if (dram_rd_it != dram_reads_.end()) {
vortex_afu_->avs_readdatavalid = 1;
memcpy(vortex_afu_->avs_readdata, dram_rd_it->block.data(), CACHE_BLOCK_SIZE);
uint32_t tag = dram_rd_it->tag;
uint32_t addr = dram_rd_it->addr;
dram_reads_.erase(dram_rd_it);
/*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, tag);
/*printf("%0ld: [sim] DRAM Rd Rsp: addr=%x, pending={", timestamp, addr * CACHE_BLOCK_SIZE);
for (auto& req : dram_reads_) {
if (req.cycles_left != 0)
printf(" !%0x", req.tag);
printf(" !%0x", req.addr * CACHE_BLOCK_SIZE);
else
printf(" %0x", req.tag);
printf(" %0x", req.addr * CACHE_BLOCK_SIZE);
}
printf("}\n");*/
}
Expand Down Expand Up @@ -300,18 +300,27 @@ void opae_sim::avs_bus() {
}
if (vortex_afu_->avs_read) {
assert(0 == vortex_afu_->mem_bank_select);
dram_rd_req_t dram_req;
dram_req.cycles_left = DRAM_LATENCY;
unsigned base_addr = (vortex_afu_->avs_address * CACHE_BLOCK_SIZE);
ram_.read(base_addr, CACHE_BLOCK_SIZE, dram_req.block.data());
dram_req.tag = base_addr;
dram_rd_req_t dram_req;

dram_req.addr = vortex_afu_->avs_address;

ram_.read(vortex_afu_->avs_address * CACHE_BLOCK_SIZE, CACHE_BLOCK_SIZE, dram_req.block.data());

dram_req.cycles_left = DRAM_LATENCY;
for (auto& req : dram_reads_) {
if (req.addr == dram_req.addr) {
dram_req.cycles_left = req.cycles_left;
break;
}
}

dram_reads_.emplace_back(dram_req);
/*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, base_addr);
/*printf("%0ld: [sim] DRAM Rd Req: addr=%x, pending={", timestamp, dram_req.addr * CACHE_BLOCK_SIZE);
for (auto& req : dram_reads_) {
if (req.cycles_left != 0)
printf(" !%0x", req.tag);
printf(" !%0x", req.addr * CACHE_BLOCK_SIZE);
else
printf(" %0x", req.tag);
printf(" %0x", req.addr * CACHE_BLOCK_SIZE);
}
printf("}\n");*/
}
Expand Down
2 changes: 1 addition & 1 deletion driver/opae/vlsim/opae_sim.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class opae_sim {
typedef struct {
int cycles_left;
std::array<uint8_t, CACHE_BLOCK_SIZE> block;
uint32_t tag;
uint32_t addr;
} dram_rd_req_t;

typedef struct {
Expand Down
12 changes: 6 additions & 6 deletions hw/rtl/fp_cores/VX_fp_noncomp.v
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ module VX_fp_noncomp #(
case (frm_r) // use LSB to distinguish MIN and MAX
3: fminmax_res[i] = a_smaller[i] ? dataa_r[i] : datab_r[i];
4: fminmax_res[i] = a_smaller[i] ? datab_r[i] : dataa_r[i];
default: fminmax_res[i] = 32'hdeadbeaf; // don't care value
default: fminmax_res[i] = 'x; // don't care value
endcase
end
end
Expand All @@ -160,7 +160,7 @@ module VX_fp_noncomp #(
0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
default: fsgnj_res[i] = 32'hdeadbeaf; // don't care value
default: fsgnj_res[i] = 'x; // don't care value
endcase
end
end
Expand Down Expand Up @@ -192,16 +192,16 @@ module VX_fp_noncomp #(
`FRM_RDN: begin
if (a_type[i].is_nan || b_type[i].is_nan) begin
fcmp_res[i] = 32'h0; // result is 0 when either operand is NaN
// ** FEQS only raise NV flag when either operand is signaling NaN
fcmp_excp[i] = {(a_type[i].is_signaling | b_type[i].is_signaling), 4'h0};
// FEQS only raise NV flag when either operand is signaling NaN
fcmp_excp[i] = {(a_type[i].is_signaling | b_type[i].is_signaling), 4'h0};
end
else begin
fcmp_res[i] = {31'h0, ab_equal[i]};
fcmp_excp[i] = 5'h0;
end
end
default: begin
fcmp_res[i] = 32'hdeadbeaf; // don't care value
fcmp_res[i] = 'x; // don't care value
fcmp_excp[i] = 5'h0;
end
endcase
Expand All @@ -226,7 +226,7 @@ module VX_fp_noncomp #(
end
//`FPU_MISC:
default: begin
case (frm)
case (frm_r)
0,1,2: begin
tmp_result[i] = fsgnj_res[i];
{tmp_fflags[i].NV, tmp_fflags[i].DZ, tmp_fflags[i].OF, tmp_fflags[i].UF, tmp_fflags[i].NX} = 5'h0;
Expand Down
16 changes: 13 additions & 3 deletions hw/simulate/simulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include <iomanip>

#define ENABLE_DRAM_STALLS
#define DRAM_LATENCY 4
#define DRAM_LATENCY 100
#define DRAM_RQ_SIZE 16
#define DRAM_STALLS_MODULO 16

Expand Down Expand Up @@ -180,9 +180,19 @@ void Simulator::eval_dram_bus() {
}
} else {
dram_req_t dram_req;
dram_req.cycles_left = DRAM_LATENCY;
dram_req.tag = vortex_->dram_req_tag;

dram_req.tag = vortex_->dram_req_tag;
dram_req.addr = vortex_->dram_req_addr;

ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data());

dram_req.cycles_left = DRAM_LATENCY;
for (auto& req : dram_rsp_vec_) {
if (req.addr == dram_req.addr) {
dram_req.cycles_left = req.cycles_left;
break;
}
}
dram_rsp_vec_.emplace_back(dram_req);
}
}
Expand Down
3 changes: 2 additions & 1 deletion hw/simulate/simulator.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ class Simulator {
typedef struct {
int cycles_left;
std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
unsigned tag;
uint32_t tag;
uint32_t addr;
} dram_req_t;

std::unordered_map<int, std::stringstream> print_bufs_;
Expand Down

0 comments on commit 2d4fef6

Please sign in to comment.