diff --git a/software/spmd/bsg_cuda_lite_runtime/hardware_shared_mem_load_store/Makefile b/software/spmd/bsg_cuda_lite_runtime/hardware_shared_mem_load_store/Makefile
new file mode 100644
index 000000000..f5c87bb1a
--- /dev/null
+++ b/software/spmd/bsg_cuda_lite_runtime/hardware_shared_mem_load_store/Makefile
@@ -0,0 +1,34 @@
+#########################################################
+# Network Configuration
+# Defaults used unless already set; note make stores "$(bsg_tiles_Y)+1" as text, it does no arithmetic.
+ bsg_global_X ?= $(bsg_tiles_X)
+ bsg_global_Y ?= $(bsg_tiles_Y)+1
+
+#########################################################
+# Tile group configuration
+# Origin of the tile group; defaults used unless already set.
+ bsg_tiles_org_X ?= 0
+ bsg_tiles_org_Y ?= 1
+
+# Tile group dimensions; defaults used unless already set.
+ bsg_tiles_X ?= 4
+ bsg_tiles_Y ?= 4
+
+
+all: main.run
+
+
+KERNEL_NAME ?=kernel_hardware_shared_mem_load_store
+
+OBJECT_FILES=main.o kernel_hardware_shared_mem_load_store.o
+
+include ../../Makefile.include
+
+
+main.riscv: $(LINK_SCRIPT) $(OBJECT_FILES) $(SPMD_COMMON_OBJECTS) $(BSG_MANYCORE_LIB) ../../common/crt.o
+	$(RISCV_LINK) $(OBJECT_FILES) $(SPMD_COMMON_OBJECTS) -L. "-l:$(BSG_MANYCORE_LIB)" -o $@ $(RISCV_LINK_OPTS)
+
+
+main.o: Makefile
+
+include ../../../mk/Makefile.tail_rules
diff --git a/software/spmd/bsg_cuda_lite_runtime/hardware_shared_mem_load_store/kernel_hardware_shared_mem_load_store.cpp b/software/spmd/bsg_cuda_lite_runtime/hardware_shared_mem_load_store/kernel_hardware_shared_mem_load_store.cpp
new file mode 100644
index 000000000..bbb6814bd
--- /dev/null
+++ b/software/spmd/bsg_cuda_lite_runtime/hardware_shared_mem_load_store/kernel_hardware_shared_mem_load_store.cpp
@@ -0,0 +1,76 @@
+// * This kernel loads a memory block from DRAM into hardware
+//   tile group shared memory, and stores it back to another
+//   array in DRAM to be compared.
+// * Tile group dimensions are fixed at 4x4.
+
+// TEMPLATE_TG_DIM_X/Y must be defined before bsg_manycore.h is
+// included. bsg_tiles_X and bsg_tiles_Y must also be defined for
+// legacy reasons, but they are deprecated.
+
+
+#define TEMPLATE_TG_DIM_X 4
+#define TEMPLATE_TG_DIM_Y 4
+#define TEMPLATE_BLOCK_SIZE 1024
+#define TEMPLATE_STRIPE_SIZE 1
+#define bsg_tiles_X TEMPLATE_TG_DIM_X
+#define bsg_tiles_Y TEMPLATE_TG_DIM_Y
+
+#include // NOTE(review): header name is missing in this copy (presumably bsg_manycore.h, per the comment above) - confirm against upstream
+#include "kernel_hardware_shared_mem_load_store.hpp"
+#include // NOTE(review): header name is missing in this copy - confirm against upstream
+#include "bsg_shared_mem.hpp"
+
+using namespace bsg_manycore;
+
+
+bsg_barrier barrier; // file-scope barrier object used by both functions below
+
+
+template // NOTE(review): parameter list is missing in this copy; the body uses TG_DIM_X, TG_DIM_Y, BLOCK_SIZE and T - restore from upstream
+        int  __attribute__ ((noinline))
+        hardware_shared_mem_load_store(T *A, T *B) { // copies A -> A_sh -> B; always returns 0
+
+                // Declare tile-group shared memory
+                TileGroupSharedMem A_sh; // NOTE(review): template arguments appear to be missing in this copy
+
+                for (int iter_x = __bsg_id; iter_x < BLOCK_SIZE; iter_x += TG_DIM_X * TG_DIM_Y) { // start at __bsg_id, step by TG_DIM_X * TG_DIM_Y
+                        A_sh[iter_x] = A[iter_x]; // load: A -> A_sh
+                }
+
+                barrier.sync(); // presumably waits for all tiles to finish the loads before any read of A_sh - verify
+
+                for (int iter_x = __bsg_id; iter_x < BLOCK_SIZE; iter_x += TG_DIM_X * TG_DIM_Y) { // same index sequence as the load loop
+                        B[iter_x] = A_sh[iter_x]; // store: A_sh -> B
+                }
+
+                barrier.sync();
+
+                return 0;
+        }
+
+
+extern "C" {
+        int  __attribute__ ((noinline)) kernel_hardware_shared_mem_load_store(float *A, // source array, read by the helper
+                                                                              float *sum, // passed to the helper as B (destination), despite the name
+                                                                              uint32_t WIDTH, // not referenced in the visible body
+                                                                              uint32_t block_size) { // not referenced in the visible body
+                int rc;
+                bsg_cuda_print_stat_kernel_start();
+
+                rc = hardware_shared_mem_load_store (A, // NOTE(review): explicit template arguments appear to be missing in this copy
+                                                     sum);
+
+                barrier.sync();
+
+                bsg_cuda_print_stat_kernel_end();
+
+                return rc; // helper's return value, which is always 0 in the visible code
+        }
+}
diff --git a/software/spmd/bsg_cuda_lite_runtime/hardware_shared_mem_load_store/kernel_hardware_shared_mem_load_store.hpp b/software/spmd/bsg_cuda_lite_runtime/hardware_shared_mem_load_store/kernel_hardware_shared_mem_load_store.hpp
new file mode 100644
index 000000000..e58692d7c
--- /dev/null
+++ b/software/spmd/bsg_cuda_lite_runtime/hardware_shared_mem_load_store/kernel_hardware_shared_mem_load_store.hpp
@@ -0,0 +1,5 @@
+#ifndef __KERNEL_HARDWARE_SHARED_MEM_LOAD_STORE_HPP
+#define __KERNEL_HARDWARE_SHARED_MEM_LOAD_STORE_HPP
+#include // NOTE(review): header name is missing in this copy - confirm against upstream
+
+#endif //__KERNEL_HARDWARE_SHARED_MEM_LOAD_STORE_HPP
diff --git a/software/spmd/bsg_cuda_lite_runtime/hardware_shared_mem_load_store/main.c b/software/spmd/bsg_cuda_lite_runtime/hardware_shared_mem_load_store/main.c
new file mode 120000
index 000000000..24daac669
--- /dev/null
+++ b/software/spmd/bsg_cuda_lite_runtime/hardware_shared_mem_load_store/main.c
@@ -0,0 +1 @@
+../main/main.c
\ No newline at end of file