From f5d6717ae415bdbfde424b01bc99f646656d4089 Mon Sep 17 00:00:00 2001 From: Dan Petrisko Date: Sun, 13 Aug 2023 22:15:03 -0700 Subject: [PATCH] Adding DMA --- libraries/bsg_manycore_cuda.cpp | 36 +++-- libraries/bsg_manycore_memsys.c | 1 + libraries/bsg_manycore_memsys.h | 1 + .../dma/blackparrot/bsg_manycore_dma.cpp | 128 ++++++++++++++++++ libraries/features/dma/blackparrot/feature.mk | 44 ++++++ .../platforms/bigblade-vcs/compilation.mk | 1 - .../hammerblade-vcs/bsg_manycore_platform.cpp | 35 +---- .../hammerblade-vcs/bsg_manycore_platform.hpp | 39 ++++++ .../platforms/hammerblade-vcs/library.mk | 7 +- libraries/platforms/hammerblade-vcs/link.mk | 2 +- machine.mk | 2 +- platform.mk | 2 +- 12 files changed, 240 insertions(+), 58 deletions(-) create mode 100644 libraries/features/dma/blackparrot/bsg_manycore_dma.cpp create mode 100644 libraries/features/dma/blackparrot/feature.mk create mode 100644 libraries/platforms/hammerblade-vcs/bsg_manycore_platform.hpp diff --git a/libraries/bsg_manycore_cuda.cpp b/libraries/bsg_manycore_cuda.cpp index 69ff7f4eb..6df811f60 100644 --- a/libraries/bsg_manycore_cuda.cpp +++ b/libraries/bsg_manycore_cuda.cpp @@ -2105,12 +2105,14 @@ int hb_mc_device_pod_dma_to_device(hb_mc_device_t *device, hb_mc_pod_id_t pod_id hb_mc_pod_t *pod = &device->pods[pod_id]; // flush cache - err = hb_mc_manycore_pod_flush_vcache(device->mc, pod->pod_coord); - if (err != HB_MC_SUCCESS) { - bsg_pr_err("%s: failed to flush victim cache: %s\n", - __func__, - hb_mc_strerror(err)); - return err; + if (!device->mc->config.memsys.dma2cache) { + err = hb_mc_manycore_pod_flush_vcache(device->mc, pod->pod_coord); + if (err != HB_MC_SUCCESS) { + bsg_pr_err("%s: failed to flush victim cache: %s\n", + __func__, + hb_mc_strerror(err)); + return err; + } } // for each job... 
@@ -2136,9 +2138,11 @@ int hb_mc_device_pod_dma_to_device(hb_mc_device_t *device, hb_mc_pod_id_t pod_id } // invalidate cache - err = hb_mc_manycore_pod_invalidate_vcache(device->mc, pod->pod_coord); - if (err != HB_MC_SUCCESS) { - return err; + if (!device->mc->config.memsys.dma2cache) { + err = hb_mc_manycore_pod_invalidate_vcache(device->mc, pod->pod_coord); + if (err != HB_MC_SUCCESS) { + return err; + } } return HB_MC_SUCCESS; @@ -2155,12 +2159,14 @@ int hb_mc_device_pod_dma_to_host(hb_mc_device_t *device, hb_mc_pod_id_t pod_id, // flush cache hb_mc_pod_t *pod = &device->pods[pod_id]; - err = hb_mc_manycore_pod_flush_vcache(device->mc, pod->pod_coord); - if (err != HB_MC_SUCCESS) { - bsg_pr_err("%s: failed to flush victim cache: %s\n", - __func__, - hb_mc_strerror(err)); - return err; + if (!device->mc->config.memsys.dma2cache) { + err = hb_mc_manycore_pod_flush_vcache(device->mc, pod->pod_coord); + if (err != HB_MC_SUCCESS) { + bsg_pr_err("%s: failed to flush victim cache: %s\n", + __func__, + hb_mc_strerror(err)); + return err; + } } // for each job... diff --git a/libraries/bsg_manycore_memsys.c b/libraries/bsg_manycore_memsys.c index 6d574918a..45eabfad7 100644 --- a/libraries/bsg_manycore_memsys.c +++ b/libraries/bsg_manycore_memsys.c @@ -77,6 +77,7 @@ int hb_mc_memsys_set_features(hb_mc_memsys_t *memsys) memsys->feature_cache = 1; memsys->feature_dma = 0; } + memsys->dma2cache = 0; return HB_MC_SUCCESS; } diff --git a/libraries/bsg_manycore_memsys.h b/libraries/bsg_manycore_memsys.h index ff7038c73..4833bf7f7 100644 --- a/libraries/bsg_manycore_memsys.h +++ b/libraries/bsg_manycore_memsys.h @@ -63,6 +63,7 @@ typedef struct __hb_mc_memsys_t { // memory system features uint32_t feature_dma; //!< Can I do DMA? uint32_t feature_cache; //!< Do I have DMA? 
+ uint32_t dma2cache; //!< Does my DMA write directly to vcache // dram address bitfields hb_mc_dram_pa_bitfield dram_ro; //!< DRAM row bits info hb_mc_dram_pa_bitfield dram_bg; //!< DRAM bankgroup bits info
diff --git a/libraries/features/dma/blackparrot/bsg_manycore_dma.cpp b/libraries/features/dma/blackparrot/bsg_manycore_dma.cpp
new file mode 100644
index 000000000..566a007eb
--- /dev/null
+++ b/libraries/features/dma/blackparrot/bsg_manycore_dma.cpp
@@ -0,0 +1,167 @@
+#include
+#include
+#include
+#include
+#include
+#include <cstring>
+
+/* these are convenience macros that are only good for one line prints */
+#define dma_pr_dbg(mc, fmt, ...) \
+        bsg_pr_dbg("%s: " fmt, (mc)->name, ##__VA_ARGS__)
+
+#define dma_pr_err(mc, fmt, ...) \
+        bsg_pr_err("%s: " fmt, (mc)->name, ##__VA_ARGS__)
+
+#define dma_pr_warn(mc, fmt, ...) \
+        bsg_pr_warn("%s: " fmt, (mc)->name, ##__VA_ARGS__)
+
+#define dma_pr_info(mc, fmt, ...) \
+        bsg_pr_info("%s: " fmt, (mc)->name, ##__VA_ARGS__)
+
+/**
+ * Translate a manycore NPA into the BlackParrot address used to reach it
+ *
+ * Only vanilla-core and DRAM coordinates are translated; any other
+ * coordinate is logged as an error and rejected.
+ *
+ * @param[in]  mc      A manycore instance (supplies the configuration and log name)
+ * @param[in]  npa     The NPA to translate
+ * @param[out] bp_eva  Receives the translated address; left untouched on failure
+ * @return HB_MC_SUCCESS on success, HB_MC_NOIMPL for an unsupported coordinate.
+ */
+int hb_mc_npa_to_bp_eva(hb_mc_manycore_t *mc,
+                        const hb_mc_npa_t *npa,
+                        uint64_t *bp_eva)
+{
+        const hb_mc_config_t *cfg = hb_mc_manycore_get_config(mc);
+        hb_mc_coordinate_t coord = hb_mc_npa_get_xy(npa);
+        hb_mc_coordinate_t npod = hb_mc_config_npod(cfg, coord);
+        unsigned long long npody2 = npod.y / 2; // We pack all vcache pods into 2
+
+        // Widen each field to 64 bits before shifting. A shift is evaluated
+        // in the promoted type of its left operand (the 64-bit shift count
+        // does not widen it), so shifting a narrower field in place could
+        // lose the bits that land above that field's own width.
+        uint64_t x = static_cast<uint64_t>(coord.x);
+        uint64_t y = static_cast<uint64_t>(coord.y);
+        uint64_t epa = static_cast<uint64_t>(hb_mc_npa_get_epa(npa));
+
+        if (hb_mc_config_is_vanilla_core(cfg, coord)) {
+                *bp_eva = (3ULL << 38) | (y << 25) | (x << 18) | epa;
+        } else if (hb_mc_config_is_dram(cfg, coord)) {
+                *bp_eva = (2ULL << 38) | (npody2 << 36) | (x << 29) | epa;
+        } else {
+                dma_pr_err(mc, "%s: DMA region not supported on this platform\n", __func__);
+                return HB_MC_NOIMPL;
+        }
+
+        return HB_MC_SUCCESS;
+}
+
+/**
+ * Write memory out to manycore DRAM via DMA
+ *
+ * The buffer is sent one 32-bit word at a time with bp_mc_link_t::mmio_write(),
+ * starting at the address produced by hb_mc_npa_to_bp_eva(). When sz is not a
+ * multiple of 4 the last word is padded with zero bytes, so the host buffer
+ * is never read past sz.
+ *
+ * @param[in] mc     A manycore instance initialized with hb_mc_manycore_init()
+ * @param[in] npa    The destination NPA - must be translatable by hb_mc_npa_to_bp_eva()
+ * @param[in] data   A buffer to be written out to manycore hardware
+ * @param[in] sz     The number of bytes to write to manycore hardware
+ * @return HB_MC_SUCCESS on success. Otherwise the error code reported by the
+ *         address translation or by the word write that failed.
+ */
+int hb_mc_dma_write(hb_mc_manycore_t *mc,
+                    const hb_mc_npa_t *npa,
+                    const void *data, size_t sz)
+{
+        bp_mc_link_t *mcl = reinterpret_cast<bp_mc_link_t *>(mc->platform);
+
+        uint64_t base_eva;
+        int rc = hb_mc_npa_to_bp_eva(mc, npa, &base_eva);
+        if (rc != HB_MC_SUCCESS) {
+                return rc;
+        }
+
+        const unsigned char *src = static_cast<const unsigned char *>(data);
+        for (size_t off = 0; off < sz; off += sizeof(int32_t)) {
+                size_t left = sz - off;
+                size_t nbytes = left < sizeof(int32_t) ? left : sizeof(int32_t);
+
+                // Stage the word with memcpy: no read past the end of a
+                // short tail, and no alignment requirement on the buffer.
+                int32_t word = 0;
+                std::memcpy(&word, src + off, nbytes);
+
+                rc = mcl->mmio_write(base_eva + off, word, 0xf);
+                if (rc != HB_MC_SUCCESS) {
+                        return rc;
+                }
+        }
+
+        return HB_MC_SUCCESS;
+}
+
+
+/**
+ * Read memory from manycore DRAM via DMA
+ *
+ * The buffer is filled one 32-bit word at a time with bp_mc_link_t::mmio_read(),
+ * starting at the address produced by hb_mc_npa_to_bp_eva(). When sz is not a
+ * multiple of 4 only the remaining bytes of the last word are copied, so the
+ * host buffer is never written past sz.
+ *
+ * @param[in]  mc    A manycore instance initialized with hb_mc_manycore_init()
+ * @param[in]  npa   The source NPA - must be translatable by hb_mc_npa_to_bp_eva()
+ * @param[out] data  A host buffer to be read into from manycore hardware
+ * @param[in]  sz    The number of bytes to read from manycore hardware
+ * @return HB_MC_SUCCESS on success. Otherwise the error code reported by the
+ *         address translation or by the word read that failed.
+ */
+int hb_mc_dma_read(hb_mc_manycore_t *mc,
+                   const hb_mc_npa_t *npa,
+                   void *data, size_t sz)
+{
+        bp_mc_link_t *mcl = reinterpret_cast<bp_mc_link_t *>(mc->platform);
+
+        uint64_t base_eva;
+        int rc = hb_mc_npa_to_bp_eva(mc, npa, &base_eva);
+        if (rc != HB_MC_SUCCESS) {
+                return rc;
+        }
+
+        unsigned char *dst = static_cast<unsigned char *>(data);
+        for (size_t off = 0; off < sz; off += sizeof(int32_t)) {
+                size_t left = sz - off;
+                size_t nbytes = left < sizeof(int32_t) ? left : sizeof(int32_t);
+
+                int32_t word = 0;
+                rc = mcl->mmio_read(base_eva + off, &word);
+                if (rc != HB_MC_SUCCESS) {
+                        return rc;
+                }
+
+                // Copy out only the bytes the caller asked for.
+                std::memcpy(dst + off, &word, nbytes);
+        }
+
+        return HB_MC_SUCCESS;
+}
+
+/**
+ * Initialize the DMA feature for this platform
+ *
+ * Marks the memory system as dma2cache, the flag the pod DMA routines in
+ * bsg_manycore_cuda.cpp test before flushing or invalidating the victim cache.
+ *
+ * @param[in] mc  The manycore instance whose configuration is updated
+ * @return HB_MC_SUCCESS always.
+ */
+int hb_mc_dma_init(hb_mc_manycore_t *mc)
+{
+        mc->config.memsys.dma2cache = 1;
+        return HB_MC_SUCCESS;
+}
+
diff --git a/libraries/features/dma/blackparrot/feature.mk b/libraries/features/dma/blackparrot/feature.mk new file mode 100644 index 000000000..2f6239e84 --- /dev/null +++ b/libraries/features/dma/blackparrot/feature.mk @@ -0,0 +1,44 @@ +# Copyright (c) 2019, University of Washington All rights reserved. +# +# Redistribution and use in source and binary forms, with or without modification, +# are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, this list +# of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, this +# list of conditions and the following disclaimer in the documentation and/or +# other materials provided with the distribution. +# +# Neither the name of the copyright holder nor the names of its contributors may +# be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +DMA_FEATURE_CXXSOURCES += $(LIBRARIES_PATH)/features/dma/blackparrot/bsg_manycore_dma.cpp + +DMA_FEATURE_OBJECTS += $(patsubst %cpp,%o,$(DMA_FEATURE_CXXSOURCES)) +DMA_FEATURE_OBJECTS += $(patsubst %c,%o,$(DMA_FEATURE_CSOURCES)) + +$(DMA_FEATURE_OBJECTS): INCLUDES := -I$(LIBRARIES_PATH) +$(DMA_FEATURE_OBJECTS): INCLUDES += -I$(LIBRARIES_PATH)/features/dma +$(DMA_FEATURE_OBJECTS): CFLAGS := -std=c11 -fPIC -D_GNU_SOURCE -D_BSD_SOURCE -D_DEFAULT_SOURCE $(INCLUDES) +$(DMA_FEATURE_OBJECTS): CXXFLAGS := -std=c++11 -fPIC -D_GNU_SOURCE -D_BSD_SOURCE -D_DEFAULT_SOURCE $(INCLUDES) + +$(BSG_PLATFORM_PATH)/libbsg_manycore_runtime.so.1.0: $(DMA_FEATURE_OBJECTS) + +.PHONY: dma_feature.clean +dma_feature.clean: + rm -f $(DMA_FEATURE_OBJECTS) + +platform.clean: dma_feature.clean diff --git a/libraries/platforms/bigblade-vcs/compilation.mk b/libraries/platforms/bigblade-vcs/compilation.mk index 2b64d5912..195103646 100644 --- a/libraries/platforms/bigblade-vcs/compilation.mk +++ b/libraries/platforms/bigblade-vcs/compilation.mk @@ -26,5 +26,4 @@ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
include $(LIBRARIES_PATH)/platforms/common/dpi/compilation.mk -$(PLATFORM_REGRESSION_OBJECTS): INCLUDES += $(LIBRARIES_PATH)/platforms/common/dpi diff --git a/libraries/platforms/hammerblade-vcs/bsg_manycore_platform.cpp b/libraries/platforms/hammerblade-vcs/bsg_manycore_platform.cpp index fb519c892..1d634f13d 100644 --- a/libraries/platforms/hammerblade-vcs/bsg_manycore_platform.cpp +++ b/libraries/platforms/hammerblade-vcs/bsg_manycore_platform.cpp @@ -13,6 +13,7 @@ // Header Definitions ///////////////////////////////////////////// +#include #include #include @@ -20,40 +21,6 @@ // Globals ///////////////////////////////////////////// -#define HOST_BASE_ADDRESS 0x100000 -#define HOST_PUTCHAR_REG (HOST_BASE_ADDRESS + 0x1000) -#define HOST_GETCHAR_REG (HOST_BASE_ADDRESS + 0x2000) -#define HOST_BOOTROM_REG (HOST_BASE_ADDRESS + 0x3000) - -class bp_mc_link_t { - private: - uint64_t fifo_base_addr = (uint64_t) 0x500000; - volatile uint64_t *bp_req_fifo_data_addr = (volatile uint64_t *) (fifo_base_addr + 0x1000); - volatile int *bp_req_fifo_ctr_addr = (volatile int *) (fifo_base_addr + 0x2000); - volatile uint64_t *mc_rsp_fifo_data_addr = (volatile uint64_t *) (fifo_base_addr + 0x3000); - volatile int *mc_rsp_fifo_ctr_addr = (volatile int *) (fifo_base_addr + 0x4000); - volatile uint64_t *mc_req_fifo_data_addr = (volatile uint64_t *) (fifo_base_addr + 0x5000); - volatile int *mc_req_fifo_ctr_addr = (volatile int *) (fifo_base_addr + 0x6000); - volatile uint64_t *bp_rsp_fifo_data_addr = (volatile uint64_t *) (fifo_base_addr + 0x7000); - volatile int *bp_rsp_fifo_ctr_addr = (volatile int *) (fifo_base_addr + 0x8000); - volatile int *endpoint_credits_addr = (volatile int *) (fifo_base_addr + 0x9000); - - int try_write_bp_request_fifo(uint64_t data); - int try_write_bp_response_fifo(uint64_t data); - int try_read_mc_response_fifo(uint64_t *data); - int try_read_mc_request_fifo(uint64_t *data); - - public: - int tx_fifo_req(hb_mc_request_packet_t *packet); - int 
tx_fifo_rsp(hb_mc_response_packet_t *packet); - int rx_fifo_req(hb_mc_request_packet_t *packet); - int rx_fifo_rsp(hb_mc_response_packet_t *packet); - int mmio_read(uint64_t address, int32_t *data); - int mmio_write(uint64_t address, int32_t data, uint8_t mask); - int fifo_fence(); - int fifo_drain(); -}; - int bp_mc_link_t::try_write_bp_request_fifo(uint64_t data) { int ctr = *bp_req_fifo_ctr_addr;
diff --git a/libraries/platforms/hammerblade-vcs/bsg_manycore_platform.hpp b/libraries/platforms/hammerblade-vcs/bsg_manycore_platform.hpp
new file mode 100644
index 000000000..327b7f3c8
--- /dev/null
+++ b/libraries/platforms/hammerblade-vcs/bsg_manycore_platform.hpp
@@ -0,0 +1,52 @@
+#ifndef BSG_MANYCORE_PLATFORM_HPP
+#define BSG_MANYCORE_PLATFORM_HPP
+
+#include <cstdint>
+
+// NOTE(review): hb_mc_request_packet_t and hb_mc_response_packet_t are not
+// declared by this header; the includer presumably has to pull in the packet
+// definitions first - confirm and include them here if so.
+
+#define HOST_BASE_ADDRESS 0x100000
+#define HOST_PUTCHAR_REG (HOST_BASE_ADDRESS + 0x1000)
+#define HOST_GETCHAR_REG (HOST_BASE_ADDRESS + 0x2000)
+#define HOST_BOOTROM_REG (HOST_BASE_ADDRESS + 0x3000)
+
+/**
+ * Host-side handle for the BlackParrot <-> manycore link.
+ *
+ * Holds the addresses of the FIFO data/counter registers, each at a fixed
+ * offset from fifo_base_addr, and declares the packet and MMIO operations
+ * built on them. The member functions are only declared here; they are
+ * defined out of line (bsg_manycore_platform.cpp on this platform).
+ */
+class bp_mc_link_t {
+        private:
+                uint64_t fifo_base_addr = (uint64_t) 0x500000;
+                volatile uint64_t *bp_req_fifo_data_addr = (volatile uint64_t *) (fifo_base_addr + 0x1000);
+                volatile int *bp_req_fifo_ctr_addr = (volatile int *) (fifo_base_addr + 0x2000);
+                volatile uint64_t *mc_rsp_fifo_data_addr = (volatile uint64_t *) (fifo_base_addr + 0x3000);
+                volatile int *mc_rsp_fifo_ctr_addr = (volatile int *) (fifo_base_addr + 0x4000);
+                volatile uint64_t *mc_req_fifo_data_addr = (volatile uint64_t *) (fifo_base_addr + 0x5000);
+                volatile int *mc_req_fifo_ctr_addr = (volatile int *) (fifo_base_addr + 0x6000);
+                volatile uint64_t *bp_rsp_fifo_data_addr = (volatile uint64_t *) (fifo_base_addr + 0x7000);
+                volatile int *bp_rsp_fifo_ctr_addr = (volatile int *) (fifo_base_addr + 0x8000);
+                volatile int *endpoint_credits_addr = (volatile int *) (fifo_base_addr + 0x9000);
+
+                int try_write_bp_request_fifo(uint64_t data);
+                int try_write_bp_response_fifo(uint64_t data);
+                int try_read_mc_response_fifo(uint64_t *data);
+                int try_read_mc_request_fifo(uint64_t *data);
+
+        public:
+                int tx_fifo_req(hb_mc_request_packet_t *packet);
+                int tx_fifo_rsp(hb_mc_response_packet_t *packet);
+                int rx_fifo_req(hb_mc_request_packet_t *packet);
+                int rx_fifo_rsp(hb_mc_response_packet_t *packet);
+                int mmio_read(uint64_t address, int32_t *data);
+                int mmio_write(uint64_t address, int32_t data, uint8_t mask);
+                int fifo_fence();
+                int fifo_drain();
+};
+
+#endif
+
diff --git a/libraries/platforms/hammerblade-vcs/library.mk b/libraries/platforms/hammerblade-vcs/library.mk index 12ab82a03..7643406d6 100644 --- a/libraries/platforms/hammerblade-vcs/library.mk +++ b/libraries/platforms/hammerblade-vcs/library.mk @@ -27,7 +27,7 @@ PLATFORM_CXXSOURCES += $(LIBRARIES_PATH)/features/profiler/noimpl/bsg_manycore_profiler.cpp PLATFORM_CXXSOURCES += $(LIBRARIES_PATH)/features/tracer/noimpl/bsg_manycore_tracer.cpp -PLATFORM_CXXSOURCES += $(LIBRARIES_PATH)/features/dma/noimpl/bsg_manycore_dma.cpp +PLATFORM_CXXSOURCES += $(LIBRARIES_PATH)/features/dma/blackparrot/bsg_manycore_dma.cpp PLATFORM_CXXSOURCES += $(BSG_PLATFORM_PATH)/bsg_manycore_platform.cpp LIB_CSOURCES += $(BSG_PLATFORM_PATH)/src/argp/argp-ba.c @@ -42,10 +42,7 @@ LIB_CSOURCES += $(BSG_PLATFORM_PATH)/src/argp/argp-xinl.c LIB_CSOURCES += $(BSG_PLATFORM_PATH)/src/flockfile.c LIB_CSOURCES += $(BSG_PLATFORM_PATH)/src/funlockfile.c -# aws-fpga does not provide a DMA feature. Therefore, we use the fragment in -# features/dma/noimpl/feature.mk that simply returns -# HB_MC_NO_IMPL for each function call. 
-include $(LIBRARIES_PATH)/features/dma/noimpl/feature.mk +include $(LIBRARIES_PATH)/features/dma/blackparrot/feature.mk PLATFORM_OBJECTS += $(patsubst %cpp,%o,$(PLATFORM_CXXSOURCES)) PLATFORM_OBJECTS += $(patsubst %c,%o,$(PLATFORM_CSOURCES)) diff --git a/libraries/platforms/hammerblade-vcs/link.mk b/libraries/platforms/hammerblade-vcs/link.mk index 74478efce..859d18296 100644 --- a/libraries/platforms/hammerblade-vcs/link.mk +++ b/libraries/platforms/hammerblade-vcs/link.mk @@ -66,7 +66,7 @@ REGRESSION_LIBRARIES += $(BSG_PLATFORM_PATH)/libbsg_manycore_regression.a DRAMFS_MKLFS ?= $(BLACKPARROT_SDK_DIR)/install/bin/dramfs_mklfs 128 8192 lfs.c: $(MAKE) $(BSG_MANYCORE_KERNELS) - cp $(BSG_MANYCORE_KERNELS) $(notdir $(BSG_MANYCORE_KERNELS)) + -cp $(BSG_MANYCORE_KERNELS) $(notdir $(BSG_MANYCORE_KERNELS)) $(DRAMFS_MKLFS) $(notdir $(BSG_MANYCORE_KERNELS)) > $@ loader.o: $(TEST_OBJECTS) $(REGRESSION_LIBRARIES) diff --git a/machine.mk b/machine.mk index c6d0e549b..e2f671f2c 100644 --- a/machine.mk +++ b/machine.mk @@ -38,7 +38,7 @@ endif # To switch machines, simply switch the path of BSG_MACHINE_PATH to # another directory with a Makefile.machine.include file. -BSG_MACHINE_PATH ?= $(BSG_F1_DIR)/machines/pod_X1Y1_ruche_X16Y8_hbm_one_pseudo_channel +BSG_MACHINE_PATH ?= $(BSG_F1_DIR)/machines/baseline_v0_32_16 # Convert the machine path to an abspath override BSG_MACHINE_PATH := $(abspath $(BSG_MACHINE_PATH)) diff --git a/platform.mk b/platform.mk index 73d1e8d4e..5ef4b7f0c 100644 --- a/platform.mk +++ b/platform.mk @@ -44,7 +44,7 @@ endif # We default to simulating the AWS machine uinsg Synopsys VCS-MX, # HOWEVER, if VCS_HOME is not defined then we will assume that # VCS/VCS-MX is not installed and try fall-back options -BSG_PLATFORM ?= bigblade-vcs +BSG_PLATFORM ?= dpi-vcs # FIRST check if BSG_PLATFORM is valid. It should match of the # directories in libraries/platforms