diff --git a/python/src/docstrings.hpp b/python/src/docstrings.hpp
index fb1def63..ef087b68 100644
--- a/python/src/docstrings.hpp
+++ b/python/src/docstrings.hpp
@@ -31,17 +31,28 @@ static const char *__doc_kp_Algorithm_Algorithm =
R"doc(Main constructor for algorithm with configuration parameters to
create the underlying resources.

-@param device The Vulkan device to use for creating resources @param
-tensors (optional) The tensors to use to create the descriptor
-resources @param spirv (optional) The spirv code to use to create the
-algorithm @param workgroup (optional) The kp::Workgroup to use for the
-dispatch which defaults to kp::Workgroup(tensor[0].size(), 1, 1) if
-not set. @param specializationConstants (optional) The std::vector
-to use to initialize the specialization constants which cannot be
-changed once set. @param pushConstants (optional) The std::vector to
-use when initializing the pipeline, which set the size of the push
-constants - these can be modified but all new values must have the
-same vector size as this initial value.)doc";
+Parameter ``device``:
+    The Vulkan device to use for creating resources
+
+Parameter ``tensors``:
+    (optional) The tensors to use to create the descriptor resources
+
+Parameter ``spirv``:
+    (optional) The spirv code to use to create the algorithm
+
+Parameter ``workgroup``:
+    (optional) The kp::Workgroup to use for the dispatch, which
+    defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
+
+Parameter ``specializationConstants``:
+    (optional) The templatable parameter to use to initialize the
+    specialization constants, which cannot be changed once set.
+
+Parameter ``pushConstants``:
+    (optional) The templatable parameter to use when initializing the
+    pipeline, which sets the size of the push constants - these can
+    be modified, but all new values must have the same data type and
+    length, as otherwise it will result in errors.)doc";

static const char *__doc_kp_Algorithm_createParameters = R"doc()doc";

@@ -51,33 +62,38 @@ static const char *__doc_kp_Algorithm_createShaderModule = R"doc()doc";

static const char *__doc_kp_Algorithm_destroy = R"doc()doc";

-static const char *__doc_kp_Algorithm_getPush =
-R"doc(Gets the specialization constants of the current algorithm.
+static const char *__doc_kp_Algorithm_getMemObjects =
+R"doc(Gets the current memory objects that are used in the algorithm.

-@returns The std::vector currently set for push constants)doc";
+Returns:
+    The list of memory objects used in the algorithm.)doc";

-static const char *__doc_kp_Algorithm_getSpecializationConstants =
+static const char *__doc_kp_Algorithm_getPushConstants =
R"doc(Gets the specialization constants of the current algorithm.

-@returns The std::vector currently set for specialization constants)doc";
+Returns:
+    The std::vector currently set for push constants)doc";

-static const char *__doc_kp_Algorithm_getTensors =
-R"doc(Gets the current tensors that are used in the algorithm.
+static const char *__doc_kp_Algorithm_getSpecializationConstants =
+R"doc(Gets the specialization constants of the current algorithm.

-@returns The list of tensors used in the algorithm.)doc";
+Returns:
+    The std::vector currently set for specialization constants)doc";

static const char *__doc_kp_Algorithm_getWorkgroup =
R"doc(Gets the current workgroup from the algorithm.

-@param The kp::Constant to use to set the push constants to use in the
-next bindPush(...) calls. The constants provided must be of the same
-size as the ones created during initialization.)doc";
+Returns:
+    The kp::Workgroup currently set on the algorithm.)doc";

static const char *__doc_kp_Algorithm_isInit =
R"doc(function that checks all the gpu resource components to verify if
these have been created and returns true if all are valid.

-@returns returns true if the algorithm is currently initialized.)doc";
+Returns:
+    True if the algorithm is currently initialized.)doc";

static const char *__doc_kp_Algorithm_mDescriptorPool = R"doc()doc";

@@ -101,21 +117,29 @@ static const char *__doc_kp_Algorithm_mFreePipelineLayout = R"doc()doc";

static const char *__doc_kp_Algorithm_mFreeShaderModule = R"doc()doc";

+static const char *__doc_kp_Algorithm_mMemObjects = R"doc()doc";
+
static const char *__doc_kp_Algorithm_mPipeline = R"doc()doc";

static const char *__doc_kp_Algorithm_mPipelineCache = R"doc()doc";

static const char *__doc_kp_Algorithm_mPipelineLayout = R"doc()doc";

-static const char *__doc_kp_Algorithm_mPushConstants = R"doc()doc";
+static const char *__doc_kp_Algorithm_mPushConstantsData = R"doc()doc";
+
+static const char *__doc_kp_Algorithm_mPushConstantsDataTypeMemorySize = R"doc()doc";
+
+static const char *__doc_kp_Algorithm_mPushConstantsSize = R"doc()doc";

static const char *__doc_kp_Algorithm_mShaderModule = R"doc()doc";

-static const char *__doc_kp_Algorithm_mSpecializationConstants = R"doc()doc";
+static const char *__doc_kp_Algorithm_mSpecializationConstantsData = R"doc()doc";

-static const char *__doc_kp_Algorithm_mSpirv = R"doc()doc";
+static const char *__doc_kp_Algorithm_mSpecializationConstantsDataTypeMemorySize = R"doc()doc";
+
+static const char *__doc_kp_Algorithm_mSpecializationConstantsSize = R"doc()doc";

-static const char *__doc_kp_Algorithm_mTensors = R"doc()doc";
+static const char *__doc_kp_Algorithm_mSpirv = R"doc()doc";

static const char *__doc_kp_Algorithm_mWorkgroup = R"doc()doc";

@@ -123,54 +147,379 @@ static const char *__doc_kp_Algorithm_rebuild =
R"doc(Rebuild function to reconstruct algorithm with configuration
parameters to create the underlying resources.

-@param tensors The tensors to use to create the descriptor resources
-@param spirv The spirv code to use to create the algorithm @param
-workgroup (optional) The kp::Workgroup to use for the dispatch which
-defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set. @param
-specializationConstants (optional) The std::vector to use to
-initialize the specialization constants which cannot be changed once
-set. @param pushConstants (optional) The std::vector to use when
-initializing the pipeline, which set the size of the push constants -
-these can be modified but all new values must have the same vector
-size as this initial value.)doc";
+Parameter ``tensors``:
+    The tensors to use to create the descriptor resources
+
+Parameter ``spirv``:
+    The spirv code to use to create the algorithm
+
+Parameter ``workgroup``:
+    (optional) The kp::Workgroup to use for the dispatch, which
+    defaults to kp::Workgroup(tensor[0].size(), 1, 1) if not set.
+
+Parameter ``specializationConstants``:
+    (optional) The std::vector to use to initialize the
+    specialization constants, which cannot be changed once set.
+
+Parameter ``pushConstants``:
+    (optional) The std::vector to use when initializing the
+    pipeline, which sets the size of the push constants - these can
+    be modified, but all new values must have the same vector size as
+    this initial value.)doc";

static const char *__doc_kp_Algorithm_recordBindCore =
R"doc(Records command that binds the "core" algorithm components which
consist of binding the pipeline and binding the descriptorsets.

-@param commandBuffer Command buffer to record the algorithm resources
-to)doc";
+Parameter ``commandBuffer``:
+    Command buffer to record the algorithm resources to)doc";

static const char *__doc_kp_Algorithm_recordBindPush =
R"doc(Records command that binds the push constants to the command buffer
provided - it is required that the pushConstants provided are of the
same size as the ones provided during initialization.

-@param commandBuffer Command buffer to record the algorithm resources
-to)doc";
+Parameter ``commandBuffer``:
+    Command buffer to record the algorithm resources to)doc";

static const char *__doc_kp_Algorithm_recordDispatch =
R"doc(Records the dispatch function with the provided template parameters or
alternatively using the size of the tensor by default.

-@param commandBuffer Command buffer to record the algorithm resources
-to)doc";
+Parameter ``commandBuffer``:
+    Command buffer to record the algorithm resources to)doc";

-static const char *__doc_kp_Algorithm_setPush =
+static const char *__doc_kp_Algorithm_setPushConstants =
R"doc(Sets the push constants to the new value provided to use in the next
bindPush()

-@param The kp::Constant to use to set the push constants to use in the
-next bindPush(...) calls. The constants provided must be of the same
-size as the ones created during initialization.)doc";
+Parameter ``pushConstants``:
+    The templatable vector to use to set the push constants for the
+    next bindPush(...) calls. The constants provided must be of the
+    same size as the ones created during initialization.)doc";
+
+static const char *__doc_kp_Algorithm_setPushConstants_2 =
+R"doc(Sets the push constants to the new value provided to use in the next
+bindPush() with the raw memory block location and memory size to be
+used.
+
+Parameter ``data``:
+    The raw data pointer to copy the data from, without modifying the
+    pointer.
+
+Parameter ``size``:
+    The number of data elements provided in the data block
+
+Parameter ``memorySize``:
+    The memory size of each of the data elements in bytes.)doc";

static const char *__doc_kp_Algorithm_setWorkgroup =
R"doc(Sets the work group to use in the recordDispatch

-@param workgroup The kp::Workgroup value to use to update the
-algorithm. It must have a value greater than 1 on the x value (index
-1) otherwise it will be initialized on the size of the first tensor
-(ie. this->mTensor[0]->size()))doc";
+Parameter ``workgroup``:
+    The kp::Workgroup value to use to update the algorithm. It must
+    have a value greater than 1 on the x value (index 1), otherwise it
+    will be initialized on the size of the first tensor (ie.
+    this->mTensor[0]->size()))doc";
+
+static const char *__doc_kp_Image =
+R"doc(Image data used in GPU operations.
+
+Each image has a respective Vulkan memory and image, which are used
+to store its data. The images can be used for GPU data storage or
+transfer.)doc";
+
+static const char *__doc_kp_Image_2 = R"doc()doc";
+
+static const char *__doc_kp_ImageT = R"doc()doc";
+
+static const char *__doc_kp_ImageT_ImageT = R"doc()doc";
+
+static const char *__doc_kp_ImageT_ImageT_2 = R"doc()doc";
+
+static const char *__doc_kp_ImageT_ImageT_3 = R"doc()doc";
+
+static const char *__doc_kp_ImageT_ImageT_4 = R"doc()doc";
+
+static const char *__doc_kp_ImageT_dataType = R"doc()doc";
+
+static const char *__doc_kp_ImageT_operator_array = R"doc()doc";
+
+static const char *__doc_kp_ImageT_setData = R"doc()doc";
+
+static const char *__doc_kp_ImageT_vector = R"doc()doc";
+
+static const char *__doc_kp_Image_Image =
+R"doc(Constructor with data provided, which is used to create the
+respective Vulkan image and memory.
+
+Parameter ``physicalDevice``:
+    The physical device to use to fetch properties
+
+Parameter ``device``:
+    The device to use to create the image and memory from
+
+Parameter ``data``:
+    Non-zero-sized vector of data that will be used by the image
+
+Parameter ``width``:
+    Width of the image in pixels
+
+Parameter ``height``:
+    Height of the image in pixels
+
+Parameter ``numChannels``:
+    The number of channels in the image
+
+Parameter ``dataType``:
+    Data type for the image which is of type ImageDataTypes
+
+Parameter ``memoryType``:
+    Type for the image which is of type MemoryTypes
+
+Parameter ``tiling``:
+    Tiling mode to use for the image.)doc";
+
+static const char *__doc_kp_Image_Image_2 =
+R"doc(Constructor with data provided, which is used to create the
+respective Vulkan image and memory. No tiling has been provided, so it
+will be inferred from \p memoryType.
+
+Parameter ``physicalDevice``:
+    The physical device to use to fetch properties
+
+Parameter ``device``:
+    The device to use to create the image and memory from
+
+Parameter ``data``:
+    Non-zero-sized vector of data that will be used by the image
+
+Parameter ``width``:
+    Width of the image in pixels
+
+Parameter ``height``:
+    Height of the image in pixels
+
+Parameter ``numChannels``:
+    The number of channels in the image
+
+Parameter ``dataType``:
+    Data type for the image which is of type ImageDataTypes
+
+Parameter ``memoryType``:
+    Type for the image which is of type MemoryTypes
+
+Parameter ``tiling``:
+    Tiling mode to use for the image.)doc";
+
+static const char *__doc_kp_Image_ImageDataTypes = R"doc()doc";
+
+static const char *__doc_kp_Image_ImageDataTypes_eF16 = R"doc()doc";
+
+static const char *__doc_kp_Image_ImageDataTypes_eF32 = R"doc()doc";
+
+static const char *__doc_kp_Image_ImageDataTypes_eS16 = R"doc()doc";
+
+static const char *__doc_kp_Image_ImageDataTypes_eS32 = R"doc()doc";
+
+static const char *__doc_kp_Image_ImageDataTypes_eS8 = R"doc()doc";
+
+static const char *__doc_kp_Image_ImageDataTypes_eU16 = R"doc()doc";
+
+static const char *__doc_kp_Image_ImageDataTypes_eU32 = R"doc()doc";
+
+static const char *__doc_kp_Image_ImageDataTypes_eU8 = R"doc()doc";
+
+static const char *__doc_kp_Image_allocateBindMemory = R"doc()doc";
+
+static const char *__doc_kp_Image_allocateMemoryCreateGPUResources = R"doc()doc";
+
+static const char *__doc_kp_Image_constructDescriptorImageInfo = R"doc()doc";
+
+static const char *__doc_kp_Image_constructDescriptorSet =
+R"doc(Adds this object to a Vulkan descriptor set at \p binding.
+
+Parameter ``descriptorSet``:
+    The descriptor set to add to.
+
+Parameter ``binding``:
+    The binding number to use.
+
+Returns:
+    Add this object to a descriptor set at \p binding.)doc";
+
+static const char *__doc_kp_Image_createImage = R"doc()doc";
+
+static const char *__doc_kp_Image_dataType =
+R"doc(Retrieve the data type of the image
+
+Returns:
+    Data type of image of type kp::image::ImageDataTypes)doc";
+
+static const char *__doc_kp_Image_destroy =
+R"doc(Destroys and frees the GPU resources which include the image and
+memory.)doc";
+
+static const char *__doc_kp_Image_elementTypeSize = R"doc()doc";
+
+static const char *__doc_kp_Image_getFormat = R"doc()doc";
+
+static const char *__doc_kp_Image_getHeight =
+R"doc(Retrieve the height of the image in pixels
+
+Returns:
+    Height of the image in pixels)doc";
+
+static const char *__doc_kp_Image_getNumChannels =
+R"doc(Retrieve the number of channels in the image
+
+Returns:
+    Number of channels in the image)doc";
+
+static const char *__doc_kp_Image_getPrimaryImage = R"doc()doc";
+
+static const char *__doc_kp_Image_getPrimaryImageUsageFlags = R"doc()doc";
+
+static const char *__doc_kp_Image_getPrimaryMemoryPropertyFlags = R"doc()doc";
+
+static const char *__doc_kp_Image_getStagingImageUsageFlags = R"doc()doc";
+
+static const char *__doc_kp_Image_getStagingMemoryPropertyFlags = R"doc()doc";
+
+static const char *__doc_kp_Image_getTensorDataType = R"doc()doc";
+
+static const char *__doc_kp_Image_getWidth =
+R"doc(Retrieve the width of the image in pixels
+
+Returns:
+    Width of the image in pixels)doc";
+
+static const char *__doc_kp_Image_init = R"doc()doc";
+
+static const char *__doc_kp_Image_isInit =
+R"doc(Check whether image is initialized based on the created gpu resources.
+
+Returns:
+    Boolean stating whether image is initialized)doc";
+
+static const char *__doc_kp_Image_mDataType = R"doc()doc";
+
+static const char *__doc_kp_Image_mDescriptorImageInfo = R"doc()doc";
+
+static const char *__doc_kp_Image_mFreePrimaryImage = R"doc()doc";
+
+static const char *__doc_kp_Image_mFreeStagingImage = R"doc()doc";
+
+static const char *__doc_kp_Image_mHeight = R"doc()doc";
+
+static const char *__doc_kp_Image_mImageView = R"doc()doc";
+
+static const char *__doc_kp_Image_mNumChannels = R"doc()doc";
+
+static const char *__doc_kp_Image_mPrimaryImage = R"doc()doc";
+
+static const char *__doc_kp_Image_mPrimaryImageLayout = R"doc()doc";
+
+static const char *__doc_kp_Image_mStagingImage = R"doc()doc";
+
+static const char *__doc_kp_Image_mStagingImageLayout = R"doc()doc";
+
+static const char *__doc_kp_Image_mTiling = R"doc()doc";
+
+static const char *__doc_kp_Image_mWidth = R"doc()doc";
+
+static const char *__doc_kp_Image_rebuild =
+R"doc(Function to trigger reinitialisation of the image and memory with new
+data as well as a potentially new device type.
+
+Parameter ``data``:
+    Vector of data to use to initialise image from)doc";
+
+static const char *__doc_kp_Image_recordCopyFrom =
+R"doc(Records a copy from the memory of the image provided to the current
+image. This is intended to be used to pass memory into processing, to
+perform a staging image transfer, or to gather output (among others).
+
+Parameter ``commandBuffer``:
+    Vulkan Command Buffer to record the commands into
+
+Parameter ``copyFromImage``:
+    Image to copy the data from)doc";
+
+static const char *__doc_kp_Image_recordCopyFrom_2 =
+R"doc(Records a copy from the memory of the tensor provided to the current
+image. This is intended to be used to pass memory into processing, to
+perform a staging image transfer, or to gather output (among others).
+ +Parameter ``commandBuffer``: + Vulkan Command Buffer to record the commands into + +Parameter ``copyFromTensor``: + Tensor to copy the data from)doc"; + +static const char *__doc_kp_Image_recordCopyFromDeviceToStaging = +R"doc(Records a copy from the internal device memory to the staging memory +using an optional barrier to wait for the operation. This function +would only be relevant for kp::images of type eDevice. + +Parameter ``commandBuffer``: + Vulkan Command Buffer to record the commands into)doc"; + +static const char *__doc_kp_Image_recordCopyFromStagingToDevice = +R"doc(Records a copy from the internal staging memory to the device memory +using an optional barrier to wait for the operation. This function +would only be relevant for kp::images of type eDevice. + +Parameter ``commandBuffer``: + Vulkan Command Buffer to record the commands into)doc"; + +static const char *__doc_kp_Image_recordCopyImage = R"doc()doc"; + +static const char *__doc_kp_Image_recordCopyImageFromTensor = R"doc()doc"; + +static const char *__doc_kp_Image_recordImageMemoryBarrier = R"doc()doc"; + +static const char *__doc_kp_Image_recordPrimaryMemoryBarrier = +R"doc(Records the image memory barrier into the primary image and command +buffer which ensures that relevant data transfers are carried out +correctly. + +Parameter ``commandBuffer``: + Vulkan Command Buffer to record the commands into + +Parameter ``srcAccessMask``: + Access flags for source access mask + +Parameter ``dstAccessMask``: + Access flags for destination access mask + +Parameter ``scrStageMask``: + Pipeline stage flags for source stage mask + +Parameter ``dstStageMask``: + Pipeline stage flags for destination stage mask)doc"; + +static const char *__doc_kp_Image_recordStagingMemoryBarrier = +R"doc(Records the image memory barrier into the staging image and command +buffer which ensures that relevant data transfers are carried out +correctly. + +Parameter ``commandBuffer``: + Vulkan Command Buffer to record the commands into + +Parameter ``srcAccessMask``: + Access flags for source access mask + +Parameter ``dstAccessMask``: + Access flags for destination access mask + +Parameter ``scrStageMask``: + Pipeline stage flags for source stage mask + +Parameter ``dstStageMask``: + Pipeline stage flags for destination stage mask)doc"; + +static const char *__doc_kp_Image_toString = R"doc()doc"; static const char *__doc_kp_Manager = R"doc(Base orchestrator which creates and manages device and child @@ -184,32 +533,81 @@ static const char *__doc_kp_Manager_Manager_2 = R"doc(Similar to base constructor but allows for further configuration to use when creating the Vulkan resources. -@param physicalDeviceIndex The index of the physical device to use -@param familyQueueIndices (Optional) List of queue indices to add for -explicit allocation @param desiredExtensions The desired extensions to -load from physicalDevice)doc"; +Parameter ``physicalDeviceIndex``: + The index of the physical device to use + +Parameter ``familyQueueIndices``: + (Optional) List of queue indices to add for explicit allocation + +Parameter ``desiredExtensions``: + The desired extensions to load from physicalDevice)doc"; static const char *__doc_kp_Manager_Manager_3 = R"doc(Manager constructor which allows your own vulkan application to integrate with the kompute use. 
-@param instance Vulkan compute instance to base this application
-@param physicalDevice Vulkan physical device to use for application
-@param device Vulkan logical device to use for all base resources
-@param physicalDeviceIndex Index for vulkan physical device used)doc";
+Parameter ``instance``:
+    Vulkan compute instance to base this application
+
+Parameter ``physicalDevice``:
+    Vulkan physical device to use for application
+
+Parameter ``device``:
+    Vulkan logical device to use for all base resources
+
+Parameter ``physicalDeviceIndex``:
+    Index for Vulkan physical device used)doc";

static const char *__doc_kp_Manager_algorithm =
+R"doc(Default non-template function that can be used to create algorithm
+objects, providing default types (float) for the push and
+specialization constants.
+
+Parameter ``memObjects``:
+    (optional) The mem objects to initialise the algorithm with
+
+Parameter ``spirv``:
+    (optional) The SPIRV bytes for the algorithm to dispatch
+
+Parameter ``workgroup``:
+    (optional) kp::Workgroup for algorithm to use, and defaults to
+    (tensor[0].size(), 1, 1)
+
+Parameter ``specializationConstants``:
+    (optional) float vector to use for specialization constants, and
+    defaults to an empty constant
+
+Parameter ``pushConstants``:
+    (optional) float vector to use for push constants, and defaults to
+    an empty constant
+
+Returns:
+    Shared pointer with initialised algorithm)doc";
+
+static const char *__doc_kp_Manager_algorithm_2 =
R"doc(Create a managed algorithm that will be destroyed by this manager if
it hasn't been destroyed by its reference count going to zero.

-@param tensors (optional) The tensors to initialise the algorithm with
-@param spirv (optional) The SPIRV bytes for the algorithm to dispatch
-@param workgroup (optional) kp::Workgroup for algorithm to use, and
-defaults to (tensor[0].size(), 1, 1) @param specializationConstants
-(optional) kp::Constant to use for specialization constants, and
-defaults to an empty constant @param pushConstants (optional)
-kp::Constant to use for push constants, and defaults to an empty
-constant @returns Shared pointer with initialised algorithm)doc";
+Parameter ``memObjects``:
+    (optional) The mem objects to initialise the algorithm with
+
+Parameter ``spirv``:
+    (optional) The SPIRV bytes for the algorithm to dispatch
+
+Parameter ``workgroup``:
+    (optional) kp::Workgroup for algorithm to use, and defaults to
+    (tensor[0].size(), 1, 1)
+
+Parameter ``specializationConstants``:
+    (optional) templatable vector parameter to use for specialization
+    constants, and defaults to an empty constant
+
+Parameter ``pushConstants``:
+    (optional) templatable vector parameter to use for push constants,
+    and defaults to an empty constant
+
+Returns:
+    Shared pointer with initialised algorithm)doc";

static const char *__doc_kp_Manager_clear =
R"doc(Run a pseudo-garbage collection to release all the managed resources

@@ -221,10 +619,61 @@ static const char *__doc_kp_Manager_createInstance = R"doc()doc";

static const char *__doc_kp_Manager_destroy =
R"doc(Destroy the GPU resources and all managed resources by manager.)doc";

+static const char *__doc_kp_Manager_getDeviceProperties =
+R"doc(Information about the current device.
+
+Returns:
+    vk::PhysicalDeviceProperties containing information about the
+    device)doc";
+
+static const char *__doc_kp_Manager_getVkInstance =
+R"doc(The current Vulkan instance.
+
+Returns:
+    a shared pointer to the current Vulkan instance held by this
+    object)doc";
+
+static const char *__doc_kp_Manager_image = R"doc()doc";
+
+static const char *__doc_kp_Manager_image_2 = R"doc()doc";
+
+static const char *__doc_kp_Manager_image_3 = R"doc()doc";
+
+static const char *__doc_kp_Manager_image_4 = R"doc()doc";
+
+static const char *__doc_kp_Manager_imageT =
+R"doc(Create a managed image that will be destroyed by this manager if it
+hasn't been destroyed by its reference count going to zero.
+
+Parameter ``data``:
+    The data to initialize the image with
+
+Parameter ``tensorType``:
+    The type of image to initialize
+
+Returns:
+    Shared pointer with initialised image)doc";
+
+static const char *__doc_kp_Manager_imageT_2 = R"doc()doc";
+
+static const char *__doc_kp_Manager_imageT_3 = R"doc()doc";
+
+static const char *__doc_kp_Manager_imageT_4 = R"doc()doc";
+
+static const char *__doc_kp_Manager_listDevices =
+R"doc(List the devices available in the current Vulkan instance.
+
+Returns:
+    vector of physical devices containing their respective properties)doc";
+
static const char *__doc_kp_Manager_mComputeQueueFamilyIndices = R"doc()doc";

static const char *__doc_kp_Manager_mComputeQueues = R"doc()doc";

+static const char *__doc_kp_Manager_mDebugDispatcher = R"doc()doc";
+
+static const char *__doc_kp_Manager_mDebugReportCallback = R"doc()doc";
+
static const char *__doc_kp_Manager_mDevice = R"doc()doc";

static const char *__doc_kp_Manager_mFreeDevice = R"doc()doc";

@@ -237,6 +686,8 @@ static const char *__doc_kp_Manager_mManageResources = R"doc()doc";

static const char *__doc_kp_Manager_mManagedAlgorithms = R"doc()doc";

+static const char *__doc_kp_Manager_mManagedImages = R"doc()doc";
+
static const char *__doc_kp_Manager_mManagedSequences = R"doc()doc";

static const char *__doc_kp_Manager_mManagedTensors = R"doc()doc";

@@ -247,10 +698,15 @@ static const char *__doc_kp_Manager_sequence =
R"doc(Create a managed sequence that will be destroyed by this manager if it
hasn't been destroyed by its reference count going to zero.

-@param queueIndex The queue to use from the available queues @param
-nrOfTimestamps The maximum number of timestamps to allocate. If zero
-(default), disables latching of timestamps. @returns Shared pointer
-with initialised sequence)doc";
+Parameter ``queueIndex``:
+    The queue to use from the available queues
+
+Parameter ``nrOfTimestamps``:
+    The maximum number of timestamps to allocate. If zero (default),
+    disables latching of timestamps.
+
+Returns:
+    Shared pointer with initialised sequence)doc";

static const char *__doc_kp_Manager_tensor = R"doc()doc";

@@ -260,9 +716,209 @@ static const char *__doc_kp_Manager_tensorT =
R"doc(Create a managed tensor that will be destroyed by this manager if it
hasn't been destroyed by its reference count going to zero.

-@param data The data to initialize the tensor with @param tensorType
-The type of tensor to initialize @returns Shared pointer with
-initialised tensor)doc";
+Parameter ``data``:
+    The data to initialize the tensor with
+
+Parameter ``tensorType``:
+    The type of tensor to initialize
+
+Returns:
+    Shared pointer with initialised tensor)doc";
+
+static const char *__doc_kp_Memory = R"doc()doc";
+
+static const char *__doc_kp_Memory_MemoryTypes =
+R"doc(Type of memory created: Device memory can receive transfers from
+staging memory. Staging memory is host visible.
+Storage memory is device visible but is not set up to transfer or
+receive data (it is for shader storage only).)doc";
+
+static const char *__doc_kp_Memory_MemoryTypes_eDevice = R"doc(< Type is device memory, source and destination)doc";
+
+static const char *__doc_kp_Memory_MemoryTypes_eDeviceAndHost = R"doc()doc";
+
+static const char *__doc_kp_Memory_MemoryTypes_eHost = R"doc(< Type is host memory, source and destination)doc";
+
+static const char *__doc_kp_Memory_MemoryTypes_eStorage = R"doc(< Type is Device memory (only))doc";
+
+static const char *__doc_kp_Memory_constructDescriptorSet =
+R"doc(Adds this object to a Vulkan descriptor set at \p binding.
+
+Parameter ``descriptorSet``:
+    The descriptor set to add to.
+
+Parameter ``binding``:
+    The binding number to use.
+
+Returns:
+    Add this object to a descriptor set at \p binding.)doc";
+
+static const char *__doc_kp_Memory_data =
+R"doc(Template to return the pointer data converted to the specific type,
+which would be any of the supported types including float, double,
+int32, uint32 and bool.
+
+Returns:
+    Pointer to raw memory containing raw bytes data of Tensor/Image.)doc";
+
+static const char *__doc_kp_Memory_data_2 =
+R"doc(Return the pointer data cast to float.
+
+Returns:
+    Pointer to raw memory containing raw bytes data of Tensor/Image.
+    This is the default, for convenience.)doc";
+
+static const char *__doc_kp_Memory_dataTypeMemorySize =
+R"doc(Returns the total size of a single element of the respective data type
+that this memory object holds.
+
+Returns:
+    Unsigned integer representing the memory size of a single element
+    of the respective data type.)doc";
+
+static const char *__doc_kp_Memory_getDescriptorType = R"doc()doc";
+
+static const char *__doc_kp_Memory_isInit =
+R"doc(Check whether tensor/image is initialized based on the created gpu
+resources.
+
+Returns:
+    Boolean stating whether tensor is initialized)doc";
+
+static const char *__doc_kp_Memory_mDataTypeMemorySize = R"doc()doc";
+
+static const char *__doc_kp_Memory_mDescriptorType = R"doc()doc";
+
+static const char *__doc_kp_Memory_mDevice = R"doc()doc";
+
+static const char *__doc_kp_Memory_mFreePrimaryMemory = R"doc()doc";
+
+static const char *__doc_kp_Memory_mFreeStagingMemory = R"doc()doc";
+
+static const char *__doc_kp_Memory_mMemoryType = R"doc()doc";
+
+static const char *__doc_kp_Memory_mPhysicalDevice = R"doc()doc";
+
+static const char *__doc_kp_Memory_mPrimaryMemory = R"doc()doc";
+
+static const char *__doc_kp_Memory_mRawData = R"doc()doc";
+
+static const char *__doc_kp_Memory_mSize = R"doc()doc";
+
+static const char *__doc_kp_Memory_mStagingMemory = R"doc()doc";
+
+static const char *__doc_kp_Memory_mUnmapMemory = R"doc()doc";
+
+static const char *__doc_kp_Memory_mapRawData = R"doc()doc";
+
+static const char *__doc_kp_Memory_memorySize =
+R"doc(Returns the total memory size of the data contained by the memory
+object, which would equate to (this->size() *
+this->dataTypeMemorySize())
+
+Returns:
+    Unsigned integer representing the total memory size of the data
+    contained by the memory object.)doc";
+
+static const char *__doc_kp_Memory_memoryType =
+R"doc(Retrieve the memory type of the memory object
+
+Returns:
+    Memory type of the memory object)doc";
+
+static const char *__doc_kp_Memory_rawData =
+R"doc(Retrieve the raw data via a pointer to the memory that contains the
+raw data of this tensor/image. The pointer is set to nullptr when the
+Tensor is removed.
+
+Returns:
+    Pointer to raw memory containing raw bytes data of Tensor/Image.)doc";
+
+static const char *__doc_kp_Memory_recordCopyFromDeviceToStaging =
+R"doc(Records a copy from the internal device memory to the staging memory
+using an optional barrier to wait for the operation. This function
+would only be relevant for kp::Tensors of type eDevice.
+
+Parameter ``commandBuffer``:
+    Vulkan Command Buffer to record the commands into)doc";
+
+static const char *__doc_kp_Memory_recordCopyFromStagingToDevice =
+R"doc(Records a copy from the internal staging memory to the device memory
+using an optional barrier to wait for the operation. This function
+would only be relevant for kp::Tensors of type eDevice.
+
+Parameter ``commandBuffer``:
+    Vulkan Command Buffer to record the commands into)doc";
+
+static const char *__doc_kp_Memory_recordPrimaryMemoryBarrier =
+R"doc(Records the buffer memory barrier into the primary buffer and command
+buffer which ensures that relevant data transfers are carried out
+correctly.
+
+Parameter ``commandBuffer``:
+    Vulkan Command Buffer to record the commands into
+
+Parameter ``srcAccessMask``:
+    Access flags for source access mask
+
+Parameter ``dstAccessMask``:
+    Access flags for destination access mask
+
+Parameter ``scrStageMask``:
+    Pipeline stage flags for source stage mask
+
+Parameter ``dstStageMask``:
+    Pipeline stage flags for destination stage mask)doc";
+
+static const char *__doc_kp_Memory_recordStagingMemoryBarrier =
+R"doc(Records the buffer memory barrier into the staging buffer and command
+buffer which ensures that relevant data transfers are carried out
+correctly.
+
+Parameter ``commandBuffer``:
+    Vulkan Command Buffer to record the commands into
+
+Parameter ``srcAccessMask``:
+    Access flags for source access mask
+
+Parameter ``dstAccessMask``:
+    Access flags for destination access mask
+
+Parameter ``scrStageMask``:
+    Pipeline stage flags for source stage mask
+
+Parameter ``dstStageMask``:
+    Pipeline stage flags for destination stage mask)doc";
+
+static const char *__doc_kp_Memory_setRawData =
+R"doc(Sets / resets the data of the tensor/image, which is performed
+directly on the GPU host-visible memory of the tensor.)doc";
+
+static const char *__doc_kp_Memory_size =
+R"doc(Returns the size/magnitude of the Tensor/Image, which will be the
+total number of elements across all dimensions
+
+Returns:
+    Unsigned integer representing the total number of elements)doc";
+
+static const char *__doc_kp_Memory_toString = R"doc()doc";
+
+static const char *__doc_kp_Memory_unmapRawData = R"doc()doc";
+
+static const char *__doc_kp_Memory_vector =
+R"doc(Template to get the data of the current tensor/image as a vector of a
+specific type, which would be any of the supported types including
+float, double, int32, uint32 and bool.
+
+Returns:
+    Vector of type provided by template.)doc";
+
+static const char *__doc_kp_Memory_vector_2 =
+R"doc(Get the data of the current tensor/image as a vector of float. This is
+the default, for convenience.
+
+Returns:
+    Vector of floats.)doc";

static const char *__doc_kp_OpAlgoDispatch =
R"doc(Operation that provides a general abstraction that simplifies the use

@@ -274,22 +930,31 @@ static const char *__doc_kp_OpAlgoDispatch_OpAlgoDispatch =
R"doc(Constructor that stores the algorithm to use as well as the relevant
push constants to override when recording.

-@param algorithm The algorithm object to use for dispatch @param
-pushConstants The push constants to use for override)doc";
+Parameter ``algorithm``:
+    The algorithm object to use for dispatch
+
+Parameter ``pushConstants``:
+    The push constants to use for override)doc";

static const char *__doc_kp_OpAlgoDispatch_mAlgorithm = R"doc()doc";

-static const char *__doc_kp_OpAlgoDispatch_mPushConstants = R"doc()doc";
+static const char *__doc_kp_OpAlgoDispatch_mPushConstantsData = R"doc()doc";
+
+static const char *__doc_kp_OpAlgoDispatch_mPushConstantsDataTypeMemorySize = R"doc()doc";
+
+static const char *__doc_kp_OpAlgoDispatch_mPushConstantsSize = R"doc()doc";

static const char *__doc_kp_OpAlgoDispatch_postEval =
R"doc(Does not perform any postEval commands.

-@param commandBuffer The command buffer to record the command into.)doc";
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";

static const char *__doc_kp_OpAlgoDispatch_preEval =
R"doc(Does not perform any preEval commands.

-@param commandBuffer The command buffer to record the command into.)doc";
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";

static const char *__doc_kp_OpAlgoDispatch_record =
R"doc(This records the commands that are to be sent to the GPU. This
includes the barriers that ensure the memory has been copied before
going in and out of the shader, as well as the dispatch operation that
@@ -299,7 +964,8 @@ sends the shader processing to the gpu. This function also records the
GPU memory copy of the output data for the staging buffer so it can be
read by the host.

-@param commandBuffer The command buffer to record the command into.)doc";
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";

static const char *__doc_kp_OpBase =
R"doc(Base Operation which provides the high level interface that Kompute

@@ -317,7 +983,8 @@ worth noting that there are situations where eval can be called
multiple times, so the resources that are destroyed should not require
a re-init unless explicitly provided by the user.

-@param commandBuffer The command buffer to record the command into.)doc";
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";

static const char *__doc_kp_OpBase_preEval =
R"doc(Pre eval is called before the Sequence has called eval and submitted
the commands to the GPU for processing, and can be used to perform any
per-eval setup steps required as the computation iteration begins.

@@ -327,14 +994,226 @@ It's worth noting that there are situations where eval can be called
multiple times, so the resources that are created should be idempotent
in case it's called multiple times in a row.

-@param commandBuffer The command buffer to record the command into.)doc";
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";

static const char *__doc_kp_OpBase_record =
R"doc(The record function is intended to only send a record command or run
commands that are expected to record operations that are to be
submitted as a batch into the GPU.

-@param commandBuffer The command buffer to record the command into.)doc";
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpImageCopy =
+R"doc(Operation that copies the data from the first image to the rest of the
+images provided, using a record command for all the vectors. This
+operation does not own/manage the memory of the images passed to it.
+The operation must only receive images of type)doc";
+
+static const char *__doc_kp_OpImageCopyToTensor =
+R"doc(Operation that copies the data from the image provided to the tensors
+provided, using a record command for all the vectors. This
+operation does not own/manage the memory of the image and tensors
+passed to it. The operation must only receive images of type)doc";
+
+static const char *__doc_kp_OpImageCopyToTensor_OpImageCopyToTensor =
+R"doc(Default constructor with parameters that provides the core vulkan
+resources and the images that will be used in the operation.
+
+Parameter ``images``:
+    Images that will be used in the operation.)doc";
+
+static const char *__doc_kp_OpImageCopyToTensor_mImage = R"doc()doc";
+
+static const char *__doc_kp_OpImageCopyToTensor_mTensors = R"doc()doc";
+
+static const char *__doc_kp_OpImageCopyToTensor_postEval =
+R"doc(Copies the local vectors for all the images to sync the data with the
+gpu.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpImageCopyToTensor_preEval =
+R"doc(Does not perform any preEval commands.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpImageCopyToTensor_record =
+R"doc(Records the copy commands from the image into all the tensors
+provided. Also optionally records a barrier.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpImageCopy_OpImageCopy =
+R"doc(Default constructor with parameters that provides the core vulkan
+resources and the images that will be used in the operation.
+
+Parameter ``images``:
+    Images that will be used in the operation.)doc";
+
+static const char *__doc_kp_OpImageCopy_mImages = R"doc()doc";
+
+static const char *__doc_kp_OpImageCopy_postEval =
+R"doc(Copies the local vectors for all the images to sync the data with the
+gpu.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpImageCopy_preEval =
+R"doc(Does not perform any preEval commands.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpImageCopy_record =
+R"doc(Records the copy commands from the first image into all the other
+images provided. Also optionally records a barrier.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpImageSyncDevice =
+R"doc(Operation that syncs image's device by mapping local data into the
+device memory. For ImageTypes::eDevice it will use a record operation
+for the memory to be synced into GPU memory which means that the
+operation will be done in sync with GPU commands. For
+ImageTypes::eHost it will only map the data into host memory which
+will happen during preEval before the recorded commands are
+dispatched.)doc";
+
+static const char *__doc_kp_OpImageSyncDevice_OpImageSyncDevice =
+R"doc(Default constructor with parameters that provides the core vulkan
+resources and the images that will be used in the operation. The
+images provided cannot be of type ImageTypes::eStorage.
+
+Parameter ``images``:
+    Images that will be used in the operation.)doc";
+
+static const char *__doc_kp_OpImageSyncDevice_mImages = R"doc()doc";
+
+static const char *__doc_kp_OpImageSyncDevice_postEval =
+R"doc(Does not perform any postEval commands.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpImageSyncDevice_preEval =
+R"doc(Does not perform any preEval commands.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpImageSyncDevice_record =
+R"doc(For device images, it records the copy command for the image to copy
+the data from its staging to device memory.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpImageSyncLocal =
+R"doc(Operation that syncs image's local memory by mapping device data into
+the local CPU memory. For ImageTypes::eDevice it will use a record
+operation for the memory to be synced into GPU memory which means that
+the operation will be done in sync with GPU commands. For
+ImageTypes::eHost it will only map the data into host memory which
+will happen during preEval before the recorded commands are
+dispatched.)doc";
+
+static const char *__doc_kp_OpImageSyncLocal_OpImageSyncLocal =
+R"doc(Default constructor with parameters that provides the core vulkan
+resources and the images that will be used in the operation. The
+images provided cannot be of type ImageTypes::eStorage.
+
+Parameter ``images``:
+    Images that will be used in the operation.)doc";
+
+static const char *__doc_kp_OpImageSyncLocal_mImages = R"doc()doc";
+
+static const char *__doc_kp_OpImageSyncLocal_postEval =
+R"doc(For host images it performs the map command from the host memory into
+local memory.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpImageSyncLocal_preEval =
+R"doc(Does not perform any preEval commands.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpImageSyncLocal_record =
+R"doc(For device images, it records the copy command for the image to copy
+the data from its device to staging memory.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpMemoryBarrier =
+R"doc(Operation that provides a general abstraction that simplifies the use
+of algorithm and parameter components which can be used with shaders.
+It exposes the pipeline barrier functionality specifically for memory
+barriers that can be configured through the respective source and
+destination masks)doc";
+
+static const char *__doc_kp_OpMemoryBarrier_OpMemoryBarrier =
+R"doc(Constructor that stores mem objects as well as memory barrier
+parameters to be used to create a pipeline barrier on the respective
+primary or staging tensor.
+
+Parameter ``memObjects``:
+    The mem objects to apply the memory barriers on
+
+Parameter ``srcAccessMask``:
+    The kp::AccessFlagBits for the source access mask
+
+Parameter ``dstAccessMask``:
+    The kp::AccessFlagBits for the destination access mask
+
+Parameter ``srcStageMask``:
+    The kp::PipelineStageFlagBits for the source stage mask
+
+Parameter ``dstStageMask``:
+    The kp::PipelineStageFlagBits for the destination stage mask
+
+Parameter ``barrierOnPrimary``:
+    Boolean to select primary or secondary buffers on mem objects)doc";
+
+static const char *__doc_kp_OpMemoryBarrier_mBarrierOnPrimary = R"doc()doc";
+
+static const char *__doc_kp_OpMemoryBarrier_mDstAccessMask = R"doc()doc";
+
+static const char *__doc_kp_OpMemoryBarrier_mDstStageMask = R"doc()doc";
+
+static const char *__doc_kp_OpMemoryBarrier_mMemObjects = R"doc()doc";
+
+static const char *__doc_kp_OpMemoryBarrier_mSrcAccessMask = R"doc()doc";
+
+static const char *__doc_kp_OpMemoryBarrier_mSrcStageMask = R"doc()doc";
+
+static const char *__doc_kp_OpMemoryBarrier_postEval =
+R"doc(Does not perform any postEval commands.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpMemoryBarrier_preEval =
+R"doc(Does not perform any preEval commands.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpMemoryBarrier_record =
+R"doc(This records the memory barrier with the access and stage masks
+provided across all relevant tensors.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";

static const char *__doc_kp_OpMult =
R"doc(Operation that performs multiplication on two tensors and outpus on

@@ -345,9 +1224,12 @@ R"doc(Default constructor with parameters that provides the bare minimum
requirements for the operations to be able to create and manage their
sub-components.

-@param tensors Tensors that are to be used in this operation @param
-algorithm An algorithm that will be overridden with the OpMult shader
-data and the tensors provided which are expected to be 3)doc";
+Parameter ``memObjects``:
+    Memory objects that are to be used in this operation
+
+Parameter ``algorithm``:
+    An algorithm that will be overridden with the OpMult shader data
+    and the tensors provided, which are expected to be 3)doc";

static const char *__doc_kp_OpTensorCopy =
R"doc(Operation that copies the data from the first tensor to the rest of
@@ -355,11 +1237,49 @@ the tensors provided, using a record command for all the vectors. This
operation does not own/manage the memory of the tensors passed to it.
The operation must only receive tensors of type)doc";

+static const char *__doc_kp_OpTensorCopyToImage =
+R"doc(Operation that copies the data from the tensor provided to the vector
+of images provided, using a record command for all the vectors. This
+operation does not own/manage the memory of the tensor/images passed
+to it.)doc";
+
+static const char *__doc_kp_OpTensorCopyToImage_OpTensorCopyToImage =
+R"doc(Default constructor with parameters that provides the core vulkan
+resources and the tensors/images that will be used in the operation.
+
+Parameter ``tensors``:
+    Tensors that will be used in the operation.)doc";
+
+static const char *__doc_kp_OpTensorCopyToImage_mImages = R"doc()doc";
+
+static const char *__doc_kp_OpTensorCopyToImage_mTensor = R"doc()doc";
+
+static const char *__doc_kp_OpTensorCopyToImage_postEval =
+R"doc(Copies the local vectors for all the tensors to sync the data with the
+gpu.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpTensorCopyToImage_preEval =
+R"doc(Does not perform any preEval commands.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
+static const char *__doc_kp_OpTensorCopyToImage_record =
+R"doc(Records the copy commands from the tensor into all the images
+provided. Also optionally records a barrier.
+
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";
+
static const char *__doc_kp_OpTensorCopy_OpTensorCopy =
R"doc(Default constructor with parameters that provides the core vulkan
resources and the tensors that will be used in the operation.

-@param tensors Tensors that will be used to create in operation.)doc";
+Parameter ``tensors``:
+    Tensors that will be used in the operation.)doc";

static const char *__doc_kp_OpTensorCopy_mTensors = R"doc()doc";

@@ -367,18 +1287,21 @@ static const char *__doc_kp_OpTensorCopy_postEval =
R"doc(Copies the local vectors for all the tensors to sync the data with the
gpu.

-@param commandBuffer The command buffer to record the command into.)doc";
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";

static const char *__doc_kp_OpTensorCopy_preEval =
R"doc(Does not perform any preEval commands.

-@param commandBuffer The command buffer to record the command into.)doc";
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";

static const char *__doc_kp_OpTensorCopy_record =
R"doc(Records the copy commands from the first tensor into all the other
tensors provided. Also optionally records a barrier.

-@param commandBuffer The command buffer to record the command into.)doc";
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";

static const char *__doc_kp_OpTensorSyncDevice =
R"doc(Operation that syncs tensor's device by mapping local data into the
device memory. For TensorTypes::eDevice it will use a record operation
for the memory to be syncd into GPU memory which means that the
operation will be done in sync with GPU commands. For
TensorTypes::eHost it will only map the data into host memory which
will happen during preEval before the recorded commands are
dispatched.)doc";

@@ -394,41 +1317,46 @@ R"doc(Default constructor with parameters that provides the core vulkan
resources and the tensors that will be used in the operation. The
tensos provided cannot be of type TensorTypes::eStorage.

-@param tensors Tensors that will be used to create in operation.)doc";
+Parameter ``tensors``:
+    Tensors that will be used in the operation.)doc";

static const char *__doc_kp_OpTensorSyncDevice_mTensors = R"doc()doc";

static const char *__doc_kp_OpTensorSyncDevice_postEval =
R"doc(Does not perform any postEval commands.

-@param commandBuffer The command buffer to record the command into.)doc";
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";

static const char *__doc_kp_OpTensorSyncDevice_preEval =
R"doc(Does not perform any preEval commands.

-@param commandBuffer The command buffer to record the command into.)doc";
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";

static const char *__doc_kp_OpTensorSyncDevice_record =
R"doc(For device tensors, it records the copy command for the tensor to copy
the data from its staging to device memory.

-@param commandBuffer The command buffer to record the command into.)doc";
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";

static const char *__doc_kp_OpTensorSyncLocal =
-R"doc(Operation that syncs tensor's local memory by mapping device data into
-the local CPU memory. For TensorTypes::eDevice it will use a record
-operation for the memory to be syncd into GPU memory which means that
-the operation will be done in sync with GPU commands. For
-TensorTypes::eHost it will only map the data into host memory which
+R"doc(Operation that syncs mem object's local memory by mapping device data
+into the local CPU memory. For MemoryTypes::eDevice it will use a
+record operation for the memory to be synced into GPU memory which
+means that the operation will be done in sync with GPU commands. For
+MemoryTypes::eHost it will only map the data into host memory which
will happen during preEval before the recorded commands are
dispatched.)doc";

static const char *__doc_kp_OpTensorSyncLocal_OpTensorSyncLocal =
R"doc(Default constructor with parameters that provides the core vulkan
-resources and the tensors that will be used in the operation. The
-tensors provided cannot be of type TensorTypes::eStorage.
+resources and the memory that will be used in the operation. The
+memory provided cannot be of type MemoryTypes::eStorage.

-@param tensors Tensors that will be used to create in operation.)doc";
+Parameter ``tensors``:
+    Tensors that will be used in the operation.)doc";

static const char *__doc_kp_OpTensorSyncLocal_mTensors = R"doc()doc";

@@ -436,18 +1364,21 @@ static const char *__doc_kp_OpTensorSyncLocal_postEval =
R"doc(For host tensors it performs the map command from the host memory into
local memory.

-@param commandBuffer The command buffer to record the command into.)doc";
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";

static const char *__doc_kp_OpTensorSyncLocal_preEval =
R"doc(Does not perform any preEval commands.

-@param commandBuffer The command buffer to record the command into.)doc";
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";

static const char *__doc_kp_OpTensorSyncLocal_record =
R"doc(For device tensors, it records the copy command for the tensor to copy
the data from its device to staging memory.

-@param commandBuffer The command buffer to record the command into.)doc";
+Parameter ``commandBuffer``:
+    The command buffer to record the command into.)doc";

static const char *__doc_kp_Sequence =
R"doc(Container of operations that can be sent to GPU as batch)doc";

@@ -455,16 +1386,24 @@ static const char *__doc_kp_Sequence_Sequence =
R"doc(Main constructor for sequence which requires core vulkan components to
generate all dependent resources.

-@param physicalDevice Vulkan physical device @param device Vulkan
-logical device @param computeQueue Vulkan compute queue @param
-queueIndex Vulkan compute queue index in device @param totalTimestamps
-Maximum number of timestamps to allocate)doc";
+Parameter ``physicalDevice``:
+    Vulkan physical device
+
+Parameter ``device``:
+    Vulkan logical device
+
+Parameter ``computeQueue``:
+    Vulkan compute queue
+
+Parameter ``queueIndex``:
+    Vulkan compute queue index in device
+
+Parameter ``totalTimestamps``:
+    Maximum number of timestamps to allocate)doc";

static const char *__doc_kp_Sequence_begin =
R"doc(Begins recording commands for commands to be submitted into the
-command buffer.
- -@return Boolean stating whether execution was successful.)doc"; +command buffer.)doc"; static const char *__doc_kp_Sequence_clear = R"doc(Clear function clears all operations currently recorded and starts @@ -482,42 +1421,51 @@ memory and sets the sequence as init=False.)doc"; static const char *__doc_kp_Sequence_end = R"doc(Ends the recording and stops recording commands when the record -command is sent. - -@return Boolean stating whether execution was successful.)doc"; +command is sent.)doc"; static const char *__doc_kp_Sequence_eval = R"doc(Eval sends all the recorded and stored operations in the vector of operations into the gpu as a submit job synchronously (with a barrier). -@return shared_ptr of the Sequence class itself)doc"; +Returns: + shared_ptr of the Sequence class itself)doc"; static const char *__doc_kp_Sequence_eval_2 = R"doc(Resets all the recorded and stored operations, records the operation provided and submits into the gpu as a submit job synchronously (with a barrier). -@return shared_ptr of the Sequence class itself)doc"; +Returns: + shared_ptr of the Sequence class itself)doc"; static const char *__doc_kp_Sequence_eval_3 = R"doc(Eval sends all the recorded and stored operations in the vector of operations into the gpu as a submit job with a barrier. -@param tensors Vector of tensors to use for the operation @param TArgs -Template parameters that are used to initialise operation which allows -for extensible configurations on initialisation. @return -shared_ptr of the Sequence class itself)doc"; +Parameter ``memObjects``: + Vector of memory objects to use for the operation + +Parameter ``TArgs``: + Template parameters that are used to initialise operation which + allows for extensible configurations on initialisation. + +Returns: + shared_ptr of the Sequence class itself)doc"; static const char *__doc_kp_Sequence_eval_4 = R"doc(Eval sends all the recorded and stored operations in the vector of operations into the gpu as a submit job with a barrier. -@param algorithm Algorithm to use for the record often used for OpAlgo -operations @param TArgs Template parameters that are used to -initialise operation which allows for extensible configurations on -initialisation. @return shared_ptr of the Sequence class -itself)doc"; +Parameter ``algorithm``: + Algorithm to use for the record often used for OpAlgo operations + +Parameter ``TArgs``: + Template parameters that are used to initialise operation which + allows for extensible configurations on initialisation. + +Returns: + shared_ptr of the Sequence class itself)doc"; static const char *__doc_kp_Sequence_evalAsync = R"doc(Eval Async sends all the recorded and stored operations in the vector @@ -525,7 +1473,8 @@ of operations into the gpu as a submit job without a barrier. EvalAwait() must ALWAYS be called after to ensure the sequence is terminated correctly. -@return Boolean stating whether execution was successful.)doc"; +Returns: + Boolean stating whether execution was successful.)doc"; static const char *__doc_kp_Sequence_evalAsync_2 = R"doc(Clears currnet operations to record provided one in the vector of @@ -533,33 +1482,46 @@ operations into the gpu as a submit job without a barrier. EvalAwait() must ALWAYS be called after to ensure the sequence is terminated correctly. 
-@return Boolean stating whether execution was successful.)doc"; +Returns: + Boolean stating whether execution was successful.)doc"; static const char *__doc_kp_Sequence_evalAsync_3 = R"doc(Eval sends all the recorded and stored operations in the vector of operations into the gpu as a submit job with a barrier. -@param tensors Vector of tensors to use for the operation @param TArgs -Template parameters that are used to initialise operation which allows -for extensible configurations on initialisation. @return -shared_ptr of the Sequence class itself)doc"; +Parameter ``memObjects``: + Vector of memory objects to use for the operation + +Parameter ``TArgs``: + Template parameters that are used to initialise operation which + allows for extensible configurations on initialisation. + +Returns: + shared_ptr of the Sequence class itself)doc"; static const char *__doc_kp_Sequence_evalAsync_4 = R"doc(Eval sends all the recorded and stored operations in the vector of operations into the gpu as a submit job with a barrier. -@param algorithm Algorithm to use for the record often used for OpAlgo -operations @param TArgs Template parameters that are used to -initialise operation which allows for extensible configurations on -initialisation. @return shared_ptr of the Sequence class -itself)doc"; +Parameter ``algorithm``: + Algorithm to use for the record often used for OpAlgo operations + +Parameter ``TArgs``: + Template parameters that are used to initialise operation which + allows for extensible configurations on initialisation. + +Returns: + shared_ptr of the Sequence class itself)doc"; static const char *__doc_kp_Sequence_evalAwait = R"doc(Eval Await waits for the fence to finish processing and then once it finishes, it runs the postEval of all operations. -@param waitFor Number of milliseconds to wait before timing out. -@return shared_ptr of the Sequence class itself)doc"; +Parameter ``waitFor``: + Number of milliseconds to wait before timing out. + +Returns: + shared_ptr of the Sequence class itself)doc"; static const char *__doc_kp_Sequence_getTimestamps = R"doc(Return the timestamps that were latched at the beginning and after @@ -567,20 +1529,23 @@ each operation during the last eval() call.)doc"; static const char *__doc_kp_Sequence_isInit = R"doc(Returns true if the sequence has been initialised, and it's based on -the GPU resources being refrenced. +the GPU resources being referenced. -@return Boolean stating if is initialized)doc"; +Returns: + Boolean stating if is initialized)doc"; static const char *__doc_kp_Sequence_isRecording = R"doc(Returns true if the sequence is currently in recording activated. -@return Boolean stating if recording ongoing.)doc"; +Returns: + Boolean stating if recording ongoing.)doc"; static const char *__doc_kp_Sequence_isRunning = R"doc(Returns true if the sequence is currently running - mostly used for async workloads. -@return Boolean stating if currently running.)doc"; +Returns: + Boolean stating if currently running.)doc"; static const char *__doc_kp_Sequence_mCommandBuffer = R"doc()doc"; @@ -612,9 +1577,12 @@ This template requires classes to be derived from the OpBase class. This function also requires the Sequence to be recording, otherwise it will not be able to add the operation. -@param op Object derived from kp::BaseOp that will be recoreded by the -sequence which will be used when the operation is evaluated. 
@return
-shared_ptr of the Sequence class itself)doc";
+Parameter ``op``:
+    Object derived from kp::BaseOp that will be recorded by the
+    sequence which will be used when the operation is evaluated.
+
+Returns:
+    shared_ptr of the Sequence class itself)doc";
 
 static const char *__doc_kp_Sequence_record_2 =
 R"doc(Record function for operation to be added to the GPU queue in batch.
 
@@ -622,10 +1590,15 @@ This template requires classes to be derived from the OpBase class.
 This function also requires the Sequence to be recording, otherwise it
 will not be able to add the operation.
 
-@param tensors Vector of tensors to use for the operation @param TArgs
-Template parameters that are used to initialise operation which allows
-for extensible configurations on initialisation. @return
-shared_ptr of the Sequence class itself)doc";
+Parameter ``memObjects``:
+    Vector of mem objects to use for the operation
+
+Parameter ``TArgs``:
+    Template parameters that are used to initialise operation which
+    allows for extensible configurations on initialisation.
+
+Returns:
+    shared_ptr of the Sequence class itself)doc";
 
 static const char *__doc_kp_Sequence_record_3 =
 R"doc(Record function for operation to be added to the GPU queue in batch.
 
@@ -633,48 +1606,23 @@ This template requires classes to be derived from the OpBase class.
 This function also requires the Sequence to be recording, otherwise it
 will not be able to add the operation.
 
-@param algorithm Algorithm to use for the record often used for OpAlgo
-operations @param TArgs Template parameters that are used to
-initialise operation which allows for extensible configurations on
-initialisation. @return shared_ptr of the Sequence class
-itself)doc";
+Parameter ``algorithm``:
+    Algorithm to use for the record often used for OpAlgo operations
+
+Parameter ``TArgs``:
+    Template parameters that are used to initialise operation which
+    allows for extensible configurations on initialisation.
+
+Returns:
+    shared_ptr of the Sequence class itself)doc";
 
 static const char *__doc_kp_Sequence_rerecord =
 R"doc(Clears command buffer and triggers re-record of all the current
-operations saved, which is useful if the underlying kp::Tensors or
+operations saved, which is useful if the underlying kp::Memory objects or
 kp::Algorithms are modified and need to be re-recorded.)doc";
 
 static const char *__doc_kp_Sequence_timestampQueryPool = R"doc()doc";
 
-static const char *__doc_kp_Shader = R"doc(Shader utily class with functions to compile and process glsl files.)doc";
-
-static const char *__doc_kp_Shader_compileSource =
-R"doc(Compile a single glslang source from string value. Currently this
-function uses the glslang C++ interface which is not thread safe so
-this funciton should not be called from multiple threads concurrently.
-If you have a online shader processing multithreading use-case that
-can't use offline compilation please open an issue.
-
-@param source An individual raw glsl shader in string format @param
-entryPoint The function name to use as entry point @param definitions
-List of pairs containing key value definitions @param resourcesLimit A
-list that contains the resource limits for the GLSL compiler @return
-The compiled SPIR-V binary in unsigned int32 format)doc";
-
-static const char *__doc_kp_Shader_compileSources =
-R"doc(Compile multiple sources with optional filenames. Currently this
-function uses the glslang C++ interface which is not thread safe so
-this funciton should not be called from multiple threads concurrently.
-If you have a online shader processing multithreading use-case that -can't use offline compilation please open an issue. - -@param sources A list of raw glsl shaders in string format @param -files A list of file names respective to each of the sources @param -entryPoint The function name to use as entry point @param definitions -List of pairs containing key value definitions @param resourcesLimit A -list that contains the resource limits for the GLSL compiler @return -The compiled SPIR-V binary in unsigned int32 format)doc"; - static const char *__doc_kp_Tensor = R"doc(Structured data used in GPU operations. @@ -687,12 +1635,8 @@ static const char *__doc_kp_TensorT = R"doc()doc"; static const char *__doc_kp_TensorT_TensorT = R"doc()doc"; -static const char *__doc_kp_TensorT_data = R"doc()doc"; - static const char *__doc_kp_TensorT_dataType = R"doc()doc"; -static const char *__doc_kp_TensorT_operator_array = R"doc()doc"; - static const char *__doc_kp_TensorT_setData = R"doc()doc"; static const char *__doc_kp_TensorT_vector = R"doc()doc"; @@ -701,16 +1645,24 @@ static const char *__doc_kp_Tensor_Tensor = R"doc(Constructor with data provided which would be used to create the respective vulkan buffer and memory. -@param physicalDevice The physical device to use to fetch properties -@param device The device to use to create the buffer and memory from -@param data Non-zero-sized vector of data that will be used by the -tensor @param tensorTypes Type for the tensor which is of type -TensorTypes)doc"; +Parameter ``physicalDevice``: + The physical device to use to fetch properties + +Parameter ``device``: + The device to use to create the buffer and memory from + +Parameter ``data``: + Non-zero-sized vector of data that will be used by the tensor + +Parameter ``tensorTypes``: + Type for the tensor which is of type MemoryTypes)doc"; static const char *__doc_kp_Tensor_TensorDataTypes = R"doc()doc"; static const char *__doc_kp_Tensor_TensorDataTypes_eBool = R"doc()doc"; +static const char *__doc_kp_Tensor_TensorDataTypes_eCustom = R"doc()doc"; + static const char *__doc_kp_Tensor_TensorDataTypes_eDouble = R"doc()doc"; static const char *__doc_kp_Tensor_TensorDataTypes_eFloat = R"doc()doc"; @@ -719,44 +1671,38 @@ static const char *__doc_kp_Tensor_TensorDataTypes_eInt = R"doc()doc"; static const char *__doc_kp_Tensor_TensorDataTypes_eUnsignedInt = R"doc()doc"; -static const char *__doc_kp_Tensor_TensorTypes = -R"doc(Type for tensors created: Device allows memory to be transferred from -staging buffers. Staging are host memory visible. Storage are device -visible but are not set up to transfer or receive data (only for -shader storage).)doc"; - -static const char *__doc_kp_Tensor_TensorTypes_eDevice = R"doc(< Type is device memory, source and destination)doc"; +static const char *__doc_kp_Tensor_allocateBindMemory = R"doc()doc"; -static const char *__doc_kp_Tensor_TensorTypes_eHost = R"doc(< Type is host memory, source and destination)doc"; +static const char *__doc_kp_Tensor_allocateMemoryCreateGPUResources = R"doc()doc"; -static const char *__doc_kp_Tensor_TensorTypes_eStorage = R"doc(< Type is Device memory (only))doc"; +static const char *__doc_kp_Tensor_constructDescriptorBufferInfo = R"doc()doc"; -static const char *__doc_kp_Tensor_allocateBindMemory = R"doc()doc"; +static const char *__doc_kp_Tensor_constructDescriptorSet = +R"doc(Adds this object to a Vulkan descriptor set at \p binding. 
-static const char *__doc_kp_Tensor_allocateMemoryCreateGPUResources = R"doc()doc";
+Parameter ``descriptorSet``:
+    The descriptor set to add to.
 
-static const char *__doc_kp_Tensor_constructDescriptorBufferInfo =
-R"doc(Constructs a vulkan descriptor buffer info which can be used to
-specify and reference the underlying buffer component of the tensor
-without exposing it.
+Parameter ``binding``:
+    The binding number to use.
 
-@return Descriptor buffer info with own buffer)doc";
+Returns:
+    The vk::WriteDescriptorSet that adds this object to the descriptor
+    set at \p binding.)doc";
 
 static const char *__doc_kp_Tensor_createBuffer = R"doc()doc";
 
-static const char *__doc_kp_Tensor_data = R"doc()doc";
-
 static const char *__doc_kp_Tensor_dataType =
-R"doc(Retrieve the underlying data type of the Tensor
-
-@return Data type of tensor of type kp::Tensor::TensorDataTypes)doc";
+R"doc(Retrieve the underlying data type of the tensor
 
-static const char *__doc_kp_Tensor_dataTypeMemorySize = R"doc()doc";
+Returns:
+    Data type of tensor of type kp::Tensor::TensorDataTypes)doc";
 
 static const char *__doc_kp_Tensor_destroy =
 R"doc(Destroys and frees the GPU resources which include the buffer and
 memory.)doc";
 
+static const char *__doc_kp_Tensor_getPrimaryBuffer = R"doc()doc";
+
 static const char *__doc_kp_Tensor_getPrimaryBufferUsageFlags = R"doc()doc";
 
 static const char *__doc_kp_Tensor_getPrimaryMemoryPropertyFlags = R"doc()doc";
 
@@ -769,109 +1715,114 @@ static const char *__doc_kp_Tensor_isInit =
 R"doc(Check whether tensor is initialized based on the created gpu
 resources.
 
-@returns Boolean stating whether tensor is initialized)doc";
+Returns:
+    Boolean stating whether tensor is initialized)doc";
 
 static const char *__doc_kp_Tensor_mDataType = R"doc()doc";
 
-static const char *__doc_kp_Tensor_mDataTypeMemorySize = R"doc()doc";
-
-static const char *__doc_kp_Tensor_mDevice = R"doc()doc";
+static const char *__doc_kp_Tensor_mDescriptorBufferInfo = R"doc()doc";
 
 static const char *__doc_kp_Tensor_mFreePrimaryBuffer = R"doc()doc";
 
-static const char *__doc_kp_Tensor_mFreePrimaryMemory = R"doc()doc";
-
 static const char *__doc_kp_Tensor_mFreeStagingBuffer = R"doc()doc";
 
-static const char *__doc_kp_Tensor_mFreeStagingMemory = R"doc()doc";
-
-static const char *__doc_kp_Tensor_mPhysicalDevice = R"doc()doc";
-
 static const char *__doc_kp_Tensor_mPrimaryBuffer = R"doc()doc";
 
-static const char *__doc_kp_Tensor_mPrimaryMemory = R"doc()doc";
-
-static const char *__doc_kp_Tensor_mRawData = R"doc()doc";
-
-static const char *__doc_kp_Tensor_mSize = R"doc()doc";
-
 static const char *__doc_kp_Tensor_mStagingBuffer = R"doc()doc";
 
-static const char *__doc_kp_Tensor_mStagingMemory = R"doc()doc";
-
-static const char *__doc_kp_Tensor_mTensorType = R"doc()doc";
-
-static const char *__doc_kp_Tensor_mapRawData = R"doc()doc";
-
-static const char *__doc_kp_Tensor_memorySize = R"doc()doc";
-
-static const char *__doc_kp_Tensor_rawData = R"doc()doc";
-
 static const char *__doc_kp_Tensor_rebuild =
 R"doc(Function to trigger reinitialisation of the tensor buffer and memory
 with new data as well as new potential device type.
 
-@param data Vector of data to use to initialise vector from @param
-tensorType The type to use for the tensor)doc";
+Parameter ``data``:
+    Vector of data to use to initialise vector from
 
-static const char *__doc_kp_Tensor_recordBufferMemoryBarrier =
-R"doc(Records the buffer memory barrier into the command buffer which
-ensures that relevant data transfers are carried out correctly.
+Parameter ``tensorType``: + The type to use for the tensor)doc"; -@param commandBuffer Vulkan Command Buffer to record the commands into -@param srcAccessMask Access flags for source access mask @param -dstAccessMask Access flags for destination access mask @param -scrStageMask Pipeline stage flags for source stage mask @param -dstStageMask Pipeline stage flags for destination stage mask)doc"; +static const char *__doc_kp_Tensor_recordBufferMemoryBarrier = R"doc()doc"; static const char *__doc_kp_Tensor_recordCopyBuffer = R"doc()doc"; +static const char *__doc_kp_Tensor_recordCopyBufferFromImage = R"doc()doc"; + static const char *__doc_kp_Tensor_recordCopyFrom = R"doc(Records a copy from the memory of the tensor provided to the current -thensor. This is intended to pass memory into a processing, to perform +tensor. This is intended to pass memory into a processing, to perform a staging buffer transfer, or to gather output (between others). -@param commandBuffer Vulkan Command Buffer to record the commands into -@param copyFromTensor Tensor to copy the data from @param -createBarrier Whether to create a barrier that ensures the data is -copied before further operations. Default is true.)doc"; +Parameter ``commandBuffer``: + Vulkan Command Buffer to record the commands into + +Parameter ``copyFromTensor``: + Tensor to copy the data from)doc"; + +static const char *__doc_kp_Tensor_recordCopyFrom_2 = +R"doc(Records a copy from the memory of the image provided to the current +tensor. This is intended to pass memory into a processing, to perform +a staging buffer transfer, or to gather output (between others). + +Parameter ``commandBuffer``: + Vulkan Command Buffer to record the commands into + +Parameter ``copyFromImage``: + Image to copy the data from)doc"; static const char *__doc_kp_Tensor_recordCopyFromDeviceToStaging = R"doc(Records a copy from the internal device memory to the staging memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice. -@param commandBuffer Vulkan Command Buffer to record the commands into -@param createBarrier Whether to create a barrier that ensures the data -is copied before further operations. Default is true.)doc"; +Parameter ``commandBuffer``: + Vulkan Command Buffer to record the commands into)doc"; static const char *__doc_kp_Tensor_recordCopyFromStagingToDevice = R"doc(Records a copy from the internal staging memory to the device memory using an optional barrier to wait for the operation. This function would only be relevant for kp::Tensors of type eDevice. -@param commandBuffer Vulkan Command Buffer to record the commands into -@param createBarrier Whether to create a barrier that ensures the data -is copied before further operations. Default is true.)doc"; +Parameter ``commandBuffer``: + Vulkan Command Buffer to record the commands into)doc"; + +static const char *__doc_kp_Tensor_recordPrimaryMemoryBarrier = +R"doc(Records the memory barrier into the primary buffer and command buffer +which ensures that relevant data transfers are carried out correctly. 
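A minimal sketch of the staging/device copy behaviour documented above, driven through the operations bound later in this diff (assumes numpy as np and a kp.Manager named mgr):

    t_src = mgr.tensor(np.array([1.0, 2.0, 3.0], dtype=np.float32))
    t_dst = mgr.tensor(np.zeros(3, dtype=np.float32))

    (mgr.sequence()
        .record(kp.OpTensorSyncDevice([t_src]))   # staging -> device memory
        .record(kp.OpTensorCopy([t_src, t_dst]))  # device -> device copy
        .record(kp.OpTensorSyncLocal([t_dst]))    # device -> staging, mapped on postEval
        .eval())

    assert np.all(t_dst.data() == t_src.data())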
+
+Parameter ``commandBuffer``:
+    Vulkan Command Buffer to record the commands into
+
+Parameter ``srcAccessMask``:
+    Access flags for source access mask
+
+Parameter ``dstAccessMask``:
+    Access flags for destination access mask
+
+Parameter ``srcStageMask``:
+    Pipeline stage flags for source stage mask
+
+Parameter ``dstStageMask``:
+    Pipeline stage flags for destination stage mask)doc";
 
-static const char *__doc_kp_Tensor_setRawData =
-R"doc(Sets / resets the vector data of the tensor. This function does not
-perform any copies into GPU memory and is only performed on the host.)doc";
+static const char *__doc_kp_Tensor_recordStagingMemoryBarrier =
+R"doc(Records the memory barrier into the staging buffer and command buffer
+which ensures that relevant data transfers are carried out correctly.
 
-static const char *__doc_kp_Tensor_size =
-R"doc(Returns the size/magnitude of the Tensor, which will be the total
-number of elements across all dimensions
+Parameter ``commandBuffer``:
+    Vulkan Command Buffer to record the commands into
 
-@return Unsigned integer representing the total number of elements)doc";
+Parameter ``srcAccessMask``:
+    Access flags for source access mask
 
-static const char *__doc_kp_Tensor_tensorType =
-R"doc(Retrieve the tensor type of the Tensor
+Parameter ``dstAccessMask``:
+    Access flags for destination access mask
 
-@return Tensor type of tensor)doc";
+Parameter ``srcStageMask``:
+    Pipeline stage flags for source stage mask
 
-static const char *__doc_kp_Tensor_unmapRawData = R"doc()doc";
+Parameter ``dstStageMask``:
+    Pipeline stage flags for destination stage mask)doc";
 
-static const char *__doc_kp_Tensor_vector = R"doc()doc";
+static const char *__doc_kp_Tensor_toString = R"doc()doc";
 
 #if defined(__GNUG__)
 #pragma GCC diagnostic pop
diff --git a/python/src/main.cpp b/python/src/main.cpp
index 6c0f640c..6c2dcba4 100644
--- a/python/src/main.cpp
+++ b/python/src/main.cpp
@@ -65,16 +65,16 @@ PYBIND11_MODULE(kp, m)
 
     py::module_ np = py::module_::import("numpy");
 
-    py::enum_(m, "TensorTypes")
+    py::enum_(m, "MemoryTypes")
       .value("device",
-             kp::Tensor::TensorTypes::eDevice,
-             DOC(kp, Tensor, TensorTypes, eDevice))
+             kp::Memory::MemoryTypes::eDevice,
+             DOC(kp, Memory, MemoryTypes, eDevice))
       .value("host",
-             kp::Tensor::TensorTypes::eHost,
-             DOC(kp, Tensor, TensorTypes, eHost))
+             kp::Memory::MemoryTypes::eHost,
+             DOC(kp, Memory, MemoryTypes, eHost))
       .value("storage",
-             kp::Tensor::TensorTypes::eStorage,
-             DOC(kp, Tensor, TensorTypes, eStorage))
+             kp::Memory::MemoryTypes::eStorage,
+             DOC(kp, Memory, MemoryTypes, eStorage))
       .export_values();
 
     py::class_>(
@@ -84,20 +117,38 @@ PYBIND11_MODULE(kp, m)
       kp::OpBase,
       std::shared_ptr>(
       m, "OpTensorSyncDevice", DOC(kp, OpTensorSyncDevice))
-      .def(py::init>&>(),
+      .def(py::init>&>(),
           DOC(kp, OpTensorSyncDevice, OpTensorSyncDevice));
 
     py::class_>(
      m, "OpTensorSyncLocal", DOC(kp, OpTensorSyncLocal))
-      .def(py::init>&>(),
+      .def(py::init>&>(),
          DOC(kp, OpTensorSyncLocal, OpTensorSyncLocal));
 
     py::class_>(
      m, "OpTensorCopy", DOC(kp, OpTensorCopy))
-      .def(py::init>&>(),
+      .def(py::init>&>(),
          DOC(kp, OpTensorCopy, OpTensorCopy));
 
+    py::class_>(
+      m, "OpImageSyncDevice", DOC(kp, OpImageSyncDevice))
+      .def(py::init>&>(),
+           DOC(kp, OpImageSyncDevice, OpImageSyncDevice));
+
+    py::class_>(
+      m, "OpImageSyncLocal", DOC(kp, OpImageSyncLocal))
+      .def(py::init>&>(),
+           DOC(kp, OpImageSyncLocal, OpImageSyncLocal));
+
+    py::class_>(
+      m, "OpImageCopy", DOC(kp, OpImageCopy))
+      .def(py::init>&>(),
+           DOC(kp, OpImageCopy, OpImageCopy));
 
     py::class_>(
       m, "OpMult",
DOC(kp, OpMult)) - .def(py::init>&, + .def(py::init>&, const std::shared_ptr&>(), DOC(kp, OpMult, OpMult)); py::class_>( m, "Algorithm", DOC(kp, Algorithm, Algorithm)) - .def("get_tensors", - &kp::Algorithm::getTensors, - DOC(kp, Algorithm, getTensors)) + .def("get_mem_objects", + &kp::Algorithm::getMemObjects, + DOC(kp, Algorithm, getMemObjects)) .def("destroy", &kp::Algorithm::destroy, DOC(kp, Algorithm, destroy)) .def("is_init", &kp::Algorithm::isInit, DOC(kp, Algorithm, isInit)); - py::class_>( + py::class_>( + m, "Memory", DOC(kp, Memory)); + + py::class_, kp::Memory>( m, "Tensor", DOC(kp, Tensor)) .def( "data", @@ -154,13 +175,53 @@ PYBIND11_MODULE(kp, m) "Kompute Python data type not supported"); } }, - DOC(kp, Tensor, data)) - .def("size", &kp::Tensor::size, DOC(kp, Tensor, size)) - .def("__len__", &kp::Tensor::size, DOC(kp, Tensor, size)) - .def("tensor_type", &kp::Tensor::tensorType, DOC(kp, Tensor, tensorType)) + DOC(kp, Memory, data)) + .def("size", &kp::Tensor::size, DOC(kp, Memory, size)) + .def("__len__", &kp::Tensor::size, DOC(kp, Memory, size)) + .def("memory_type", &kp::Tensor::memoryType, DOC(kp, Memory, memoryType)) .def("data_type", &kp::Tensor::dataType, DOC(kp, Tensor, dataType)) .def("is_init", &kp::Tensor::isInit, DOC(kp, Tensor, isInit)) .def("destroy", &kp::Tensor::destroy, DOC(kp, Tensor, destroy)); + py::class_, kp::Memory>( + m, "Image", DOC(kp, Image)) + .def( + "data", + [](kp::Image& self) { + // Non-owning container exposing the underlying pointer + switch (self.dataType()) { + case kp::Image::ImageDataTypes::eF32: + return py::array( + self.size(), self.data(), py::cast(&self)); + case kp::Image::ImageDataTypes::eU32: + return py::array( + self.size(), self.data(), py::cast(&self)); + case kp::Image::ImageDataTypes::eS32: + return py::array( + self.size(), self.data(), py::cast(&self)); + case kp::Image::ImageDataTypes::eU16: + return py::array( + self.size(), self.data(), py::cast(&self)); + case kp::Image::ImageDataTypes::eS16: + return py::array( + self.size(), self.data(), py::cast(&self)); + case kp::Image::ImageDataTypes::eU8: + return py::array( + self.size(), self.data(), py::cast(&self)); + case kp::Image::ImageDataTypes::eS8: + return py::array( + self.size(), self.data(), py::cast(&self)); + default: + throw std::runtime_error( + "Kompute Python data type not supported"); + } + }, + DOC(kp, Memory, data)) + .def("size", &kp::Image::size, DOC(kp, Memory, size)) + .def("__len__", &kp::Image::size, DOC(kp, Memory, size)) + .def("memory_type", &kp::Image::memoryType, DOC(kp, Memory, memoryType)) + .def("data_type", &kp::Image::dataType, DOC(kp, Image, dataType)) + .def("is_init", &kp::Image::isInit, DOC(kp, Image, isInit)) + .def("destroy", &kp::Image::destroy, DOC(kp, Image, destroy)); py::class_>(m, "Sequence") .def( @@ -230,7 +291,7 @@ PYBIND11_MODULE(kp, m) "tensor", [np](kp::Manager& self, const py::array_t& data, - kp::Tensor::TensorTypes tensor_type) { + kp::Memory::MemoryTypes memory_type) { const py::array_t& flatdata = np.attr("ravel")(data); const py::buffer_info info = flatdata.request(); KP_LOG_DEBUG("Kompute Python Manager tensor() creating tensor " @@ -240,16 +301,16 @@ PYBIND11_MODULE(kp, m) flatdata.size(), sizeof(float), kp::Tensor::TensorDataTypes::eFloat, - tensor_type); + memory_type); }, DOC(kp, Manager, tensor), py::arg("data"), - py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice) + py::arg("memory_type") = kp::Memory::MemoryTypes::eDevice) .def( "tensor_t", [np](kp::Manager& self, const py::array& data, - 
kp::Tensor::TensorTypes tensor_type) {
+             kp::Memory::MemoryTypes memory_type) {
              // TODO: Suppport strides in numpy format
              const py::array& flatdata = np.attr("ravel")(data);
              const py::buffer_info info = flatdata.request();
@@ -262,31 +323,31 @@ PYBIND11_MODULE(kp, m)
                                 flatdata.size(),
                                 sizeof(float),
                                 kp::Tensor::TensorDataTypes::eFloat,
-                                tensor_type);
+                                memory_type);
              } else if (flatdata.dtype().is(py::dtype::of())) {
                  return self.tensor(info.ptr,
                                     flatdata.size(),
                                     sizeof(uint32_t),
                                     kp::Tensor::TensorDataTypes::eUnsignedInt,
-                                    tensor_type);
+                                    memory_type);
              } else if (flatdata.dtype().is(py::dtype::of())) {
                  return self.tensor(info.ptr,
                                     flatdata.size(),
                                     sizeof(int32_t),
                                     kp::Tensor::TensorDataTypes::eInt,
-                                    tensor_type);
+                                    memory_type);
              } else if (flatdata.dtype().is(py::dtype::of())) {
                  return self.tensor(info.ptr,
                                     flatdata.size(),
                                     sizeof(double),
                                     kp::Tensor::TensorDataTypes::eDouble,
-                                    tensor_type);
+                                    memory_type);
              } else if (flatdata.dtype().is(py::dtype::of())) {
                  return self.tensor(info.ptr,
                                     flatdata.size(),
                                     sizeof(bool),
                                     kp::Tensor::TensorDataTypes::eBool,
-                                    tensor_type);
+                                    memory_type);
              } else {
                  throw std::runtime_error(
                    "Kompute Python no valid dtype supported");
@@ -294,11 +355,112 @@ PYBIND11_MODULE(kp, m)
          },
          DOC(kp, Manager, tensorT),
          py::arg("data"),
-         py::arg("tensor_type") = kp::Tensor::TensorTypes::eDevice)
+         py::arg("memory_type") = kp::Memory::MemoryTypes::eDevice)
+      .def(
+        "image",
+        [np](kp::Manager& self,
+             const py::array_t& data,
+             uint32_t width,
+             uint32_t height,
+             uint32_t num_channels,
+             kp::Memory::MemoryTypes memory_type) {
+            const py::array_t& flatdata = np.attr("ravel")(data);
+            const py::buffer_info info = flatdata.request();
+            KP_LOG_DEBUG("Kompute Python Manager image() creating image "
+                         "float with data size {}",
+                         flatdata.size());
+            return self.image(info.ptr,
+                              width,
+                              height,
+                              num_channels,
+                              kp::Image::ImageDataTypes::eF32,
+                              memory_type);
+        },
+        DOC(kp, Manager, image),
+        py::arg("data"),
+        py::arg("width"),
+        py::arg("height"),
+        py::arg("num_channels"),
+        py::arg("memory_type") = kp::Memory::MemoryTypes::eDevice)
+      .def(
+        "image_t",
+        [np](kp::Manager& self,
+             const py::array& data,
+             uint32_t width,
+             uint32_t height,
+             uint32_t num_channels,
+             kp::Memory::MemoryTypes memory_type) {
+            // TODO: Support strides in numpy format
+            const py::array& flatdata = np.attr("ravel")(data);
+            const py::buffer_info info = flatdata.request();
+            KP_LOG_DEBUG("Kompute Python Manager creating image_t with data "
+                         "size {} dtype {}",
+                         flatdata.size(),
+                         std::string(py::str(flatdata.dtype())));
+            if (flatdata.dtype().is(py::dtype::of())) {
+                return self.image(info.ptr,
+                                  width,
+                                  height,
+                                  num_channels,
+                                  kp::Image::ImageDataTypes::eF32,
+                                  memory_type);
+            } else if (flatdata.dtype().is(py::dtype::of())) {
+                return self.image(info.ptr,
+                                  width,
+                                  height,
+                                  num_channels,
+                                  kp::Image::ImageDataTypes::eU32,
+                                  memory_type);
+            } else if (flatdata.dtype().is(py::dtype::of())) {
+                return self.image(info.ptr,
+                                  width,
+                                  height,
+                                  num_channels,
+                                  kp::Image::ImageDataTypes::eS32,
+                                  memory_type);
+            } else if (flatdata.dtype().is(py::dtype::of())) {
+                return self.image(info.ptr,
+                                  width,
+                                  height,
+                                  num_channels,
+                                  kp::Image::ImageDataTypes::eU16,
+                                  memory_type);
+            } else if (flatdata.dtype().is(py::dtype::of())) {
+                return self.image(info.ptr,
+                                  width,
+                                  height,
+                                  num_channels,
+                                  kp::Image::ImageDataTypes::eS16,
+                                  memory_type);
+            } else if (flatdata.dtype().is(py::dtype::of())) {
+                return self.image(info.ptr,
+                                  width,
+                                  height,
+                                  num_channels,
+                                  kp::Image::ImageDataTypes::eU8,
+
memory_type); + } else if (flatdata.dtype().is(py::dtype::of())) { + return self.image(info.ptr, + width, + height, + num_channels, + kp::Image::ImageDataTypes::eS8, + memory_type); + } else { + throw std::runtime_error( + "Kompute Python no valid dtype supported"); + } + }, + DOC(kp, Manager, imageT), + py::arg("data"), + py::arg("width"), + py::arg("height"), + py::arg("num_channels"), + py::arg("memory_type") = kp::Memory::MemoryTypes::eDevice) .def( "algorithm", [](kp::Manager& self, - const std::vector>& tensors, + const std::vector>& tensors, const py::bytes& spirv, const kp::Workgroup& workgroup, const std::vector& spec_consts, @@ -320,7 +482,7 @@ PYBIND11_MODULE(kp, m) .def( "algorithm", [np](kp::Manager& self, - const std::vector>& tensors, + const std::vector>& tensors, const py::bytes& spirv, const kp::Workgroup& workgroup, const py::array& spec_consts, diff --git a/python/test/test_image_types.py b/python/test/test_image_types.py new file mode 100644 index 00000000..17d6dcf6 --- /dev/null +++ b/python/test/test_image_types.py @@ -0,0 +1,309 @@ +import os +import pytest +import kp +import numpy as np + +from .utils import compile_source + +VK_ICD_FILENAMES = os.environ.get("VK_ICD_FILENAMES", "") + +def test_type_float(): + + shader = """ + #version 450 + layout(set = 0, binding = 0, r32f) uniform image2D valuesLhs; + layout(set = 0, binding = 1, r32f) uniform image2D valuesRhs; + layout(set = 0, binding = 2, r32f) uniform image2D imageOutput; + layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + + void main() + { + uint index = gl_GlobalInvocationID.x; + imageStore(imageOutput, ivec2(index, 0), imageLoad(valuesLhs, ivec2(index, 0)) * imageLoad(valuesRhs, ivec2(index, 0))); + } + """ + + spirv = compile_source(shader) + + arr_in_a = np.array([123., 153., 231.], dtype=np.float32) + arr_in_b = np.array([9482, 1208, 1238], dtype=np.float32) + arr_out = np.array([0, 0, 0], dtype=np.float32) + + mgr = kp.Manager() + + image_in_a = mgr.image(arr_in_a, 3, 1, 1) + image_in_b = mgr.image(arr_in_b, 3, 1, 1) + image_out = mgr.image(arr_out, 3, 1, 1) + + params = [image_in_a, image_in_b, image_out] + + (mgr.sequence() + .record(kp.OpImageSyncDevice(params)) + .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv))) + .record(kp.OpImageSyncLocal([image_out])) + .eval()) + + assert np.all(image_out.data() == arr_in_a * arr_in_b) + +def test_type_int(): + + shader = """ + #version 450 + layout(set = 0, binding = 0, r32i) uniform iimage2D valuesLhs; + layout(set = 0, binding = 1, r32i) uniform iimage2D valuesRhs; + layout(set = 0, binding = 2, r32i) uniform iimage2D imageOutput; + layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; + + void main() + { + uint index = gl_GlobalInvocationID.x; + imageStore(imageOutput, ivec2(index, 0), imageLoad(valuesLhs, ivec2(index, 0)) * imageLoad(valuesRhs, ivec2(index, 0))); + } + """ + + spirv = compile_source(shader) + + arr_in_a = np.array([123, 153, 231], dtype=np.int32) + arr_in_b = np.array([9482, 1208, 1238], dtype=np.int32) + arr_out = np.array([0, 0, 0], dtype=np.int32) + + mgr = kp.Manager() + + image_in_a = mgr.image_t(arr_in_a, 3, 1, 1) + image_in_b = mgr.image_t(arr_in_b, 3, 1, 1) + image_out = mgr.image_t(arr_out, 3, 1, 1) + + params = [image_in_a, image_in_b, image_out] + + (mgr.sequence() + .record(kp.OpImageSyncDevice(params)) + .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv))) + .record(kp.OpImageSyncLocal([image_out])) + .eval()) + + print(f"Dtype value {image_out.data().dtype}") + + assert 
np.all(image_out.data() == arr_in_a * arr_in_b)
+
+def test_type_unsigned_int():
+
+    shader = """
+    #version 450
+    layout(set = 0, binding = 0, r32ui) uniform uimage2D valuesLhs;
+    layout(set = 0, binding = 1, r32ui) uniform uimage2D valuesRhs;
+    layout(set = 0, binding = 2, r32ui) uniform uimage2D imageOutput;
+    layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+    void main()
+    {
+        uint index = gl_GlobalInvocationID.x;
+        imageStore(imageOutput, ivec2(index, 0), imageLoad(valuesLhs, ivec2(index, 0)) * imageLoad(valuesRhs, ivec2(index, 0)));
+    }
+    """
+
+    spirv = compile_source(shader)
+
+    arr_in_a = np.array([123, 153, 231], dtype=np.uint32)
+    arr_in_b = np.array([9482, 1208, 1238], dtype=np.uint32)
+    arr_out = np.array([0, 0, 0], dtype=np.uint32)
+
+    mgr = kp.Manager()
+
+    image_in_a = mgr.image_t(arr_in_a, 3, 1, 1)
+    image_in_b = mgr.image_t(arr_in_b, 3, 1, 1)
+    image_out = mgr.image_t(arr_out, 3, 1, 1)
+
+    params = [image_in_a, image_in_b, image_out]
+
+    (mgr.sequence()
+        .record(kp.OpImageSyncDevice(params))
+        .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv)))
+        .record(kp.OpImageSyncLocal([image_out]))
+        .eval())
+
+    print(f"Dtype value {image_out.data().dtype}")
+
+    assert np.all(image_out.data() == arr_in_a * arr_in_b)
+
+def test_type_short():
+
+    shader = """
+    #version 450
+    layout(set = 0, binding = 0, r16i) uniform iimage2D valuesLhs;
+    layout(set = 0, binding = 1, r16i) uniform iimage2D valuesRhs;
+    layout(set = 0, binding = 2, r16i) uniform iimage2D imageOutput;
+    layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+    void main()
+    {
+        uint index = gl_GlobalInvocationID.x;
+        imageStore(imageOutput, ivec2(index, 0), imageLoad(valuesLhs, ivec2(index, 0)) * imageLoad(valuesRhs, ivec2(index, 0)));
+    }
+    """
+
+    spirv = compile_source(shader)
+
+    arr_in_a = np.array([12, 15, 23], dtype=np.int16)
+    arr_in_b = np.array([948, 120, 123], dtype=np.int16)
+    arr_out = np.array([0, 0, 0], dtype=np.int16)
+
+    mgr = kp.Manager()
+
+    image_in_a = mgr.image_t(arr_in_a, 3, 1, 1)
+    image_in_b = mgr.image_t(arr_in_b, 3, 1, 1)
+    image_out = mgr.image_t(arr_out, 3, 1, 1)
+
+    params = [image_in_a, image_in_b, image_out]
+
+    (mgr.sequence()
+        .record(kp.OpImageSyncDevice(params))
+        .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv)))
+        .record(kp.OpImageSyncLocal([image_out]))
+        .eval())
+
+    print(f"Dtype value {image_out.data().dtype}")
+
+    assert np.all(image_out.data() == arr_in_a * arr_in_b)
+
+def test_type_unsigned_short():
+
+    shader = """
+    #version 450
+    layout(set = 0, binding = 0, r16ui) uniform uimage2D valuesLhs;
+    layout(set = 0, binding = 1, r16ui) uniform uimage2D valuesRhs;
+    layout(set = 0, binding = 2, r16ui) uniform uimage2D imageOutput;
+    layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+    void main()
+    {
+        uint index = gl_GlobalInvocationID.x;
+        imageStore(imageOutput, ivec2(index, 0), imageLoad(valuesLhs, ivec2(index, 0)) * imageLoad(valuesRhs, ivec2(index, 0)));
+    }
+    """
+
+    spirv = compile_source(shader)
+
+    arr_in_a = np.array([12, 15, 23], dtype=np.uint16)
+    arr_in_b = np.array([948, 120, 123], dtype=np.uint16)
+    arr_out = np.array([0, 0, 0], dtype=np.uint16)
+
+    mgr = kp.Manager()
+
+    image_in_a = mgr.image_t(arr_in_a, 3, 1, 1)
+    image_in_b = mgr.image_t(arr_in_b, 3, 1, 1)
+    image_out = mgr.image_t(arr_out, 3, 1, 1)
+
+    params = [image_in_a, image_in_b, image_out]
+
+    (mgr.sequence()
+        .record(kp.OpImageSyncDevice(params))
+        .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv)))
+        .record(kp.OpImageSyncLocal([image_out]))
+        .eval())
+
+    print(f"Dtype value {image_out.data().dtype}")
+
+    assert np.all(image_out.data() == arr_in_a * arr_in_b)
+
+def test_type_char():
+
+    shader = """
+    #version 450
+    layout(set = 0, binding = 0, r8i) uniform iimage2D valuesLhs;
+    layout(set = 0, binding = 1, r8i) uniform iimage2D valuesRhs;
+    layout(set = 0, binding = 2, r8i) uniform iimage2D imageOutput;
+    layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+    void main()
+    {
+        uint index = gl_GlobalInvocationID.x;
+        imageStore(imageOutput, ivec2(index, 0), imageLoad(valuesLhs, ivec2(index, 0)) * imageLoad(valuesRhs, ivec2(index, 0)));
+    }
+    """
+
+    spirv = compile_source(shader)
+
+    arr_in_a = np.array([2, 3, 2], dtype=np.int8)
+    arr_in_b = np.array([35, 12, 23], dtype=np.int8)
+    arr_out = np.array([0, 0, 0], dtype=np.int8)
+
+    mgr = kp.Manager()
+
+    image_in_a = mgr.image_t(arr_in_a, 3, 1, 1)
+    image_in_b = mgr.image_t(arr_in_b, 3, 1, 1)
+    image_out = mgr.image_t(arr_out, 3, 1, 1)
+
+    params = [image_in_a, image_in_b, image_out]
+
+    (mgr.sequence()
+        .record(kp.OpImageSyncDevice(params))
+        .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv)))
+        .record(kp.OpImageSyncLocal([image_out]))
+        .eval())
+
+    print(f"Dtype value {image_out.data().dtype}")
+
+    assert np.all(image_out.data() == arr_in_a * arr_in_b)
+
+def test_type_unsigned_char():
+
+    shader = """
+    #version 450
+    layout(set = 0, binding = 0, r8ui) uniform uimage2D valuesLhs;
+    layout(set = 0, binding = 1, r8ui) uniform uimage2D valuesRhs;
+    layout(set = 0, binding = 2, r8ui) uniform uimage2D imageOutput;
+    layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+    void main()
+    {
+        uint index = gl_GlobalInvocationID.x;
+        imageStore(imageOutput, ivec2(index, 0), imageLoad(valuesLhs, ivec2(index, 0)) * imageLoad(valuesRhs, ivec2(index, 0)));
+    }
+    """
+
+    spirv = compile_source(shader)
+
+    arr_in_a = np.array([2, 3, 2], dtype=np.uint8)
+    arr_in_b = np.array([35, 12, 23], dtype=np.uint8)
+    arr_out = np.array([0, 0, 0], dtype=np.uint8)
+
+    mgr = kp.Manager()
+
+    image_in_a = mgr.image_t(arr_in_a, 3, 1, 1)
+    image_in_b = mgr.image_t(arr_in_b, 3, 1, 1)
+    image_out = mgr.image_t(arr_out, 3, 1, 1)
+
+    params = [image_in_a, image_in_b, image_out]
+
+    (mgr.sequence()
+        .record(kp.OpImageSyncDevice(params))
+        .record(kp.OpAlgoDispatch(mgr.algorithm(params, spirv)))
+        .record(kp.OpImageSyncLocal([image_out]))
+        .eval())
+
+    print(f"Dtype value {image_out.data().dtype}")
+
+    assert np.all(image_out.data() == arr_in_a * arr_in_b)
+
+def test_image_numpy_ownership():
+
+    arr_in = np.array([1, 2, 3])
+
+    m = kp.Manager()
+
+    t = m.tensor(arr_in)
+
+    # This should increment refcount for tensor sharedptr
+    td = t.data()
+
+    assert td.base.is_init() == True
+    assert np.all(td == arr_in)
+
+    del t
+
+    assert td.base.is_init() == True
+    assert np.all(td == arr_in)
+
+    m.destroy()
+
+    assert td.base.is_init() == False
diff --git a/src/Algorithm.cpp b/src/Algorithm.cpp
index a354157c..1e56bb03 100644
--- a/src/Algorithm.cpp
+++ b/src/Algorithm.cpp
@@ -129,10 +129,15 @@ Algorithm::createParameters()
 {
     KP_LOG_DEBUG("Kompute Algorithm createParameters started");
 
+    // FIXME: Get the correct count here.
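Because the descriptor pool and bindings below are now derived from generic kp::Memory objects, a single algorithm can mix buffer- and image-backed parameters; a hedged Python sketch (spirv is assumed to declare binding 0 as a storage buffer and binding 1 as a storage image, and binding i always corresponds to params[i], per the loop below):

    params = [mgr.tensor(arr), mgr.image(img, width, height, 1)]
    algo = mgr.algorithm(params, spirv)
    mgr.sequence().record(kp.OpAlgoDispatch(algo)).eval()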
std::vector descriptorPoolSizes = { vk::DescriptorPoolSize( vk::DescriptorType::eStorageBuffer, - static_cast(this->mTensors.size()) // Descriptor count + static_cast(this->mMemObjects.size()) // Descriptor count + ), + vk::DescriptorPoolSize( + vk::DescriptorType::eStorageImage, + static_cast(this->mMemObjects.size()) // Descriptor count ) }; @@ -149,10 +154,10 @@ Algorithm::createParameters() this->mFreeDescriptorPool = true; std::vector descriptorSetBindings; - for (size_t i = 0; i < this->mTensors.size(); i++) { + for (size_t i = 0; i < this->mMemObjects.size(); i++) { descriptorSetBindings.push_back( vk::DescriptorSetLayoutBinding(i, // Binding index - vk::DescriptorType::eStorageBuffer, + mMemObjects[i]->getDescriptorType(), 1, // Descriptor count vk::ShaderStageFlagBits::eCompute)); } @@ -181,20 +186,14 @@ Algorithm::createParameters() this->mFreeDescriptorSet = true; KP_LOG_DEBUG("Kompute Algorithm updating descriptor sets"); - for (size_t i = 0; i < this->mTensors.size(); i++) { + for (size_t i = 0; i < this->mMemObjects.size(); i++) { std::vector computeWriteDescriptorSets; - vk::DescriptorBufferInfo descriptorBufferInfo = - this->mTensors[i]->constructDescriptorBufferInfo(); + vk::WriteDescriptorSet descriptorSet = + this->mMemObjects[i]->constructDescriptorSet(*this->mDescriptorSet, + i); - computeWriteDescriptorSets.push_back( - vk::WriteDescriptorSet(*this->mDescriptorSet, - i, // Destination binding - 0, // Destination array element - 1, // Descriptor count - vk::DescriptorType::eStorageBuffer, - nullptr, // Descriptor image info - &descriptorBufferInfo)); + computeWriteDescriptorSets.push_back(descriptorSet); this->mDevice->updateDescriptorSets(computeWriteDescriptorSets, nullptr); @@ -394,10 +393,10 @@ Algorithm::getWorkgroup() return this->mWorkgroup; } -const std::vector>& -Algorithm::getTensors() +const std::vector>& +Algorithm::getMemObjects() { - return this->mTensors; + return this->mMemObjects; } } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index eb00e16e..16849731 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -13,11 +13,18 @@ add_library(kompute Algorithm.cpp OpAlgoDispatch.cpp OpMemoryBarrier.cpp OpTensorCopy.cpp + OpTensorCopyToImage.cpp OpTensorSyncDevice.cpp OpTensorSyncLocal.cpp Sequence.cpp Tensor.cpp - Core.cpp) + Core.cpp + Image.cpp + Memory.cpp + OpImageCopy.cpp + OpImageCopyToTensor.cpp + OpImageSyncDevice.cpp + OpImageSyncLocal.cpp) add_library(kompute::kompute ALIAS kompute) diff --git a/src/Image.cpp b/src/Image.cpp new file mode 100644 index 00000000..b4df34d7 --- /dev/null +++ b/src/Image.cpp @@ -0,0 +1,942 @@ +// SPDX-License-Identifier: Apache-2.0 + +#include "kompute/Image.hpp" + +namespace kp { + +std::string +Image::toString(Image::ImageDataTypes dt) +{ + switch (dt) { + case ImageDataTypes::eU8: + return "U8"; + case ImageDataTypes::eS8: + return "S8"; + case ImageDataTypes::eU16: + return "U16"; + case ImageDataTypes::eS16: + return "S16"; + case ImageDataTypes::eU32: + return "U32"; + case ImageDataTypes::eS32: + return "S32"; + case ImageDataTypes::eF16: + return "F16"; + case ImageDataTypes::eF32: + return "F32"; + default: + return "unknown"; + } +} + +Tensor::TensorDataTypes +Image::getTensorDataType(Image::ImageDataTypes dt) +{ + switch (dt) { + case ImageDataTypes::eU32: + return Tensor::TensorDataTypes::eUnsignedInt; + case ImageDataTypes::eS32: + return Tensor::TensorDataTypes::eInt; + case ImageDataTypes::eF32: + return Tensor::TensorDataTypes::eFloat; + default: + return Tensor::TensorDataTypes::eCustom; + } 
+}
+
+void
+Image::init(std::shared_ptr physicalDevice,
+            std::shared_ptr device,
+            void* data,
+            uint32_t width,
+            uint32_t height,
+            uint32_t numChannels,
+            const ImageDataTypes& dataType,
+            vk::ImageTiling tiling,
+            const MemoryTypes& memoryType)
+{
+    KP_LOG_DEBUG(
+      "Kompute Image constructor data width: {}, height: {}, and type: {}",
+      width,
+      height,
+      Memory::toString(memoryType));
+
+    if (width == 0 || height == 0 || numChannels == 0) {
+        throw std::runtime_error(
+          "Kompute Image attempted to create a zero-sized image");
+    }
+
+    if (numChannels > 4)
+    {
+        throw std::runtime_error("Kompute Images can only have up to 4 channels");
+    }
+
+    if (memoryType == MemoryTypes::eStorage && data != nullptr) {
+        KP_LOG_WARN("Kompute Image of type eStorage does not need to be "
+                    "initialised with data");
+    }
+
+    if (tiling == vk::ImageTiling::eOptimal &&
+        (memoryType != Memory::MemoryTypes::eDevice &&
+         memoryType != Memory::MemoryTypes::eStorage)) {
+        throw std::runtime_error("Kompute Image with optimal tiling is only "
+                                 "supported for eDevice and eStorage images");
+    }
+
+    this->mPhysicalDevice = physicalDevice;
+    this->mDevice = device;
+    this->mDataType = dataType;
+    this->mMemoryType = memoryType;
+    this->mWidth = width;
+    this->mHeight = height;
+    this->mNumChannels = numChannels;
+    this->mDescriptorType = vk::DescriptorType::eStorageImage;
+    this->mTiling = tiling;
+
+    this->rebuild(data);
+}
+
+Image::~Image()
+{
+    KP_LOG_DEBUG("Kompute Image destructor started. Type: {}",
+                 Memory::toString(this->memoryType()));
+
+    if (this->mDevice) {
+        this->destroy();
+    }
+
+    KP_LOG_DEBUG("Kompute Image destructor success");
+}
+
+void
+Image::rebuild(void* data)
+{
+    KP_LOG_DEBUG("Kompute Image rebuilding with size {} x {} with {} channels",
+                 this->mWidth,
+                 this->mHeight,
+                 this->mNumChannels);
+
+    if (this->mPrimaryImage || this->mPrimaryMemory) {
+        KP_LOG_DEBUG(
+          "Kompute Image destroying existing resources before rebuild");
+        this->destroy();
+    }
+
+    this->mSize = this->mWidth * this->mHeight * this->mNumChannels;
+    this->mDataTypeMemorySize = elementTypeSize(this->mDataType);
+    this->allocateMemoryCreateGPUResources();
+
+    if (this->memoryType() != Image::MemoryTypes::eStorage && data != nullptr) {
+        this->mapRawData();
+        memcpy(this->mRawData, data, this->memorySize());
+    }
+}
+
+bool
+Image::isInit()
+{
+    return this->mDevice && this->mPrimaryImage && this->mPrimaryMemory;
+}
+
+kp::Image::ImageDataTypes
+Image::dataType()
+{
+    return this->mDataType;
+}
+
+void
+Image::recordCopyFrom(const vk::CommandBuffer& commandBuffer,
+                      std::shared_ptr copyFromImage)
+{
+    vk::ImageSubresourceLayers layer = {};
+    layer.aspectMask = vk::ImageAspectFlagBits::eColor;
+    layer.layerCount = 1;
+    vk::Offset3D offset = { 0, 0, 0 };
+
+    // FIXME: Check the size of the dest and source images match
+    vk::Extent3D size = { this->mWidth, this->mHeight, 1 };
+
+    vk::ImageCopy copyRegion(layer, offset, layer, offset, size);
+
+    KP_LOG_DEBUG(
+      "Kompute Image recordCopyFrom size {},{}.", size.width, size.height);
+
+    if (copyFromImage->mPrimaryImageLayout == vk::ImageLayout::eUndefined) {
+        copyFromImage->recordPrimaryMemoryBarrier(
+          commandBuffer,
+          vk::AccessFlagBits::eMemoryRead,
+          vk::AccessFlagBits::eMemoryWrite,
+          vk::PipelineStageFlagBits::eTransfer,
+          vk::PipelineStageFlagBits::eTransfer);
+    }
+
+    if (this->mPrimaryImageLayout == vk::ImageLayout::eUndefined) {
+        this->recordPrimaryMemoryBarrier(commandBuffer,
+                                         vk::AccessFlagBits::eMemoryRead,
+                                         vk::AccessFlagBits::eMemoryWrite,
+
vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eTransfer); + } + + this->recordCopyImage(commandBuffer, + copyFromImage->mPrimaryImage, + this->mPrimaryImage, + copyRegion); +} + +void +Image::recordCopyFrom(const vk::CommandBuffer& commandBuffer, + std::shared_ptr copyFromTensor) +{ + vk::ImageSubresourceLayers layer = {}; + layer.aspectMask = vk::ImageAspectFlagBits::eColor; + layer.layerCount = 1; + vk::Offset3D offset = { 0, 0, 0 }; + + // FIXME: Check the size of the dest and source images match + vk::Extent3D size = { this->mWidth, this->mHeight, 1 }; + + vk::BufferImageCopy copyRegion(0, 0, 0, layer, offset, size); + + KP_LOG_DEBUG( + "Kompute Image recordCopyFrom size {},{}.", size.width, size.height); + + this->recordCopyImageFromTensor(commandBuffer, + copyFromTensor->getPrimaryBuffer(), + this->mPrimaryImage, + copyRegion); +} + +void +Image::recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer) +{ + vk::ImageSubresourceLayers layer = {}; + layer.aspectMask = vk::ImageAspectFlagBits::eColor; + layer.layerCount = 1; + vk::Offset3D offset = { 0, 0, 0 }; + + // FIXME: Check the size of the dest and source images match + vk::Extent3D size = { this->mWidth, this->mHeight, 1 }; + + vk::ImageCopy copyRegion(layer, offset, layer, offset, size); + + KP_LOG_DEBUG("Kompute Image copying size {},{}.", size.width, size.height); + + if (this->mPrimaryImageLayout == vk::ImageLayout::eUndefined) { + this->recordPrimaryMemoryBarrier(commandBuffer, + vk::AccessFlagBits::eMemoryRead, + vk::AccessFlagBits::eMemoryWrite, + vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eTransfer); + } + + if (this->mStagingImageLayout == vk::ImageLayout::eUndefined) { + this->recordStagingMemoryBarrier(commandBuffer, + vk::AccessFlagBits::eMemoryRead, + vk::AccessFlagBits::eMemoryWrite, + vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eTransfer); + } + + this->recordCopyImage( + commandBuffer, this->mStagingImage, this->mPrimaryImage, copyRegion); +} + +void +Image::recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer) +{ + vk::ImageSubresourceLayers layer; + layer.aspectMask = vk::ImageAspectFlagBits::eColor; + layer.layerCount = 1; + vk::Offset3D offset = { 0, 0, 0 }; + + // FIXME: Check the size of the dest and source images match + vk::Extent3D size = { this->mWidth, this->mHeight, 1 }; + + vk::ImageCopy copyRegion(layer, offset, layer, offset, size); + + KP_LOG_DEBUG("Kompute Image copying size {},{}.", size.width, size.height); + + if (this->mPrimaryImageLayout == vk::ImageLayout::eUndefined) { + this->recordPrimaryMemoryBarrier(commandBuffer, + vk::AccessFlagBits::eMemoryRead, + vk::AccessFlagBits::eMemoryWrite, + vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eTransfer); + } + + if (this->mStagingImageLayout == vk::ImageLayout::eUndefined) { + this->recordStagingMemoryBarrier(commandBuffer, + vk::AccessFlagBits::eMemoryRead, + vk::AccessFlagBits::eMemoryWrite, + vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eTransfer); + } + + this->recordCopyImage( + commandBuffer, this->mPrimaryImage, this->mStagingImage, copyRegion); +} + +void +Image::recordCopyImage(const vk::CommandBuffer& commandBuffer, + std::shared_ptr imageFrom, + std::shared_ptr imageTo, + vk::ImageCopy copyRegion) +{ + commandBuffer.copyImage(*imageFrom, + vk::ImageLayout::eGeneral, + *imageTo, + vk::ImageLayout::eGeneral, + 1, + ©Region); +} + +void +Image::recordCopyImageFromTensor(const vk::CommandBuffer& 
commandBuffer,
+                                 std::shared_ptr bufferFrom,
+                                 std::shared_ptr imageTo,
+                                 vk::BufferImageCopy copyRegion)
+{
+    commandBuffer.copyBufferToImage(
+      *bufferFrom, *imageTo, vk::ImageLayout::eGeneral, 1, &copyRegion);
+}
+
+void
+Image::recordPrimaryMemoryBarrier(const vk::CommandBuffer& commandBuffer,
+                                  vk::AccessFlagBits srcAccessMask,
+                                  vk::AccessFlagBits dstAccessMask,
+                                  vk::PipelineStageFlagBits srcStageMask,
+                                  vk::PipelineStageFlagBits dstStageMask)
+{
+    KP_LOG_DEBUG("Kompute Image recording PRIMARY image memory barrier");
+
+    // FIXME: Image layout is used here to transition from eUndefined on
+    // creation to eGeneral for the rest of the program. Is there a better
+    // way/place to do this?
+    this->recordImageMemoryBarrier(commandBuffer,
+                                   *this->mPrimaryImage,
+                                   srcAccessMask,
+                                   dstAccessMask,
+                                   srcStageMask,
+                                   dstStageMask,
+                                   this->mPrimaryImageLayout,
+                                   vk::ImageLayout::eGeneral);
+}
+
+void
+Image::recordStagingMemoryBarrier(const vk::CommandBuffer& commandBuffer,
+                                  vk::AccessFlagBits srcAccessMask,
+                                  vk::AccessFlagBits dstAccessMask,
+                                  vk::PipelineStageFlagBits srcStageMask,
+                                  vk::PipelineStageFlagBits dstStageMask)
+{
+    KP_LOG_DEBUG("Kompute Image recording STAGING image memory barrier");
+
+    // FIXME: Image layout is used here to transition from eUndefined on
+    // creation to eGeneral for the rest of the program. Is there a better
+    // way/place to do this?
+    this->recordImageMemoryBarrier(commandBuffer,
+                                   *this->mStagingImage,
+                                   srcAccessMask,
+                                   dstAccessMask,
+                                   srcStageMask,
+                                   dstStageMask,
+                                   this->mStagingImageLayout,
+                                   vk::ImageLayout::eGeneral);
+}
+
+// FIXME: Make this private.
+void
+Image::recordImageMemoryBarrier(const vk::CommandBuffer& commandBuffer,
+                                const vk::Image& image,
+                                vk::AccessFlagBits srcAccessMask,
+                                vk::AccessFlagBits dstAccessMask,
+                                vk::PipelineStageFlagBits srcStageMask,
+                                vk::PipelineStageFlagBits dstStageMask,
+                                vk::ImageLayout outLayout,
+                                vk::ImageLayout newLayout)
+{
+    KP_LOG_DEBUG("Kompute Image recording image memory barrier");
+
+    vk::ImageMemoryBarrier imageMemoryBarrier;
+    imageMemoryBarrier.image = image;
+
+    imageMemoryBarrier.subresourceRange.baseMipLevel = 0;
+    imageMemoryBarrier.subresourceRange.levelCount = 1;
+    imageMemoryBarrier.subresourceRange.baseArrayLayer = 0;
+    imageMemoryBarrier.subresourceRange.layerCount = 1;
+    imageMemoryBarrier.subresourceRange.aspectMask =
+      vk::ImageAspectFlagBits::eColor;
+
+    imageMemoryBarrier.srcAccessMask = srcAccessMask;
+    imageMemoryBarrier.dstAccessMask = dstAccessMask;
+    imageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    imageMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+
+    imageMemoryBarrier.oldLayout = outLayout;
+    imageMemoryBarrier.newLayout = newLayout;
+
+    commandBuffer.pipelineBarrier(srcStageMask,
+                                  dstStageMask,
+                                  vk::DependencyFlags(),
+                                  nullptr,
+                                  nullptr,
+                                  imageMemoryBarrier);
+}
+
+vk::DescriptorImageInfo
+Image::constructDescriptorImageInfo()
+{
+    KP_LOG_DEBUG("Kompute Image construct descriptor image info size {}",
+                 this->memorySize());
+
+    // RQ: FIXME: Initialise these correctly.
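Since constructDescriptorSet below registers every image as vk::DescriptorType::eStorageImage in eGeneral layout, the GLSL counterpart is a uniform image2D accessed with imageLoad/imageStore, as the new Python tests do; a minimal one-image sketch (the r32f qualifier assumes a single-channel float image):

    shader = """
    #version 450
    layout(set = 0, binding = 0, r32f) uniform image2D img;
    layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

    void main()
    {
        uint index = gl_GlobalInvocationID.x;
        imageStore(img, ivec2(index, 0), imageLoad(img, ivec2(index, 0)) * 2.0);
    }
    """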
+ vk::ImageViewCreateInfo viewInfo; + viewInfo.image = *this->mPrimaryImage; + viewInfo.format = this->getFormat(); + viewInfo.flags = vk::ImageViewCreateFlags(); + viewInfo.viewType = vk::ImageViewType::e2D; + + viewInfo.subresourceRange.baseMipLevel = 0; + viewInfo.subresourceRange.levelCount = 1; + viewInfo.subresourceRange.baseArrayLayer = 0; + viewInfo.subresourceRange.layerCount = 1; + viewInfo.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eColor; + + // This image object owns the image view + if (!this->mImageView) { + mImageView = std::make_shared( + this->mDevice->createImageView(viewInfo)); + } + + vk::DescriptorImageInfo descriptorInfo; + + descriptorInfo.imageView = *(mImageView.get()); + descriptorInfo.imageLayout = vk::ImageLayout::eGeneral; + return descriptorInfo; +} + +vk::WriteDescriptorSet +Image::constructDescriptorSet(vk::DescriptorSet descriptorSet, uint32_t binding) +{ + KP_LOG_DEBUG("Kompute Image construct descriptor set for binding {}", + binding); + + mDescriptorImageInfo = this->constructDescriptorImageInfo(); + + return vk::WriteDescriptorSet(descriptorSet, + binding, // Destination binding + 0, // Destination array element + 1, // Descriptor count + vk::DescriptorType::eStorageImage, + &mDescriptorImageInfo, + nullptr); // Descriptor buffer info +} + +vk::ImageUsageFlags +Image::getPrimaryImageUsageFlags() +{ + switch (this->mMemoryType) { + case MemoryTypes::eDevice: + case MemoryTypes::eHost: + case MemoryTypes::eDeviceAndHost: + return vk::ImageUsageFlagBits::eStorage | + vk::ImageUsageFlagBits::eTransferSrc | + vk::ImageUsageFlagBits::eTransferDst; + break; + case MemoryTypes::eStorage: + return vk::ImageUsageFlagBits::eStorage | + // You can still copy image-copy to/from storage memory + // (or at least TestOpImageCopy.CopyThroughStorageImage tests + // you can) so set the transfer usage flags here. 
+                   vk::ImageUsageFlagBits::eTransferSrc |
+                   vk::ImageUsageFlagBits::eTransferDst;
+            break;
+        default:
+            throw std::runtime_error("Kompute Image invalid image type");
+    }
+}
+
+vk::MemoryPropertyFlags
+Image::getPrimaryMemoryPropertyFlags()
+{
+    switch (this->mMemoryType) {
+        case MemoryTypes::eDevice:
+            return vk::MemoryPropertyFlagBits::eDeviceLocal;
+            break;
+        case MemoryTypes::eHost:
+            return vk::MemoryPropertyFlagBits::eHostVisible |
+                   vk::MemoryPropertyFlagBits::eHostCoherent;
+            break;
+        case MemoryTypes::eDeviceAndHost:
+            return vk::MemoryPropertyFlagBits::eDeviceLocal |
+                   vk::MemoryPropertyFlagBits::eHostVisible |
+                   vk::MemoryPropertyFlagBits::eHostCoherent;
+        case MemoryTypes::eStorage:
+            return vk::MemoryPropertyFlagBits::eDeviceLocal;
+            break;
+        default:
+            throw std::runtime_error("Kompute Image invalid image type");
+    }
+}
+
+vk::ImageUsageFlags
+Image::getStagingImageUsageFlags()
+{
+    switch (this->mMemoryType) {
+        case MemoryTypes::eDevice:
+            return vk::ImageUsageFlagBits::eTransferSrc |
+                   vk::ImageUsageFlagBits::eTransferDst;
+            break;
+        default:
+            throw std::runtime_error("Kompute Image invalid image type");
+    }
+}
+
+vk::MemoryPropertyFlags
+Image::getStagingMemoryPropertyFlags()
+{
+    switch (this->mMemoryType) {
+        case MemoryTypes::eDevice:
+            return vk::MemoryPropertyFlagBits::eHostVisible |
+                   vk::MemoryPropertyFlagBits::eHostCoherent;
+            break;
+        default:
+            throw std::runtime_error("Kompute Image invalid image type");
+    }
+}
+
+std::shared_ptr
+Image::getPrimaryImage()
+{
+    return this->mPrimaryImage;
+}
+
+uint32_t
+Image::getWidth()
+{
+    return this->mWidth;
+}
+
+uint32_t
+Image::getHeight()
+{
+    return this->mHeight;
+}
+
+uint32_t
+Image::getNumChannels()
+{
+    return this->mNumChannels;
+}
+
+void
+Image::allocateMemoryCreateGPUResources()
+{
+    KP_LOG_DEBUG("Kompute Image creating image");
+
+    if (!this->mPhysicalDevice) {
+        throw std::runtime_error("Kompute Image physical device is null");
+    }
+    if (!this->mDevice) {
+        throw std::runtime_error("Kompute Image device is null");
+    }
+
+    KP_LOG_DEBUG("Kompute Image creating primary image and memory");
+
+    this->mPrimaryImage = std::make_shared();
+    this->createImage(
+      this->mPrimaryImage, this->getPrimaryImageUsageFlags(), this->mTiling);
+    this->mFreePrimaryImage = true;
+    this->mPrimaryMemory = std::make_shared();
+    this->allocateBindMemory(this->mPrimaryImage,
+                             this->mPrimaryMemory,
+                             this->getPrimaryMemoryPropertyFlags());
+    this->mFreePrimaryMemory = true;
+
+    if (this->mMemoryType == MemoryTypes::eDevice) {
+        KP_LOG_DEBUG("Kompute Image creating staging image and memory");
+
+        this->mStagingImage = std::make_shared();
+        this->createImage(this->mStagingImage,
+                          this->getStagingImageUsageFlags(),
+                          vk::ImageTiling::eLinear);
+        this->mFreeStagingImage = true;
+        this->mStagingMemory = std::make_shared();
+        this->allocateBindMemory(this->mStagingImage,
+                                 this->mStagingMemory,
+                                 this->getStagingMemoryPropertyFlags());
+        this->mFreeStagingMemory = true;
+    }
+
+    KP_LOG_DEBUG("Kompute Image image & memory creation successful");
+}
+
+void
+Image::createImage(std::shared_ptr image,
+                   vk::ImageUsageFlags imageUsageFlags,
+                   vk::ImageTiling imageTiling)
+{
+    vk::DeviceSize imageSize = this->memorySize();
+
+    if (imageSize < 1) {
+        throw std::runtime_error(
+          "Kompute Image attempted to create a zero-sized image");
+    }
+
+    KP_LOG_DEBUG("Kompute Image creating image with memory size: {}, and "
+                 "usage flags: {}",
+                 imageSize,
+                 vk::to_string(imageUsageFlags));
+
+    // TODO: Explore having concurrent sharing mode (with option)
+
+void
+Image::createImage(std::shared_ptr<vk::Image> image,
+                   vk::ImageUsageFlags imageUsageFlags,
+                   vk::ImageTiling imageTiling)
+{
+    vk::DeviceSize imageSize = this->memorySize();
+
+    if (imageSize < 1) {
+        throw std::runtime_error(
+          "Kompute Image attempted to create a zero-sized image");
+    }
+
+    KP_LOG_DEBUG("Kompute Image creating image with memory size: {}, and "
+                 "usage flags: {}",
+                 imageSize,
+                 vk::to_string(imageUsageFlags));
+
+    // TODO: Explore having concurrent sharing mode (with option)
+    vk::ImageCreateInfo imageInfo;
+
+    imageInfo.flags = vk::ImageCreateFlags();
+    imageInfo.imageType = vk::ImageType::e2D;
+    imageInfo.format = this->getFormat();
+    imageInfo.extent = vk::Extent3D(this->mWidth, this->mHeight, 1);
+    imageInfo.usage = imageUsageFlags;
+    imageInfo.mipLevels = 1;
+    imageInfo.arrayLayers = 1;
+    imageInfo.initialLayout = vk::ImageLayout::eUndefined;
+    imageInfo.tiling = imageTiling;
+
+    this->mDevice->createImage(&imageInfo, nullptr, image.get());
+}
+
+void
+Image::allocateBindMemory(std::shared_ptr<vk::Image> image,
+                          std::shared_ptr<vk::DeviceMemory> memory,
+                          vk::MemoryPropertyFlags memoryPropertyFlags)
+{
+
+    KP_LOG_DEBUG("Kompute Image allocating and binding memory");
+
+    vk::PhysicalDeviceMemoryProperties memoryProperties =
+      this->mPhysicalDevice->getMemoryProperties();
+
+    vk::MemoryRequirements memoryRequirements =
+      this->mDevice->getImageMemoryRequirements(*image);
+
+    uint32_t memoryTypeIndex = -1;
+    bool memoryTypeIndexFound = false;
+    for (uint32_t i = 0; i < memoryProperties.memoryTypeCount; i++) {
+        if (memoryRequirements.memoryTypeBits & (1 << i)) {
+            if (((memoryProperties.memoryTypes[i]).propertyFlags &
+                 memoryPropertyFlags) == memoryPropertyFlags) {
+                memoryTypeIndex = i;
+                memoryTypeIndexFound = true;
+                break;
+            }
+        }
+    }
+    if (!memoryTypeIndexFound) {
+        throw std::runtime_error(
+          "Memory type index for image creation not found");
+    }
+
+    KP_LOG_DEBUG(
+      "Kompute Image allocating memory index: {}, size {}, flags: {}",
+      memoryTypeIndex,
+      memoryRequirements.size,
+      vk::to_string(memoryPropertyFlags));
+
+    vk::MemoryAllocateInfo memoryAllocateInfo(memoryRequirements.size,
+                                              memoryTypeIndex);
+
+    this->mDevice->allocateMemory(&memoryAllocateInfo, nullptr, memory.get());
+
+    this->mDevice->bindImageMemory(*image, *memory, 0);
+}
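The linear scan in allocateBindMemory is the canonical Vulkan memory-type search: a memory type must be both permitted by the resource's memoryTypeBits and carry all requested property flags. As a self-contained restatement of the same logic (a hypothetical helper, not part of this PR):

    // Returns the first memory type allowed by `typeBits` that has all
    // `flags` set, or -1 if no such type exists.
    static int32_t findMemoryType(const vk::PhysicalDeviceMemoryProperties& props,
                                  uint32_t typeBits,
                                  vk::MemoryPropertyFlags flags)
    {
        for (uint32_t i = 0; i < props.memoryTypeCount; i++) {
            if ((typeBits & (1u << i)) &&
                (props.memoryTypes[i].propertyFlags & flags) == flags) {
                return static_cast<int32_t>(i);
            }
        }
        return -1;
    }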
null memory"); + } else { + KP_LOG_DEBUG("Kompose Image freeing staging memory"); + this->mDevice->freeMemory( + *this->mStagingMemory, + (vk::Optional)nullptr); + this->mStagingMemory = nullptr; + this->mFreeStagingMemory = false; + } + } + + if (this->mImageView) { + KP_LOG_DEBUG("Kompose Image freeing image view"); + this->mDevice->destroyImageView(*this->mImageView); + this->mImageView = nullptr; + } + + if (this->mDevice) { + this->mDevice = nullptr; + } + + KP_LOG_DEBUG("Kompute Image successful destroy()"); +} + +constexpr size_t +Image::elementTypeSize(Image::ImageDataTypes type) +{ + switch (type) { + case Image::ImageDataTypes::eS8: + return sizeof(int8_t); + case Image::ImageDataTypes::eU8: + return sizeof(uint8_t); + case Image::ImageDataTypes::eS16: + return sizeof(int16_t); + case Image::ImageDataTypes::eU16: + return sizeof(uint16_t); + case Image::ImageDataTypes::eS32: + return sizeof(int32_t); + case Image::ImageDataTypes::eU32: + return sizeof(uint32_t); + case Image::ImageDataTypes::eF16: + return sizeof(int16_t); + case Image::ImageDataTypes::eF32: + return sizeof(float); + default: + throw std::runtime_error("Kompute Image invalid image data type"); + break; + } + + return -1; +} + +vk::Format +Image::getFormat() +{ + switch (this->mDataType) { + case Image::ImageDataTypes::eS8: { + switch (this->mNumChannels) { + case 1: + return vk::Format::eR8Sint; + case 2: + return vk::Format::eR8G8Sint; + case 3: + return vk::Format::eR8G8B8Sint; + case 4: + return vk::Format::eR8G8B8A8Sint; + default: + return vk::Format::eUndefined; + } + } + case Image::ImageDataTypes::eU8: { + switch (this->mNumChannels) { + case 1: + return vk::Format::eR8Uint; + case 2: + return vk::Format::eR8G8Uint; + case 3: + return vk::Format::eR8G8B8Uint; + case 4: + return vk::Format::eR8G8B8A8Uint; + default: + return vk::Format::eUndefined; + } + } + case Image::ImageDataTypes::eU16: { + switch (this->mNumChannels) { + case 1: + return vk::Format::eR16Uint; + case 2: + return vk::Format::eR16G16Uint; + case 3: + return vk::Format::eR16G16B16Uint; + case 4: + return vk::Format::eR16G16B16A16Uint; + default: + return vk::Format::eUndefined; + } + } + case Image::ImageDataTypes::eS16: { + switch (this->mNumChannels) { + case 1: + return vk::Format::eR16Sint; + case 2: + return vk::Format::eR16G16Sint; + case 3: + return vk::Format::eR16G16B16Sint; + case 4: + return vk::Format::eR16G16B16A16Sint; + default: + return vk::Format::eUndefined; + } + } + case Image::ImageDataTypes::eU32: { + switch (this->mNumChannels) { + case 1: + return vk::Format::eR32Uint; + case 2: + return vk::Format::eR32G32Uint; + case 3: + return vk::Format::eR32G32B32Uint; + case 4: + return vk::Format::eR32G32B32A32Uint; + default: + return vk::Format::eUndefined; + } + } + case Image::ImageDataTypes::eS32: { + switch (this->mNumChannels) { + case 1: + return vk::Format::eR32Sint; + case 2: + return vk::Format::eR32G32Sint; + case 3: + return vk::Format::eR32G32B32Sint; + case 4: + return vk::Format::eR32G32B32A32Sint; + default: + return vk::Format::eUndefined; + } + } + case Image::ImageDataTypes::eF16: { + switch (this->mNumChannels) { + case 1: + return vk::Format::eR16Sfloat; + case 2: + return vk::Format::eR16G16Sfloat; + case 3: + return vk::Format::eR16G16B16Sfloat; + case 4: + return vk::Format::eR16G16B16A16Sfloat; + default: + return vk::Format::eUndefined; + } + } + case Image::ImageDataTypes::eF32: { + switch (this->mNumChannels) { + case 1: + return vk::Format::eR32Sfloat; + case 2: + return 
+vk::Format
+Image::getFormat()
+{
+    switch (this->mDataType) {
+        case Image::ImageDataTypes::eS8: {
+            switch (this->mNumChannels) {
+                case 1:
+                    return vk::Format::eR8Sint;
+                case 2:
+                    return vk::Format::eR8G8Sint;
+                case 3:
+                    return vk::Format::eR8G8B8Sint;
+                case 4:
+                    return vk::Format::eR8G8B8A8Sint;
+                default:
+                    return vk::Format::eUndefined;
+            }
+        }
+        case Image::ImageDataTypes::eU8: {
+            switch (this->mNumChannels) {
+                case 1:
+                    return vk::Format::eR8Uint;
+                case 2:
+                    return vk::Format::eR8G8Uint;
+                case 3:
+                    return vk::Format::eR8G8B8Uint;
+                case 4:
+                    return vk::Format::eR8G8B8A8Uint;
+                default:
+                    return vk::Format::eUndefined;
+            }
+        }
+        case Image::ImageDataTypes::eU16: {
+            switch (this->mNumChannels) {
+                case 1:
+                    return vk::Format::eR16Uint;
+                case 2:
+                    return vk::Format::eR16G16Uint;
+                case 3:
+                    return vk::Format::eR16G16B16Uint;
+                case 4:
+                    return vk::Format::eR16G16B16A16Uint;
+                default:
+                    return vk::Format::eUndefined;
+            }
+        }
+        case Image::ImageDataTypes::eS16: {
+            switch (this->mNumChannels) {
+                case 1:
+                    return vk::Format::eR16Sint;
+                case 2:
+                    return vk::Format::eR16G16Sint;
+                case 3:
+                    return vk::Format::eR16G16B16Sint;
+                case 4:
+                    return vk::Format::eR16G16B16A16Sint;
+                default:
+                    return vk::Format::eUndefined;
+            }
+        }
+        case Image::ImageDataTypes::eU32: {
+            switch (this->mNumChannels) {
+                case 1:
+                    return vk::Format::eR32Uint;
+                case 2:
+                    return vk::Format::eR32G32Uint;
+                case 3:
+                    return vk::Format::eR32G32B32Uint;
+                case 4:
+                    return vk::Format::eR32G32B32A32Uint;
+                default:
+                    return vk::Format::eUndefined;
+            }
+        }
+        case Image::ImageDataTypes::eS32: {
+            switch (this->mNumChannels) {
+                case 1:
+                    return vk::Format::eR32Sint;
+                case 2:
+                    return vk::Format::eR32G32Sint;
+                case 3:
+                    return vk::Format::eR32G32B32Sint;
+                case 4:
+                    return vk::Format::eR32G32B32A32Sint;
+                default:
+                    return vk::Format::eUndefined;
+            }
+        }
+        case Image::ImageDataTypes::eF16: {
+            switch (this->mNumChannels) {
+                case 1:
+                    return vk::Format::eR16Sfloat;
+                case 2:
+                    return vk::Format::eR16G16Sfloat;
+                case 3:
+                    return vk::Format::eR16G16B16Sfloat;
+                case 4:
+                    return vk::Format::eR16G16B16A16Sfloat;
+                default:
+                    return vk::Format::eUndefined;
+            }
+        }
+        case Image::ImageDataTypes::eF32: {
+            switch (this->mNumChannels) {
+                case 1:
+                    return vk::Format::eR32Sfloat;
+                case 2:
+                    return vk::Format::eR32G32Sfloat;
+                case 3:
+                    return vk::Format::eR32G32B32Sfloat;
+                case 4:
+                    return vk::Format::eR32G32B32A32Sfloat;
+                default:
+                    return vk::Format::eUndefined;
+            }
+        }
+        default:
+            return vk::Format::eUndefined;
+    }
+}
+
+template<>
+Image::ImageDataTypes
+ImageT<int8_t>::dataType()
+{
+    return Image::ImageDataTypes::eS8;
+}
+
+template<>
+Image::ImageDataTypes
+ImageT<uint8_t>::dataType()
+{
+    return Image::ImageDataTypes::eU8;
+}
+
+template<>
+Image::ImageDataTypes
+ImageT<int16_t>::dataType()
+{
+    return Image::ImageDataTypes::eS16;
+}
+
+template<>
+Image::ImageDataTypes
+ImageT<uint16_t>::dataType()
+{
+    return Image::ImageDataTypes::eU16;
+}
+
+template<>
+Image::ImageDataTypes
+ImageT<int32_t>::dataType()
+{
+    return Image::ImageDataTypes::eS32;
+}
+
+template<>
+Image::ImageDataTypes
+ImageT<uint32_t>::dataType()
+{
+    return Image::ImageDataTypes::eU32;
+}
+
+template<>
+Image::ImageDataTypes
+ImageT<float>::dataType()
+{
+    return Image::ImageDataTypes::eF32;
+}
+
+}
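The dataType() specializations pair with getFormat() above: the Vulkan format is always derived from the element type plus channel count, never supplied by the caller. For instance:

    // ImageT<uint8_t> with 4 channels -> vk::Format::eR8G8B8A8Uint
    // ImageT<float>   with 1 channel  -> vk::Format::eR32Sfloat
    // Any channel count outside 1..4  -> vk::Format::eUndefined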
diff --git a/src/Manager.cpp b/src/Manager.cpp
index 301b4a6f..8381ccf2 100644
--- a/src/Manager.cpp
+++ b/src/Manager.cpp
@@ -119,6 +119,15 @@ Manager::destroy()
         this->mManagedTensors.clear();
     }
 
+    if (this->mManageResources && this->mManagedImages.size()) {
+        KP_LOG_DEBUG("Kompute Manager explicitly freeing images");
+        for (const std::weak_ptr<Image>& weakImage : this->mManagedImages) {
+            if (std::shared_ptr<Image> image = weakImage.lock()) {
+                image->destroy();
+            }
+        }
+        this->mManagedImages.clear();
+    }
     if (this->mFreeDevice) {
         KP_LOG_INFO("Destroying device");
         this->mDevice->destroy(
diff --git a/src/Memory.cpp b/src/Memory.cpp
new file mode 100644
index 00000000..cc2e583a
--- /dev/null
+++ b/src/Memory.cpp
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: Apache-2.0
+
+#include "kompute/Memory.hpp"
+
+namespace kp {
+
+std::string
+Memory::toString(Memory::MemoryTypes dt)
+{
+    switch (dt) {
+        case MemoryTypes::eDevice:
+            return "eDevice";
+        case MemoryTypes::eHost:
+            return "eHost";
+        case MemoryTypes::eDeviceAndHost:
+            return "eDeviceAndHost";
+        case MemoryTypes::eStorage:
+            return "eStorage";
+        default:
+            return "unknown";
+    }
+}
+
+Memory::MemoryTypes
+Memory::memoryType()
+{
+    return this->mMemoryType;
+}
+
+uint32_t
+Memory::size()
+{
+    return this->mSize;
+}
+
+uint32_t
+Memory::dataTypeMemorySize()
+{
+    return this->mDataTypeMemorySize;
+}
+
+uint32_t
+Memory::memorySize()
+{
+    return this->mSize * this->mDataTypeMemorySize;
+}
+
+void*
+Memory::rawData()
+{
+    if (!this->mRawData) {
+        this->mapRawData();
+    }
+    return this->mRawData;
+}
+
+void
+Memory::setRawData(const void* data)
+{
+    if (!this->mRawData) {
+        this->mapRawData();
+    }
+    memcpy(this->mRawData, data, this->memorySize());
+}
+
+void
+Memory::mapRawData()
+{
+    KP_LOG_DEBUG("Kompute Memory mapping data from host buffer");
+
+    std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
+
+    if (this->mMemoryType == MemoryTypes::eHost ||
+        this->mMemoryType == MemoryTypes::eDeviceAndHost) {
+        hostVisibleMemory = this->mPrimaryMemory;
+    } else if (this->mMemoryType == MemoryTypes::eDevice) {
+        hostVisibleMemory = this->mStagingMemory;
+    } else {
+        KP_LOG_WARN("Kompute Memory mapping data not supported on {} memory",
+                    Memory::toString(this->memoryType()));
+        return;
+    }
+
+    vk::DeviceSize size = this->memorySize();
+
+    // Given we request coherent host memory we don't need to invalidate /
+    // flush
+    this->mRawData = this->mDevice->mapMemory(
+      *hostVisibleMemory, 0, size, vk::MemoryMapFlags());
+
+    this->mUnmapMemory = true;
+}
+
+void
+Memory::unmapRawData()
+{
+    KP_LOG_DEBUG("Kompute Memory unmapping data from host buffer");
+    if (!this->mUnmapMemory) {
+        return;
+    }
+
+    std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
+
+    if (this->mMemoryType == MemoryTypes::eHost ||
+        this->mMemoryType == MemoryTypes::eDeviceAndHost) {
+        hostVisibleMemory = this->mPrimaryMemory;
+    } else if (this->mMemoryType == MemoryTypes::eDevice) {
+        hostVisibleMemory = this->mStagingMemory;
+    } else {
+        KP_LOG_WARN("Kompute Memory unmapping data not supported on {} memory",
+                    Memory::toString(this->memoryType()));
+        return;
+    }
+
+    vk::DeviceSize size = this->memorySize();
+    vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, size);
+    this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
+    this->mDevice->unmapMemory(*hostVisibleMemory);
+
+    this->mUnmapMemory = false;
+}
+
+} // end namespace kp
diff --git a/src/OpAlgoDispatch.cpp b/src/OpAlgoDispatch.cpp
index a76fbd58..15ddb62e 100644
--- a/src/OpAlgoDispatch.cpp
+++ b/src/OpAlgoDispatch.cpp
@@ -20,9 +20,9 @@ OpAlgoDispatch::record(const vk::CommandBuffer& commandBuffer)
     KP_LOG_DEBUG("Kompute OpAlgoDispatch record called");
 
     // Barrier to ensure the data is finished writing to buffer memory
-    for (const std::shared_ptr<Tensor>& tensor :
-         this->mAlgorithm->getTensors()) {
-        tensor->recordPrimaryBufferMemoryBarrier(
+    for (const std::shared_ptr<Memory>& mem :
+         this->mAlgorithm->getMemObjects()) {
+        mem->recordPrimaryMemoryBarrier(
           commandBuffer,
           vk::AccessFlagBits::eTransferWrite,
           vk::AccessFlagBits::eShaderRead,
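Editor's note: since Algorithm now tracks kp::Memory objects rather than tensors, a single dispatch can mix buffers and images. A hedged sketch of the intended call pattern (mgr, spirv, tensorIn and imageOut are placeholders, not names from this PR):

    std::vector<std::shared_ptr<kp::Memory>> params = { tensorIn, imageOut };
    auto algo = mgr.algorithm(params, spirv);
    mgr.sequence()
      ->record<kp::OpTensorSyncDevice>({ tensorIn })
      ->record<kp::OpAlgoDispatch>(algo)
      ->record<kp::OpImageSyncLocal>({ imageOut })
      ->eval();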
KP_LOG_DEBUG("Kompute OpImageCopy not copying image source given " + "it's of eStorage type"); + return; + } + void* data = this->mImages[0]->rawData(); + + // Copy the data from the first image into all the images + for (size_t i = 1; i < this->mImages.size(); i++) { + if (this->mImages[i]->memoryType() == Memory::MemoryTypes::eStorage) { + KP_LOG_DEBUG("Kompute OpImageCopy not copying to image dest " + "given it's of eStorage type"); + continue; + } + this->mImages[i]->setRawData(data); + } +} + +} diff --git a/src/OpImageCopyToTensor.cpp b/src/OpImageCopyToTensor.cpp new file mode 100644 index 00000000..98b77fc7 --- /dev/null +++ b/src/OpImageCopyToTensor.cpp @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: Apache-2.0 + +#include "kompute/operations/OpImageCopyToTensor.hpp" +#include "kompute/Tensor.hpp" + +namespace kp { + +OpImageCopyToTensor::OpImageCopyToTensor( + const std::vector>& mem_objects) +{ + KP_LOG_DEBUG("Kompute OpImageCopyToTensor constructor with params"); + + if (mem_objects.size() < 2) { + throw std::runtime_error( + "Kompute OpImageCopyToTensor called with less than 2 mem objects"); + } + + if (std::dynamic_pointer_cast(mem_objects.at(0)) == nullptr) { + throw std::runtime_error( + "Kompute OpImageCopyToTensor: Memory object is not an Image"); + } + this->mImage = std::dynamic_pointer_cast(mem_objects.at(0)); + + for (size_t i = 1; i < mem_objects.size(); i++) { + if (std::dynamic_pointer_cast(mem_objects.at(i)) == nullptr) { + throw std::runtime_error( + "Kompute OpImageCopyToTensor: Memory object is not a Tensor"); + } + this->mTensors.push_back( + std::dynamic_pointer_cast(mem_objects.at(i))); + } + + kp::Image::ImageDataTypes dataType = this->mImage->dataType(); + uint32_t size = this->mImage->size(); + for (const std::shared_ptr& tensor : this->mTensors) { + if (Image::getTensorDataType(dataType) != tensor->dataType()) { + throw std::runtime_error(fmt::format( + "Attempting to copy tensors of different types from {} to {}", + Image::toString(dataType), + Tensor::toString(tensor->dataType()))); + } + if (tensor->size() != size) { + throw std::runtime_error(fmt::format( + "Attempting to copy tensors of different sizes from {} to {}", + size, + tensor->size())); + } + } +} + +OpImageCopyToTensor::~OpImageCopyToTensor() +{ + KP_LOG_DEBUG("Kompute OpImageCopyToTensor destructor started"); +} + +void +OpImageCopyToTensor::record(const vk::CommandBuffer& commandBuffer) +{ + KP_LOG_DEBUG("Kompute OpImageCopyToTensor record called"); + + for (size_t i = 0; i < this->mTensors.size(); i++) { + this->mTensors[i]->recordCopyFrom(commandBuffer, this->mImage); + } +} + +void +OpImageCopyToTensor::preEval(const vk::CommandBuffer& /*commandBuffer*/) +{ + KP_LOG_DEBUG("Kompute OpImageCopyToTensor preEval called"); +} + +void +OpImageCopyToTensor::postEval(const vk::CommandBuffer& /*commandBuffer*/) +{ + KP_LOG_DEBUG("Kompute OpImageCopyToTensor postEval called"); + + // Do not copy on CPU side if source is storage tensor + if (this->mImage->memoryType() == kp::Memory::MemoryTypes::eStorage) { + KP_LOG_DEBUG( + "Kompute OpImageCopyToTensor not copying tensor source given " + "it's of eStorage type"); + return; + } + void* data = this->mImage->rawData(); + + // Copy the data from the tensor into all the images + for (size_t i = 0; i < this->mTensors.size(); i++) { + if (this->mTensors[i]->memoryType() == + kp::Memory::MemoryTypes::eStorage) { + KP_LOG_DEBUG( + "Kompute OpImageCopyToTensor not copying to tensor dest " + "given it's of eStorage type"); + continue; + } + 
this->mTensors[i]->setRawData(data); + } +} + +} diff --git a/src/OpImageSyncDevice.cpp b/src/OpImageSyncDevice.cpp new file mode 100644 index 00000000..8610167b --- /dev/null +++ b/src/OpImageSyncDevice.cpp @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: Apache-2.0 + +#include "kompute/operations/OpImageSyncDevice.hpp" + +namespace kp { + +OpImageSyncDevice::OpImageSyncDevice( + const std::vector>& images) +{ + KP_LOG_DEBUG("Kompute OpImageSyncDevice constructor with params"); + + if (images.size() < 1) { + throw std::runtime_error( + "Kompute OpImageSyncDevice called with less than 1 image"); + } + + for (std::shared_ptr image : images) { + if (std::dynamic_pointer_cast(image) == nullptr) { + throw std::runtime_error( + "Kompute OpImageSyncDevice: Memory object is not an Image"); + } + this->mImages.push_back(std::dynamic_pointer_cast(image)); + } +} + +OpImageSyncDevice::~OpImageSyncDevice() +{ + KP_LOG_DEBUG("Kompute OpImageSyncDevice destructor started"); + + this->mImages.clear(); +} + +void +OpImageSyncDevice::record(const vk::CommandBuffer& commandBuffer) +{ + KP_LOG_DEBUG("Kompute OpImageSyncDevice record called"); + + for (size_t i = 0; i < this->mImages.size(); i++) { + if (this->mImages[i]->memoryType() == Memory::MemoryTypes::eDevice) { + this->mImages[i]->recordCopyFromStagingToDevice(commandBuffer); + } + } +} + +void +OpImageSyncDevice::preEval(const vk::CommandBuffer& /*commandBuffer*/) +{ + KP_LOG_DEBUG("Kompute OpImageSyncDevice preEval called"); +} + +void +OpImageSyncDevice::postEval(const vk::CommandBuffer& /*commandBuffer*/) +{ + KP_LOG_DEBUG("Kompute OpImageSyncDevice postEval called"); +} + +} diff --git a/src/OpImageSyncLocal.cpp b/src/OpImageSyncLocal.cpp new file mode 100644 index 00000000..c87673a0 --- /dev/null +++ b/src/OpImageSyncLocal.cpp @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: Apache-2.0 + +#include "kompute/Image.hpp" + +#include "kompute/operations/OpImageSyncLocal.hpp" + +namespace kp { + +OpImageSyncLocal::OpImageSyncLocal( + const std::vector>& images) +{ + KP_LOG_DEBUG("Kompute OpImageSyncLocal constructor with params"); + + if (images.size() < 1) { + throw std::runtime_error( + "Kompute OpImageSyncLocal called with less than 1 image"); + } + + for (std::shared_ptr image : images) { + if (std::dynamic_pointer_cast(image) == nullptr) { + throw std::runtime_error( + "Kompute OpImageSyncLocal: Memory object is not an Image"); + } + this->mImages.push_back(std::dynamic_pointer_cast(image)); + } +} + +OpImageSyncLocal::~OpImageSyncLocal() +{ + KP_LOG_DEBUG("Kompute OpImageSyncLocal destructor started"); +} + +void +OpImageSyncLocal::record(const vk::CommandBuffer& commandBuffer) +{ + KP_LOG_DEBUG("Kompute OpImageSyncLocal record called"); + + for (size_t i = 0; i < this->mImages.size(); i++) { + if (this->mImages[i]->memoryType() == Memory::MemoryTypes::eDevice) { + + this->mImages[i]->recordPrimaryMemoryBarrier( + commandBuffer, + vk::AccessFlagBits::eShaderWrite, + vk::AccessFlagBits::eTransferRead, + vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eTransfer); + + this->mImages[i]->recordCopyFromDeviceToStaging(commandBuffer); + + this->mImages[i]->recordPrimaryMemoryBarrier( + commandBuffer, + vk::AccessFlagBits::eTransferWrite, + vk::AccessFlagBits::eHostRead, + vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eHost); + } + } +} + +void +OpImageSyncLocal::preEval(const vk::CommandBuffer& /*commandBuffer*/) +{ + KP_LOG_DEBUG("Kompute OpImageSyncLocal preEval called"); +} + +void 
+OpImageSyncLocal::postEval(const vk::CommandBuffer& /*commandBuffer*/)
+{
+    KP_LOG_DEBUG("Kompute OpImageSyncLocal postEval called");
+
+    KP_LOG_DEBUG("Kompute OpImageSyncLocal mapping data into image local");
+}
+
+}
diff --git a/src/OpMemoryBarrier.cpp b/src/OpMemoryBarrier.cpp
index 1f075a3c..1a473080 100644
--- a/src/OpMemoryBarrier.cpp
+++ b/src/OpMemoryBarrier.cpp
@@ -5,7 +5,7 @@
 namespace kp {
 
 OpMemoryBarrier::OpMemoryBarrier(
-  const std::vector<std::shared_ptr<Tensor>>& tensors,
+  const std::vector<std::shared_ptr<Memory>>& memObjects,
   const vk::AccessFlagBits& srcAccessMask,
   const vk::AccessFlagBits& dstAccessMask,
   const vk::PipelineStageFlagBits& srcStageMask,
@@ -16,7 +16,7 @@ OpMemoryBarrier::OpMemoryBarrier(
   , mSrcStageMask(srcStageMask)
   , mDstStageMask(dstStageMask)
   , mBarrierOnPrimary(barrierOnPrimary)
-  , mTensors(tensors)
+  , mMemObjects(memObjects)
 {
     KP_LOG_DEBUG("Kompute OpMemoryBarrier constructor");
 }
@@ -33,16 +33,16 @@ OpMemoryBarrier::record(const vk::CommandBuffer& commandBuffer)
 
     // Barrier to ensure the data is finished writing to buffer memory
     if (this->mBarrierOnPrimary) {
-        for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
-            tensor->recordPrimaryBufferMemoryBarrier(commandBuffer,
+        for (const std::shared_ptr<Memory>& mem : this->mMemObjects) {
+            mem->recordPrimaryMemoryBarrier(commandBuffer,
                                             this->mSrcAccessMask,
                                             this->mDstAccessMask,
                                             this->mSrcStageMask,
                                             this->mDstStageMask);
         }
     } else {
-        for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
-            tensor->recordStagingBufferMemoryBarrier(commandBuffer,
+        for (const std::shared_ptr<Memory>& mem : this->mMemObjects) {
+            mem->recordStagingMemoryBarrier(commandBuffer,
                                             this->mSrcAccessMask,
                                             this->mDstAccessMask,
                                             this->mSrcStageMask,
diff --git a/src/OpTensorCopy.cpp b/src/OpTensorCopy.cpp
index 04336af8..712b6921 100644
--- a/src/OpTensorCopy.cpp
+++ b/src/OpTensorCopy.cpp
@@ -5,20 +5,26 @@
 namespace kp {
 
-OpTensorCopy::OpTensorCopy(const std::vector<std::shared_ptr<Tensor>>& tensors)
+OpTensorCopy::OpTensorCopy(const std::vector<std::shared_ptr<Memory>>& tensors)
 {
     KP_LOG_DEBUG("Kompute OpTensorCopy constructor with params");
 
-    this->mTensors = tensors;
+    if (tensors.size() < 2) {
+        throw std::runtime_error(
+          "Kompute OpTensorCopy called with less than 2 tensors");
+    }
 
-    if (this->mTensors.size() < 2) {
+    for (std::shared_ptr<Memory> tensor : tensors) {
+        if (std::dynamic_pointer_cast<Tensor>(tensor) == nullptr) {
             throw std::runtime_error(
-              "Kompute OpTensorCopy called with less than 2 tensor");
+              "Kompute OpTensorCopy: Memory object is not a Tensor");
+        }
+        this->mTensors.push_back(std::dynamic_pointer_cast<Tensor>(tensor));
     }
 
     kp::Tensor::TensorDataTypes dataType = this->mTensors[0]->dataType();
     uint32_t size = this->mTensors[0]->size();
-    for (const std::shared_ptr<Tensor>& tensor : tensors) {
+    for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
         if (tensor->dataType() != dataType) {
             throw std::runtime_error(fmt::format(
               "Attempting to copy tensors of different types from {} to {}",
@@ -62,7 +68,7 @@ OpTensorCopy::postEval(const vk::CommandBuffer& /*commandBuffer*/)
     KP_LOG_DEBUG("Kompute OpTensorCopy postEval called");
 
     // Do not copy on CPU side if source is storage tensor
-    if (this->mTensors[0]->tensorType() == kp::Tensor::TensorTypes::eStorage) {
+    if (this->mTensors[0]->memoryType() == kp::Memory::MemoryTypes::eStorage) {
         KP_LOG_DEBUG("Kompute OpTensorCopy not copying tensor source given "
                      "it's of eStorage type");
         return;
@@ -71,8 +77,8 @@ OpTensorCopy::postEval(const vk::CommandBuffer& /*commandBuffer*/)
 
     // Copy the data from the first tensor into all the tensors
     for (size_t i = 1; i < this->mTensors.size(); i++) {
-        if (this->mTensors[i]->tensorType() ==
-            kp::Tensor::TensorTypes::eStorage) {
+        if (this->mTensors[i]->memoryType() ==
+            kp::Memory::MemoryTypes::eStorage) {
             KP_LOG_DEBUG("Kompute OpTensorCopy not copying to tensor dest "
                          "given it's of eStorage type");
             continue;
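The accept-Memory-then-downcast pattern above repeats in every op in this PR. A generic restatement of the idiom as a self-contained sketch (hypothetical helper, assuming <memory>, <string> and <stdexcept> as in the surrounding files):

    // Downcast a type-erased Memory object to the concrete type an op
    // requires, throwing a descriptive error on mismatch.
    template<typename Concrete>
    static std::shared_ptr<Concrete>
    requireType(const std::shared_ptr<kp::Memory>& mem, const char* opName)
    {
        auto cast = std::dynamic_pointer_cast<Concrete>(mem);
        if (!cast) {
            throw std::runtime_error(
              std::string(opName) + ": Memory object has the wrong concrete type");
        }
        return cast;
    }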
diff --git a/src/OpTensorCopyToImage.cpp b/src/OpTensorCopyToImage.cpp
new file mode 100644
index 00000000..efaa8739
--- /dev/null
+++ b/src/OpTensorCopyToImage.cpp
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: Apache-2.0
+
+#include "kompute/operations/OpTensorCopyToImage.hpp"
+#include "kompute/Tensor.hpp"
+
+namespace kp {
+
+OpTensorCopyToImage::OpTensorCopyToImage(
+  const std::vector<std::shared_ptr<Memory>>& mem_objects)
+{
+    KP_LOG_DEBUG("Kompute OpTensorCopyToImage constructor with params");
+
+    if (mem_objects.size() < 2) {
+        throw std::runtime_error(
+          "Kompute OpTensorCopyToImage called with less than 2 mem objects");
+    }
+
+    if (std::dynamic_pointer_cast<Tensor>(mem_objects.at(0)) == nullptr) {
+        throw std::runtime_error(
+          "Kompute OpTensorCopyToImage: Memory object is not a Tensor");
+    }
+    this->mTensor = std::dynamic_pointer_cast<Tensor>(mem_objects.at(0));
+
+    for (size_t i = 1; i < mem_objects.size(); i++) {
+        if (std::dynamic_pointer_cast<Image>(mem_objects.at(i)) == nullptr) {
+            throw std::runtime_error(
+              "Kompute OpTensorCopyToImage: Memory object is not an Image");
+        }
+        this->mImages.push_back(
+          std::dynamic_pointer_cast<Image>(mem_objects.at(i)));
+    }
+
+    kp::Tensor::TensorDataTypes dataType = this->mTensor->dataType();
+    uint32_t size = this->mTensor->size();
+    for (const std::shared_ptr<Image>& image : this->mImages) {
+        if (Image::getTensorDataType(image->dataType()) != dataType) {
+            throw std::runtime_error(fmt::format(
+              "Attempting to copy tensors of different types from {} to {}",
+              Tensor::toString(dataType),
+              Image::toString(image->dataType())));
+        }
+        if (image->size() != size) {
+            throw std::runtime_error(fmt::format(
+              "Attempting to copy tensors of different sizes from {} to {}",
+              size,
+              image->size()));
+        }
+    }
+}
+
+OpTensorCopyToImage::~OpTensorCopyToImage()
+{
+    KP_LOG_DEBUG("Kompute OpTensorCopyToImage destructor started");
+}
+
+void
+OpTensorCopyToImage::record(const vk::CommandBuffer& commandBuffer)
+{
+    KP_LOG_DEBUG("Kompute OpTensorCopyToImage record called");
+
+    for (size_t i = 0; i < this->mImages.size(); i++) {
+        this->mImages[i]->recordCopyFrom(commandBuffer, this->mTensor);
+    }
+}
+
+void
+OpTensorCopyToImage::preEval(const vk::CommandBuffer& /*commandBuffer*/)
+{
+    KP_LOG_DEBUG("Kompute OpTensorCopyToImage preEval called");
+}
+
+void
+OpTensorCopyToImage::postEval(const vk::CommandBuffer& /*commandBuffer*/)
+{
+    KP_LOG_DEBUG("Kompute OpTensorCopyToImage postEval called");
+
+    // Do not copy on CPU side if source is storage tensor
+    if (this->mTensor->memoryType() == kp::Memory::MemoryTypes::eStorage) {
+        KP_LOG_DEBUG(
+          "Kompute OpTensorCopyToImage not copying tensor source given "
+          "it's of eStorage type");
+        return;
+    }
+    void* data = this->mTensor->rawData();
+
+    // Copy the data from the tensor into all the images
+    for (size_t i = 0; i < this->mImages.size(); i++) {
+        if (this->mImages[i]->memoryType() ==
+            kp::Memory::MemoryTypes::eStorage) {
+            KP_LOG_DEBUG(
+              "Kompute OpTensorCopyToImage not copying to image dest "
+              "given it's of eStorage type");
+            continue;
+        }
+        this->mImages[i]->setRawData(data);
+    }
+}
+
+}
diff --git a/src/OpTensorSyncDevice.cpp b/src/OpTensorSyncDevice.cpp
index a2542357..c3c743b0 100644
--- a/src/OpTensorSyncDevice.cpp
+++ b/src/OpTensorSyncDevice.cpp
@@ -5,7 +5,7 @@
 namespace kp {
 
OpTensorSyncDevice::OpTensorSyncDevice( - const std::vector>& tensors) + const std::vector>& tensors) { KP_LOG_DEBUG("Kompute OpTensorSyncDevice constructor with params"); @@ -14,7 +14,13 @@ OpTensorSyncDevice::OpTensorSyncDevice( "Kompute OpTensorSyncDevice called with less than 1 tensor"); } - this->mTensors = tensors; + for (std::shared_ptr tensor : tensors) { + if (std::dynamic_pointer_cast(tensor) == nullptr) { + throw std::runtime_error( + "Kompute OpTensorSyncDevice: Memory object is not a Tensor"); + } + this->mTensors.push_back(std::dynamic_pointer_cast(tensor)); + } } OpTensorSyncDevice::~OpTensorSyncDevice() @@ -30,7 +36,7 @@ OpTensorSyncDevice::record(const vk::CommandBuffer& commandBuffer) KP_LOG_DEBUG("Kompute OpTensorSyncDevice record called"); for (size_t i = 0; i < this->mTensors.size(); i++) { - if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) { + if (this->mTensors[i]->memoryType() == Tensor::MemoryTypes::eDevice) { this->mTensors[i]->recordCopyFromStagingToDevice(commandBuffer); } } diff --git a/src/OpTensorSyncLocal.cpp b/src/OpTensorSyncLocal.cpp index 7818db56..3ffff043 100644 --- a/src/OpTensorSyncLocal.cpp +++ b/src/OpTensorSyncLocal.cpp @@ -7,7 +7,7 @@ namespace kp { OpTensorSyncLocal::OpTensorSyncLocal( - const std::vector>& tensors) + const std::vector>& tensors) { KP_LOG_DEBUG("Kompute OpTensorSyncLocal constructor with params"); @@ -16,7 +16,13 @@ OpTensorSyncLocal::OpTensorSyncLocal( "Kompute OpTensorSyncLocal called with less than 1 tensor"); } - this->mTensors = tensors; + for (std::shared_ptr tensor : tensors) { + if (std::dynamic_pointer_cast(tensor) == nullptr) { + throw std::runtime_error( + "Kompute OpTensorSyncLocal: Memory object is not a Tensor"); + } + this->mTensors.push_back(std::dynamic_pointer_cast(tensor)); + } } OpTensorSyncLocal::~OpTensorSyncLocal() @@ -30,19 +36,21 @@ OpTensorSyncLocal::record(const vk::CommandBuffer& commandBuffer) KP_LOG_DEBUG("Kompute OpTensorSyncLocal record called"); for (size_t i = 0; i < this->mTensors.size(); i++) { - if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) { + if (this->mTensors[i]->memoryType() == Memory::MemoryTypes::eDevice) { - this->mTensors[i]->recordPrimaryBufferMemoryBarrier( + this->mTensors[i]->recordPrimaryMemoryBarrier( commandBuffer, vk::AccessFlagBits::eShaderWrite, + // FIXME: eTransferRead is not supported for the compute pipeline vk::AccessFlagBits::eTransferRead, vk::PipelineStageFlagBits::eComputeShader, vk::PipelineStageFlagBits::eTransfer); this->mTensors[i]->recordCopyFromDeviceToStaging(commandBuffer); - this->mTensors[i]->recordPrimaryBufferMemoryBarrier( + this->mTensors[i]->recordPrimaryMemoryBarrier( commandBuffer, + // FIXME: eTransferRead is not supported for the compute pipeline vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eHostRead, vk::PipelineStageFlagBits::eTransfer, diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 5a906578..6ecefc09 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 #include "kompute/Tensor.hpp" +#include "kompute/Image.hpp" namespace kp { @@ -23,20 +24,6 @@ Tensor::toString(Tensor::TensorDataTypes dt) } } -std::string -Tensor::toString(Tensor::TensorTypes dt) -{ - switch (dt) { - case TensorTypes::eDevice: - return "eDevice"; - case TensorTypes::eHost: - return "eHost"; - case TensorTypes::eStorage: - return "eStorage"; - default: - return "unknown"; - } -} Tensor::Tensor(std::shared_ptr physicalDevice, std::shared_ptr device, @@ -44,16 +31,17 @@ 
Tensor::Tensor(std::shared_ptr physicalDevice, uint32_t elementTotalCount, uint32_t elementMemorySize, const TensorDataTypes& dataType, - const TensorTypes& tensorType) + const MemoryTypes& memoryType) { KP_LOG_DEBUG("Kompute Tensor constructor data length: {}, and type: {}", elementTotalCount, - Tensor::toString(tensorType)); + Memory::toString(memoryType)); this->mPhysicalDevice = physicalDevice; this->mDevice = device; this->mDataType = dataType; - this->mTensorType = tensorType; + this->mMemoryType = memoryType; + this->mDescriptorType = vk::DescriptorType::eStorageBuffer; this->rebuild(data, elementTotalCount, elementMemorySize); } @@ -61,7 +49,7 @@ Tensor::Tensor(std::shared_ptr physicalDevice, Tensor::~Tensor() { KP_LOG_DEBUG("Kompute Tensor destructor started. Type: {}", - Tensor::toString(this->tensorType())); + Memory::toString(this->memoryType())); if (this->mDevice) { this->destroy(); @@ -88,41 +76,16 @@ Tensor::rebuild(void* data, this->allocateMemoryCreateGPUResources(); - if (this->tensorType() != Tensor::TensorTypes::eStorage) { + if (this->memoryType() != Memory::MemoryTypes::eStorage) { this->mapRawData(); memcpy(this->mRawData, data, this->memorySize()); } } -Tensor::TensorTypes -Tensor::tensorType() -{ - return this->mTensorType; -} - bool Tensor::isInit() { - return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory && - this->mRawData; -} - -uint32_t -Tensor::size() -{ - return this->mSize; -} - -uint32_t -Tensor::dataTypeMemorySize() -{ - return this->mDataTypeMemorySize; -} - -uint32_t -Tensor::memorySize() -{ - return this->mSize * this->mDataTypeMemorySize; + return this->mDevice && this->mPrimaryBuffer && this->mPrimaryMemory; } kp::Tensor::TensorDataTypes @@ -131,80 +94,46 @@ Tensor::dataType() return this->mDataType; } -void* -Tensor::rawData() -{ - return this->mRawData; -} - void -Tensor::setRawData(const void* data) -{ - memcpy(this->mRawData, data, this->memorySize()); -} - -void -Tensor::mapRawData() +Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer, + std::shared_ptr copyFromTensor) { - KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); - - std::shared_ptr hostVisibleMemory = nullptr; - - if (this->mTensorType == TensorTypes::eHost) { - hostVisibleMemory = this->mPrimaryMemory; - } else if (this->mTensorType == TensorTypes::eDevice) { - hostVisibleMemory = this->mStagingMemory; - } else { - KP_LOG_WARN("Kompute Tensor mapping data not supported on {} tensor", - toString(this->tensorType())); - return; - } + vk::DeviceSize bufferSize(this->memorySize()); + vk::BufferCopy copyRegion(0, 0, bufferSize); - vk::DeviceSize bufferSize = this->memorySize(); + KP_LOG_DEBUG("Kompute Tensor recordCopyFrom data size {}.", bufferSize); - // Given we request coherent host memory we don't need to invalidate / - // flush - this->mRawData = this->mDevice->mapMemory( - *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags()); + this->recordCopyBuffer(commandBuffer, + copyFromTensor->mPrimaryBuffer, + this->mPrimaryBuffer, + bufferSize, + copyRegion); } void -Tensor::unmapRawData() +Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer, + std::shared_ptr copyFromImage) { - KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer"); - - std::shared_ptr hostVisibleMemory = nullptr; - - if (this->mTensorType == TensorTypes::eHost) { - hostVisibleMemory = this->mPrimaryMemory; - } else if (this->mTensorType == TensorTypes::eDevice) { - hostVisibleMemory = this->mStagingMemory; - } else { - KP_LOG_WARN("Kompute Tensor mapping data not 
supported on {} tensor", - toString(this->tensorType())); - return; - } + vk::DeviceSize bufferSize(this->memorySize()); - vk::DeviceSize bufferSize = this->memorySize(); - vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize); - this->mDevice->flushMappedMemoryRanges(1, &mappedRange); - this->mDevice->unmapMemory(*hostVisibleMemory); -} + vk::ImageSubresourceLayers layer = {}; + layer.aspectMask = vk::ImageAspectFlagBits::eColor; + layer.layerCount = 1; + vk::Offset3D offset = { 0, 0, 0 }; -void -Tensor::recordCopyFrom(const vk::CommandBuffer& commandBuffer, - std::shared_ptr copyFromTensor) -{ + // FIXME: Check the size of the dest and source images match + vk::Extent3D size = { copyFromImage->getWidth(), + copyFromImage->getHeight(), + 1 }; - vk::DeviceSize bufferSize(this->memorySize()); - vk::BufferCopy copyRegion(0, 0, bufferSize); + vk::BufferImageCopy copyRegion(0, 0, 0, layer, offset, size); KP_LOG_DEBUG("Kompute Tensor recordCopyFrom data size {}.", bufferSize); - this->recordCopyBuffer(commandBuffer, - copyFromTensor->mPrimaryBuffer, + this->recordCopyBufferFromImage(commandBuffer, + copyFromImage->getPrimaryImage(), this->mPrimaryBuffer, bufferSize, copyRegion); @@ -252,7 +181,18 @@ Tensor::recordCopyBuffer(const vk::CommandBuffer& commandBuffer, } void -Tensor::recordPrimaryBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, +Tensor::recordCopyBufferFromImage(const vk::CommandBuffer& commandBuffer, + std::shared_ptr imageFrom, + std::shared_ptr bufferTo, + vk::DeviceSize /*bufferSize*/, + vk::BufferImageCopy copyRegion) +{ + commandBuffer.copyImageToBuffer( + *imageFrom, vk::ImageLayout::eGeneral, *bufferTo, 1, ©Region); +} + +void +Tensor::recordPrimaryMemoryBarrier(const vk::CommandBuffer& commandBuffer, vk::AccessFlagBits srcAccessMask, vk::AccessFlagBits dstAccessMask, vk::PipelineStageFlagBits srcStageMask, @@ -269,7 +209,7 @@ Tensor::recordPrimaryBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, } void -Tensor::recordStagingBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, +Tensor::recordStagingMemoryBarrier(const vk::CommandBuffer& commandBuffer, vk::AccessFlagBits srcAccessMask, vk::AccessFlagBits dstAccessMask, vk::PipelineStageFlagBits srcStageMask, @@ -285,6 +225,7 @@ Tensor::recordStagingBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, dstStageMask); } +// FIXME: Make this private. 
void Tensor::recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, const vk::Buffer& buffer, @@ -324,21 +265,38 @@ Tensor::constructDescriptorBufferInfo() bufferSize); } +vk::WriteDescriptorSet +Tensor::constructDescriptorSet(vk::DescriptorSet descriptorSet, + uint32_t binding) +{ + KP_LOG_DEBUG("Kompute Tensor construct descriptor set for binding {}", + binding); + + mDescriptorBufferInfo = this->constructDescriptorBufferInfo(); + + vk::WriteDescriptorSet writeDesciptorSet = vk::WriteDescriptorSet( + descriptorSet, + binding, // Destination binding + 0, // Destination array element + 1, // Descriptor count + vk::DescriptorType::eStorageBuffer, + nullptr, // Descriptor image info + // Using a member variable here means the address is still valid later + &mDescriptorBufferInfo); + + return writeDesciptorSet; +} vk::BufferUsageFlags Tensor::getPrimaryBufferUsageFlags() { - switch (this->mTensorType) { - case TensorTypes::eDevice: + switch (this->mMemoryType) { + case MemoryTypes::eDevice: + case MemoryTypes::eHost: return vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst; break; - case TensorTypes::eHost: - return vk::BufferUsageFlagBits::eStorageBuffer | - vk::BufferUsageFlagBits::eTransferSrc | - vk::BufferUsageFlagBits::eTransferDst; - break; - case TensorTypes::eStorage: + case MemoryTypes::eStorage: return vk::BufferUsageFlagBits::eStorageBuffer; break; default: @@ -349,15 +307,15 @@ Tensor::getPrimaryBufferUsageFlags() vk::MemoryPropertyFlags Tensor::getPrimaryMemoryPropertyFlags() { - switch (this->mTensorType) { - case TensorTypes::eDevice: + switch (this->mMemoryType) { + case MemoryTypes::eDevice: return vk::MemoryPropertyFlagBits::eDeviceLocal; break; - case TensorTypes::eHost: + case MemoryTypes::eHost: return vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent; break; - case TensorTypes::eStorage: + case MemoryTypes::eStorage: return vk::MemoryPropertyFlagBits::eDeviceLocal; break; default: @@ -368,8 +326,8 @@ Tensor::getPrimaryMemoryPropertyFlags() vk::BufferUsageFlags Tensor::getStagingBufferUsageFlags() { - switch (this->mTensorType) { - case TensorTypes::eDevice: + switch (this->mMemoryType) { + case MemoryTypes::eDevice: return vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst; break; @@ -381,8 +339,8 @@ Tensor::getStagingBufferUsageFlags() vk::MemoryPropertyFlags Tensor::getStagingMemoryPropertyFlags() { - switch (this->mTensorType) { - case TensorTypes::eDevice: + switch (this->mMemoryType) { + case MemoryTypes::eDevice: return vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent; break; @@ -391,6 +349,12 @@ Tensor::getStagingMemoryPropertyFlags() } } +std::shared_ptr +Tensor::getPrimaryBuffer() +{ + return this->mPrimaryBuffer; +} + void Tensor::allocateMemoryCreateGPUResources() { @@ -415,7 +379,7 @@ Tensor::allocateMemoryCreateGPUResources() this->getPrimaryMemoryPropertyFlags()); this->mFreePrimaryMemory = true; - if (this->mTensorType == TensorTypes::eDevice) { + if (this->mMemoryType == MemoryTypes::eDevice) { KP_LOG_DEBUG("Kompute Tensor creating staging buffer and memory"); this->mStagingBuffer = std::make_shared(); @@ -521,7 +485,7 @@ Tensor::destroy() } // Unmap the current memory data - if (this->tensorType() != Tensor::TensorTypes::eStorage) { + if (this->memoryType() != Memory::MemoryTypes::eStorage) { this->unmapRawData(); } diff --git a/src/include/kompute/Algorithm.hpp 
b/src/include/kompute/Algorithm.hpp index 1917dd37..5a2646a1 100644 --- a/src/include/kompute/Algorithm.hpp +++ b/src/include/kompute/Algorithm.hpp @@ -36,7 +36,7 @@ class Algorithm */ template Algorithm(std::shared_ptr device, - const std::vector>& tensors = {}, + const std::vector>& memObjects = {}, const std::vector& spirv = {}, const Workgroup& workgroup = {}, const std::vector& specializationConstants = {}, @@ -46,20 +46,20 @@ class Algorithm this->mDevice = device; - if (tensors.size() && spirv.size()) { + if (memObjects.size() && spirv.size()) { KP_LOG_INFO( "Kompute Algorithm initialising with tensor size: {} and " "spirv size: {}", - tensors.size(), + memObjects.size(), spirv.size()); - this->rebuild(tensors, + this->rebuild(memObjects, spirv, workgroup, specializationConstants, pushConstants); } else { KP_LOG_INFO( - "Kompute Algorithm constructor with empty tensors and or " + "Kompute Algorithm constructor with empty mem objects and or " "spirv so not rebuilding vulkan components"); } } @@ -81,7 +81,7 @@ class Algorithm * as this initial value. */ template - void rebuild(const std::vector>& tensors, + void rebuild(const std::vector>& memObjects, const std::vector& spirv, const Workgroup& workgroup = {}, const std::vector& specializationConstants = {}, @@ -89,7 +89,7 @@ class Algorithm { KP_LOG_DEBUG("Kompute Algorithm rebuild started"); - this->mTensors = tensors; + this->mMemObjects = memObjects; this->mSpirv = spirv; if (specializationConstants.size()) { @@ -122,7 +122,8 @@ class Algorithm } this->setWorkgroup( - workgroup, this->mTensors.size() ? this->mTensors[0]->size() : 1); + workgroup, + this->mMemObjects.size() ? this->mMemObjects[0]->size() : 1); // Descriptor pool is created first so if available then destroy all // before rebuild @@ -267,18 +268,18 @@ class Algorithm ((T*)this->mPushConstantsData) + this->mPushConstantsSize }; } /** - * Gets the current tensors that are used in the algorithm. + * Gets the current memory objects that are used in the algorithm. * - * @returns The list of tensors used in the algorithm. + * @returns The list of memory objects used in the algorithm. */ - const std::vector>& getTensors(); + const std::vector>& getMemObjects(); void destroy(); private: // -------------- NEVER OWNED RESOURCES std::shared_ptr mDevice; - std::vector> mTensors; + std::vector> mMemObjects; // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mDescriptorSetLayout; diff --git a/src/include/kompute/Image.hpp b/src/include/kompute/Image.hpp new file mode 100644 index 00000000..55089229 --- /dev/null +++ b/src/include/kompute/Image.hpp @@ -0,0 +1,469 @@ +// SPDX-License-Identifier: Apache-2.0 +#pragma once + +#include "Memory.hpp" +#include "Tensor.hpp" +#include "kompute/Core.hpp" +#include "logger/Logger.hpp" +#include +#include + +namespace kp { + +/** + * Image data used in GPU operations. + * + * Each image would has a respective Vulkan memory and image, which + * would be used to store their respective data. The images can be used for GPU + * data storage or transfer. + */ +class Image : public Memory +{ + public: + enum class ImageDataTypes + { + eS8 = 0, + eU8 = 1, + eS16 = 2, + eU16 = 3, + eS32 = 4, + eU32 = 5, + eF16 = 6, + eF32 = 7, + }; + + static std::string toString(ImageDataTypes dt); + + static Tensor::TensorDataTypes getTensorDataType(ImageDataTypes dt); + + /** + * Constructor with data provided which would be used to create the + * respective vulkan image and memory. 
+ * + * @param physicalDevice The physical device to use to fetch properties + * @param device The device to use to create the image and memory from + * @param data Non-zero-sized vector of data that will be used by the + * image + * @param width Width of the image in pixels + * @param height Height of the image in pixels + * @param numChannels The number of channels in the image + * @param dataType Data type for the image which is of type ImageDataTypes + * @param memoryType Type for the image which is of type MemoryTypes + * @param tiling Tiling mode to use for the image. + */ + Image(std::shared_ptr physicalDevice, + std::shared_ptr device, + void* data, + uint32_t width, + uint32_t height, + uint32_t numChannels, + const ImageDataTypes& dataType, + vk::ImageTiling tiling, + const MemoryTypes& memoryType = MemoryTypes::eDevice) + { + init(physicalDevice, + device, + data, + width, + height, + numChannels, + dataType, + tiling, + memoryType); + } + + /** + * Constructor with data provided which would be used to create the + * respective vulkan image and memory. No tiling has been provided + * so will be inferred from \p memoryType. + * + * @param physicalDevice The physical device to use to fetch properties + * @param device The device to use to create the image and memory from + * @param data Non-zero-sized vector of data that will be used by the + * image + * @param width Width of the image in pixels + * @param height Height of the image in pixels + * @param numChannels The number of channels in the image + * @param dataType Data type for the image which is of type ImageDataTypes + * @param memoryType Type for the image which is of type MemoryTypes + * @param tiling Tiling mode to use for the image. + */ + Image(std::shared_ptr physicalDevice, + std::shared_ptr device, + void* data, + uint32_t width, + uint32_t height, + uint32_t numChannels, + const ImageDataTypes& dataType, + const MemoryTypes& memoryType = MemoryTypes::eDevice) + { + vk::ImageTiling tiling; + + if (memoryType == MemoryTypes::eHost || + memoryType == MemoryTypes::eDeviceAndHost) { + // Host-accessible memory must be linear-tiled. + tiling = vk::ImageTiling::eLinear; + } else if (memoryType == MemoryTypes::eDevice || + memoryType == MemoryTypes::eStorage) { + tiling = vk::ImageTiling::eOptimal; + } else { + throw std::runtime_error("Kompute Image unsupported memory type"); + } + + init(physicalDevice, + device, + data, + width, + height, + numChannels, + dataType, + tiling, + memoryType); + } + + /** + * Destructor which is in charge of freeing vulkan resources unless they + * have been provided externally. + */ + virtual ~Image(); + + /** + * Function to trigger reinitialisation of the image and memory with + * new data as well as new potential device type. + * + * @param data Vector of data to use to initialise image from + */ + void rebuild(void* data); + + /** + * Destroys and frees the GPU resources which include the image and memory. + */ + void destroy(); + + /** + * Check whether image is initialized based on the created gpu resources. + * + * @returns Boolean stating whether image is initialized + */ + bool isInit(); + + /** + * Records a copy from the memory of the image provided to the current + * image. This is intended to pass memory into a processing, to perform + * a staging image transfer, or to gather output (between others). 
+     *
+     * @param commandBuffer Vulkan Command Buffer to record the commands into
+     * @param copyFromImage Image to copy the data from
+     */
+    void recordCopyFrom(const vk::CommandBuffer& commandBuffer,
+                        std::shared_ptr<Image> copyFromImage);
+
+    /**
+     * Records a copy from the memory of the tensor provided to the current
+     * image. This is intended to pass memory into a processing, to perform
+     * a staging image transfer, or to gather output (among others).
+     *
+     * @param commandBuffer Vulkan Command Buffer to record the commands into
+     * @param copyFromTensor Tensor to copy the data from
+     */
+    void recordCopyFrom(const vk::CommandBuffer& commandBuffer,
+                        std::shared_ptr<Tensor> copyFromTensor);
+
+    /**
+     * Records a copy from the internal staging memory to the device memory
+     * using an optional barrier to wait for the operation. This function
+     * would only be relevant for kp::Image objects of type eDevice.
+     *
+     * @param commandBuffer Vulkan Command Buffer to record the commands into
+     */
+    void recordCopyFromStagingToDevice(const vk::CommandBuffer& commandBuffer);
+
+    /**
+     * Records a copy from the internal device memory to the staging memory
+     * using an optional barrier to wait for the operation. This function
+     * would only be relevant for kp::Image objects of type eDevice.
+     *
+     * @param commandBuffer Vulkan Command Buffer to record the commands into
+     */
+    void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer);
+
+    /**
+     * Records the image memory barrier into the primary image and command
+     * buffer which ensures that relevant data transfers are carried out
+     * correctly.
+     *
+     * @param commandBuffer Vulkan Command Buffer to record the commands into
+     * @param srcAccessMask Access flags for source access mask
+     * @param dstAccessMask Access flags for destination access mask
+     * @param srcStageMask Pipeline stage flags for source stage mask
+     * @param dstStageMask Pipeline stage flags for destination stage mask
+     */
+    void recordPrimaryMemoryBarrier(const vk::CommandBuffer& commandBuffer,
+                                    vk::AccessFlagBits srcAccessMask,
+                                    vk::AccessFlagBits dstAccessMask,
+                                    vk::PipelineStageFlagBits srcStageMask,
+                                    vk::PipelineStageFlagBits dstStageMask);
+
+    /**
+     * Records the image memory barrier into the staging image and command
+     * buffer which ensures that relevant data transfers are carried out
+     * correctly.
+     *
+     * @param commandBuffer Vulkan Command Buffer to record the commands into
+     * @param srcAccessMask Access flags for source access mask
+     * @param dstAccessMask Access flags for destination access mask
+     * @param srcStageMask Pipeline stage flags for source stage mask
+     * @param dstStageMask Pipeline stage flags for destination stage mask
+     */
+    void recordStagingMemoryBarrier(const vk::CommandBuffer& commandBuffer,
+                                    vk::AccessFlagBits srcAccessMask,
+                                    vk::AccessFlagBits dstAccessMask,
+                                    vk::PipelineStageFlagBits srcStageMask,
+                                    vk::PipelineStageFlagBits dstStageMask);
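+    // Editorial sketch (not part of the API): a typical device-to-host
+    // readback pairs these barriers around a staging copy, mirroring
+    // OpImageSyncLocal::record() earlier in this PR:
+    //   recordPrimaryMemoryBarrier(cmd, eShaderWrite, eTransferRead,
+    //                              eComputeShader, eTransfer);
+    //   recordCopyFromDeviceToStaging(cmd);
+    //   recordPrimaryMemoryBarrier(cmd, eTransferWrite, eHostRead,
+    //                              eTransfer, eHost);
+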
+    /**
+     * Adds this object to a Vulkan descriptor set at \p binding.
+     *
+     * @param descriptorSet The descriptor set to add to.
+     * @param binding The binding number to use.
+     * @return The WriteDescriptorSet that registers this object at \p binding.
+     */
+    vk::WriteDescriptorSet constructDescriptorSet(
+      vk::DescriptorSet descriptorSet,
+      uint32_t binding);
+
+    /**
+     * Retrieve the data type of the image elements (e.g. eF32)
+     *
+     * @return Data type of the image of type kp::Image::ImageDataTypes
+     */
+    ImageDataTypes dataType();
+
+    std::shared_ptr<vk::Image> getPrimaryImage();
+
+    /***
+     * Retrieve the width of the image in pixels
+     *
+     * @return Width of the image in pixels
+     */
+    uint32_t getWidth();
+
+    /***
+     * Retrieve the height of the image in pixels
+     *
+     * @return Height of the image in pixels
+     */
+    uint32_t getHeight();
+
+    /***
+     * Retrieve the number of channels in the image
+     *
+     * @return Number of channels in the image
+     */
+    uint32_t getNumChannels();
+
+  protected:
+    // -------------- ALWAYS OWNED RESOURCES
+    ImageDataTypes mDataType;
+    uint32_t mWidth;
+    uint32_t mHeight;
+    uint32_t mNumChannels;
+    vk::DescriptorImageInfo mDescriptorImageInfo;
+    vk::ImageLayout mPrimaryImageLayout = vk::ImageLayout::eUndefined;
+    vk::ImageLayout mStagingImageLayout = vk::ImageLayout::eUndefined;
+    std::shared_ptr<vk::ImageView> mImageView = nullptr;
+    vk::ImageTiling mTiling = vk::ImageTiling::eOptimal;
+
+  private:
+    // -------------- OPTIONALLY OWNED RESOURCES
+    std::shared_ptr<vk::Image> mPrimaryImage;
+    bool mFreePrimaryImage = false;
+    std::shared_ptr<vk::Image> mStagingImage;
+    bool mFreeStagingImage = false;
+
+    void allocateMemoryCreateGPUResources(); // Creates the vulkan image
+    void createImage(std::shared_ptr<vk::Image> image,
+                     vk::ImageUsageFlags imageUsageFlags,
+                     vk::ImageTiling imageTiling);
+    void allocateBindMemory(std::shared_ptr<vk::Image> image,
+                            std::shared_ptr<vk::DeviceMemory> memory,
+                            vk::MemoryPropertyFlags memoryPropertyFlags);
+    void recordCopyImage(const vk::CommandBuffer& commandBuffer,
+                         std::shared_ptr<vk::Image> imageFrom,
+                         std::shared_ptr<vk::Image> imageTo,
+                         vk::ImageCopy copyRegion);
+    void recordCopyImageFromTensor(const vk::CommandBuffer& commandBuffer,
+                                   std::shared_ptr<vk::Buffer> bufferFrom,
+                                   std::shared_ptr<vk::Image> imageTo,
+                                   vk::BufferImageCopy copyRegion);
+    void recordImageMemoryBarrier(const vk::CommandBuffer& commandBuffer,
+                                  const vk::Image& image,
+                                  vk::AccessFlagBits srcAccessMask,
+                                  vk::AccessFlagBits dstAccessMask,
+                                  vk::PipelineStageFlagBits srcStageMask,
+                                  vk::PipelineStageFlagBits dstStageMask,
+                                  vk::ImageLayout oldLayout,
+                                  vk::ImageLayout newLayout);
+
+    // Private util functions
+    vk::ImageUsageFlags getPrimaryImageUsageFlags();
+    vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
+    vk::ImageUsageFlags getStagingImageUsageFlags();
+    vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();
+
+    constexpr size_t elementTypeSize(ImageDataTypes type);
+    vk::Format getFormat();
+
+    vk::DescriptorImageInfo constructDescriptorImageInfo();
+
+    void init(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
+              std::shared_ptr<vk::Device> device,
+              void* data,
+              uint32_t width,
+              uint32_t height,
+              uint32_t numChannels,
+              const ImageDataTypes& dataType,
+              vk::ImageTiling tiling,
+              const MemoryTypes& memoryType = MemoryTypes::eDevice);
+};
+
+template<typename T>
+class ImageT : public Image
+{
+
+  public:
+    ImageT(std::shared_ptr<vk::PhysicalDevice> physicalDevice,
+           std::shared_ptr<vk::Device> device,
+           const std::vector<T>& data,
+           uint32_t width,
+           uint32_t height,
+           uint32_t numChannels,
+           vk::ImageTiling tiling,
+           const MemoryTypes& imageType = MemoryTypes::eDevice)
+      : Image(physicalDevice,
+              device,
+              (void*)data.data(),
+              width,
+              height,
+              numChannels,
+              this->dataType(),
+              tiling,
+              imageType)
+    {
+        KP_LOG_DEBUG("Kompute imageT constructor with data size {}, width {}, "
+                     "height {}, and num channels {}",
+                     data.size(),
+                     width,
+                     height,
+                     numChannels);
+        if (data.size()
== 0) { + throw std::runtime_error( + "Kompute ImageT attempted to create a zero-sized image"); + } + + if (data.size() < width * height * numChannels) { + throw std::runtime_error( + "Kompute ImageT vector is smaller than the requested image size"); + } + } + + ImageT(std::shared_ptr physicalDevice, + std::shared_ptr device, + const std::vector& data, + uint32_t width, + uint32_t height, + uint32_t numChannels, + const MemoryTypes& imageType = MemoryTypes::eDevice) + : Image(physicalDevice, + device, + (void*)data.data(), + width, + height, + numChannels, + this->dataType(), + imageType) + { + KP_LOG_DEBUG("Kompute imageT constructor with data size {}, width {}, " + "height {}, and num channels {}", + data.size(), + width, + height, + numChannels); + if (data.size() == 0) { + throw std::runtime_error( + "Kompute ImageT attempted to create a zero-sized image"); + } + + if (data.size() < width * height * numChannels) { + throw std::runtime_error( + "Kompute ImageT vector is smaller than the requested image size"); + } + } + + ImageT(std::shared_ptr physicalDevice, + std::shared_ptr device, + uint32_t width, + uint32_t height, + uint32_t numChannels, + vk::ImageTiling tiling, + const MemoryTypes& imageType = MemoryTypes::eDevice) + : Image(physicalDevice, + device, + width, + height, + numChannels, + this->dataType(), + tiling, + imageType) + { + KP_LOG_DEBUG("Kompute imageT constructor with no data, width {}, " + "height {}, and num channels {}", + width, + height, + numChannels); + } + + ImageT(std::shared_ptr physicalDevice, + std::shared_ptr device, + uint32_t width, + uint32_t height, + uint32_t numChannels, + const MemoryTypes& imageType = MemoryTypes::eDevice) + : Image(physicalDevice, + device, + width, + height, + numChannels, + this->dataType(), + imageType) + { + KP_LOG_DEBUG("Kompute imageT constructor with no data, width {}, " + "height {}, and num channels {}", + width, + height, + numChannels); + } + + ~ImageT() { KP_LOG_DEBUG("Kompute imageT destructor"); } + + std::vector vector() { return Memory::vector(); } + + T& operator[](int index) { return *(Memory::data() + index); } + + void setData(const std::vector& data) + { + + KP_LOG_DEBUG("Kompute imageT setting data with data size {}", + data.size()); + + if (data.size() != this->mSize) { + throw std::runtime_error( + "Kompute imageT Cannot set data of different sizes"); + } + + Image::setRawData(data.data()); + } + + ImageDataTypes dataType(); +}; + +} // End namespace kp diff --git a/src/include/kompute/Kompute.hpp b/src/include/kompute/Kompute.hpp index e54adc1b..bb64635e 100644 --- a/src/include/kompute/Kompute.hpp +++ b/src/include/kompute/Kompute.hpp @@ -2,15 +2,21 @@ #include "Algorithm.hpp" #include "Core.hpp" +#include "Image.hpp" #include "Manager.hpp" #include "Sequence.hpp" #include "Tensor.hpp" #include "operations/OpAlgoDispatch.hpp" #include "operations/OpBase.hpp" +#include "operations/OpImageCopy.hpp" +#include "operations/OpImageCopyToTensor.hpp" +#include "operations/OpImageSyncDevice.hpp" +#include "operations/OpImageSyncLocal.hpp" #include "operations/OpMemoryBarrier.hpp" #include "operations/OpMult.hpp" #include "operations/OpTensorCopy.hpp" +#include "operations/OpTensorCopyToImage.hpp" #include "operations/OpTensorSyncDevice.hpp" #include "operations/OpTensorSyncLocal.hpp" diff --git a/src/include/kompute/Manager.hpp b/src/include/kompute/Manager.hpp index 52f9ada7..aa39c2d1 100644 --- a/src/include/kompute/Manager.hpp +++ b/src/include/kompute/Manager.hpp @@ -6,6 +6,7 @@ #include "kompute/Core.hpp" 
+#include "kompute/Image.hpp" #include "kompute/Sequence.hpp" #include "logger/Logger.hpp" @@ -81,7 +82,7 @@ class Manager template std::shared_ptr> tensorT( const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + Tensor::MemoryTypes tensorType = Tensor::MemoryTypes::eDevice) { KP_LOG_DEBUG("Kompute Manager tensor creation triggered"); @@ -97,7 +98,7 @@ class Manager std::shared_ptr> tensor( const std::vector& data, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + Tensor::MemoryTypes tensorType = Tensor::MemoryTypes::eDevice) { return this->tensorT(data, tensorType); } @@ -107,7 +108,7 @@ class Manager uint32_t elementTotalCount, uint32_t elementMemorySize, const Tensor::TensorDataTypes& dataType, - Tensor::TensorTypes tensorType = Tensor::TensorTypes::eDevice) + Tensor::MemoryTypes tensorType = Tensor::MemoryTypes::eDevice) { std::shared_ptr tensor{ new kp::Tensor(this->mPhysicalDevice, this->mDevice, @@ -124,12 +125,197 @@ class Manager return tensor; } + /** + * Create a managed image that will be destroyed by this manager + * if it hasn't been destroyed by its reference count going to zero. + * + * @param data The data to initialize the image with + * @param tensorType The type of image to initialize + * @returns Shared pointer with initialised image + */ + template + std::shared_ptr> imageT( + const std::vector& data, + uint32_t width, + uint32_t height, + uint32_t numChannels, + vk::ImageTiling tiling, + Image::MemoryTypes imageType = Image::MemoryTypes::eDevice) + { + KP_LOG_DEBUG("Kompute Manager image creation triggered"); + + std::shared_ptr> image{ new kp::ImageT( + this->mPhysicalDevice, + this->mDevice, + data, + width, + height, + numChannels, + tiling, + imageType) }; + + if (this->mManageResources) { + this->mManagedImages.push_back(image); + } + + return image; + } + + template + std::shared_ptr> imageT( + const std::vector& data, + uint32_t width, + uint32_t height, + uint32_t numChannels, + Image::MemoryTypes imageType = Image::MemoryTypes::eDevice) + { + KP_LOG_DEBUG("Kompute Manager image creation triggered"); + + std::shared_ptr> image{ new kp::ImageT( + this->mPhysicalDevice, + this->mDevice, + data, + width, + height, + numChannels, + imageType) }; + + if (this->mManageResources) { + this->mManagedImages.push_back(image); + } + + return image; + } + + template + std::shared_ptr> imageT( + uint32_t width, + uint32_t height, + uint32_t numChannels, + vk::ImageTiling tiling, + Image::MemoryTypes imageType = Image::MemoryTypes::eDevice) + { + KP_LOG_DEBUG("Kompute Manager image creation triggered"); + + std::shared_ptr> image{ new kp::ImageT( + this->mPhysicalDevice, + this->mDevice, + width, + height, + numChannels, + tiling, + imageType) }; + + if (this->mManageResources) { + this->mManagedImages.push_back(image); + } + + return image; + } + + template + std::shared_ptr> imageT( + uint32_t width, + uint32_t height, + uint32_t numChannels, + Image::MemoryTypes imageType = Image::MemoryTypes::eDevice) + { + KP_LOG_DEBUG("Kompute Manager image creation triggered"); + + std::shared_ptr> image{ new kp::ImageT( + this->mPhysicalDevice, + this->mDevice, + width, + height, + numChannels, + imageType) }; + + if (this->mManageResources) { + this->mManagedImages.push_back(image); + } + + return image; + } + + std::shared_ptr> image( + const std::vector& data, + uint32_t width, + uint32_t height, + uint32_t numChannels, + vk::ImageTiling tiling, + Image::MemoryTypes imageType = Image::MemoryTypes::eDevice) + { + return 
this->imageT( + data, width, height, numChannels, tiling, imageType); + } + + std::shared_ptr> image( + const std::vector& data, + uint32_t width, + uint32_t height, + uint32_t numChannels, + Image::MemoryTypes imageType = Image::MemoryTypes::eDevice) + { + return this->imageT(data, width, height, numChannels, imageType); + } + + std::shared_ptr image( + void* data, + uint32_t width, + uint32_t height, + uint32_t numChannels, + const Image::ImageDataTypes& dataType, + vk::ImageTiling tiling, + Image::MemoryTypes imageType = Image::MemoryTypes::eDevice) + { + std::shared_ptr image{ new kp::Image(this->mPhysicalDevice, + this->mDevice, + data, + width, + height, + numChannels, + dataType, + tiling, + imageType) }; + + if (this->mManageResources) { + this->mManagedImages.push_back(image); + } + + return image; + } + + std::shared_ptr image( + void* data, + uint32_t width, + uint32_t height, + uint32_t numChannels, + const Image::ImageDataTypes& dataType, + Image::MemoryTypes imageType = Image::MemoryTypes::eDevice) + { + std::shared_ptr image{ new kp::Image(this->mPhysicalDevice, + this->mDevice, + data, + width, + height, + numChannels, + dataType, + imageType) }; + + if (this->mManageResources) { + this->mManagedImages.push_back(image); + } + + return image; + } + /** * Default non-template function that can be used to create algorithm * objects which provides default types to the push and spec constants as * floats. * - * @param tensors (optional) The tensors to initialise the algorithm with + * @param memObjects (optional) The mem objects to initialise the algorithm + * with * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch * @param workgroup (optional) kp::Workgroup for algorithm to use, and * defaults to (tensor[0].size(), 1, 1) @@ -140,21 +326,22 @@ class Manager * @returns Shared pointer with initialised algorithm */ std::shared_ptr algorithm( - const std::vector>& tensors = {}, + const std::vector>& memObjects = {}, const std::vector& spirv = {}, const Workgroup& workgroup = {}, const std::vector& specializationConstants = {}, const std::vector& pushConstants = {}) { return this->algorithm<>( - tensors, spirv, workgroup, specializationConstants, pushConstants); + memObjects, spirv, workgroup, specializationConstants, pushConstants); } /** * Create a managed algorithm that will be destroyed by this manager * if it hasn't been destroyed by its reference count going to zero. 
* - * @param tensors (optional) The tensors to initialise the algorithm with + * @param memObjects (optional) The mem objects to initialise the algorithm + * with * @param spirv (optional) The SPIRV bytes for the algorithm to dispatch * @param workgroup (optional) kp::Workgroup for algorithm to use, and * defaults to (tensor[0].size(), 1, 1) @@ -166,7 +353,7 @@ class Manager */ template std::shared_ptr algorithm( - const std::vector>& tensors, + const std::vector>& memObjects, const std::vector& spirv, const Workgroup& workgroup, const std::vector& specializationConstants, @@ -177,7 +364,7 @@ class Manager std::shared_ptr algorithm{ new kp::Algorithm( this->mDevice, - tensors, + memObjects, spirv, workgroup, specializationConstants, @@ -233,6 +420,7 @@ class Manager // -------------- ALWAYS OWNED RESOURCES std::vector> mManagedTensors; + std::vector> mManagedImages; std::vector> mManagedSequences; std::vector> mManagedAlgorithms; diff --git a/src/include/kompute/Memory.hpp b/src/include/kompute/Memory.hpp new file mode 100644 index 00000000..7da8c59b --- /dev/null +++ b/src/include/kompute/Memory.hpp @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: Apache-2.0 +#pragma once + +#include "kompute/Core.hpp" +#include "logger/Logger.hpp" +#include +#include + +namespace kp { + +class Memory +{ + // This is the base class for Tensors and Images. + // It's required so that algorithms and sequences can mix tensors and + // images. + // FIXME: Common-ise lots of the code that is the same between images and + // tensors into this base class. + public: + /** + * Type for memory created: Device allows memory to be transferred from + * staging memory. Staging are host memory visible. Storage are device + * visible but are not set up to transfer or receive data (only for shader + * storage). + */ + enum class MemoryTypes + { + eDevice = 0, ///< Type is device memory, source and destination + eHost = 1, ///< Type is host memory, source and destination + eStorage = 2, ///< Type is Device memory (only) + eDeviceAndHost = + 3, ///< Type is host-visible and host-coherent device memory + }; + + static std::string toString(MemoryTypes dt); + + /** + * Destructor which is in charge of freeing vulkan resources unless they + * have been provided externally. + */ + virtual ~Memory(){}; + + /** + * Retrieve the image type of the image + * + * @return image type of image + */ + MemoryTypes memoryType(); + + /** + * Check whether tensor/image is initialized based on the created gpu + * resources. + * + * @returns Boolean stating whether tensor is initialized + */ + virtual bool isInit() = 0; + + /** + * Records a copy from the internal staging memory to the device memory + * using an optional barrier to wait for the operation. This function would + * only be relevant for kp::Tensors of type eDevice. + * + * @param commandBuffer Vulkan Command Buffer to record the commands into + */ + virtual void recordCopyFromStagingToDevice( + const vk::CommandBuffer& commandBuffer) = 0; + + /** + * Records a copy from the internal device memory to the staging memory + * using an optional barrier to wait for the operation. This function would + * only be relevant for kp::Tensors of type eDevice. + * + * @param commandBuffer Vulkan Command Buffer to record the commands into + */ + virtual void recordCopyFromDeviceToStaging( + const vk::CommandBuffer& commandBuffer) = 0; + /** + * Records the buffer memory barrier into the primary buffer and command + * buffer which ensures that relevant data transfers are carried out + * correctly. 
+     *
+     * @param commandBuffer Vulkan Command Buffer to record the commands into
+     * @param srcAccessMask Access flags for source access mask
+     * @param dstAccessMask Access flags for destination access mask
+     * @param srcStageMask Pipeline stage flags for source stage mask
+     * @param dstStageMask Pipeline stage flags for destination stage mask
+     */
+    virtual void recordPrimaryMemoryBarrier(
+      const vk::CommandBuffer& commandBuffer,
+      vk::AccessFlagBits srcAccessMask,
+      vk::AccessFlagBits dstAccessMask,
+      vk::PipelineStageFlagBits srcStageMask,
+      vk::PipelineStageFlagBits dstStageMask) = 0;
+    /**
+     * Records the buffer memory barrier into the staging buffer and command
+     * buffer which ensures that relevant data transfers are carried out
+     * correctly.
+     *
+     * @param commandBuffer Vulkan Command Buffer to record the commands into
+     * @param srcAccessMask Access flags for source access mask
+     * @param dstAccessMask Access flags for destination access mask
+     * @param srcStageMask Pipeline stage flags for source stage mask
+     * @param dstStageMask Pipeline stage flags for destination stage mask
+     */
+    virtual void recordStagingMemoryBarrier(
+      const vk::CommandBuffer& commandBuffer,
+      vk::AccessFlagBits srcAccessMask,
+      vk::AccessFlagBits dstAccessMask,
+      vk::PipelineStageFlagBits srcStageMask,
+      vk::PipelineStageFlagBits dstStageMask) = 0;
+
+    /**
+     * Adds this object to a Vulkan descriptor set at \p binding.
+     *
+     * @param descriptorSet The descriptor set to add to.
+     * @param binding The binding number to use.
+     * @return The vk::WriteDescriptorSet for this object at \p binding.
+     */
+    virtual vk::WriteDescriptorSet constructDescriptorSet(
+      vk::DescriptorSet descriptorSet,
+      uint32_t binding) = 0;
+
+    /**
+     * Returns the size/magnitude of the Tensor/Image, which will be the total
+     * number of elements across all dimensions
+     *
+     * @return Unsigned integer representing the total number of elements
+     */
+    uint32_t size();
+
+    /**
+     * Returns the total size of a single element of the respective data type
+     * that this memory object holds.
+     *
+     * @return Unsigned integer representing the memory of a single element of
+     * the respective data type.
+     */
+    uint32_t dataTypeMemorySize();
+
+    /**
+     * Returns the total memory size of the data contained by the memory
+     * object, which equates to (this->size() * this->dataTypeMemorySize())
+     *
+     * @return Unsigned integer representing the total memory size of the data
+     * contained by the memory object.
+     */
+    uint32_t memorySize();
+
+    vk::DescriptorType getDescriptorType() { return mDescriptorType; }
+
+    /**
+     * Retrieve the raw data via the pointer to the memory that contains the
+     * raw memory of this current tensor/image. The pointer is reset to
+     * nullptr when the Tensor/Image is destroyed.
+     *
+     * @return Pointer to raw memory containing raw bytes data of Tensor/Image.
+     */
+    void* rawData();
+
+    /**
+     * Sets / resets the data of the tensor/image, which is performed directly
+     * on the GPU host-visible memory available to the tensor/image.
+     */
+    void setRawData(const void* data);
+
+    /**
+     * Template to return the pointer data converted to a specific type, which
+     * would be any of the supported types including float, double, int32,
+     * uint32 and bool.
+     *
+     * @return Pointer to raw memory containing raw bytes data of Tensor/Image.
+     */
+    template<typename T>
+    T* data()
+    {
+        if (this->mRawData == nullptr) {
+            this->mapRawData();
+        }
+
+        return (T*)this->mRawData;
+    }
+
+    /**
+     * Return the pointer data cast to float.
+ * + * @return Pointer to raw memory containing raw bytes data of Tensor/Image. + * This is the default, for convenience. + */ + float* data() { return this->data(); } + + /** + * Template to get the data of the current tensor/image as a vector of + * specific type, which would be any of the supported types including float, + * double, int32, uint32 and bool. + * + * @return Vector of type provided by template. + */ + template + std::vector vector() + { + if (this->mRawData == nullptr) { + this->mapRawData(); + } + + return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() }; + } + + /** + * Get the data of the current tensor/image as a vector of float. + * This is the default, for convenience. + * + * @return Vector of floats. + */ + std::vector vector() { return this->vector(); } + + protected: + // -------------- ALWAYS OWNED RESOURCES + MemoryTypes mMemoryType; + uint32_t mSize; + uint32_t mDataTypeMemorySize; + void* mRawData = nullptr; + vk::DescriptorType mDescriptorType; + bool mUnmapMemory = false; + + // -------------- NEVER OWNED RESOURCES + std::shared_ptr mPhysicalDevice; + std::shared_ptr mDevice; + + // -------------- OPTIONALLY OWNED RESOURCES + std::shared_ptr mPrimaryMemory; + bool mFreePrimaryMemory = false; + std::shared_ptr mStagingMemory; + bool mFreeStagingMemory = false; + + // Private util functions + void mapRawData(); + void unmapRawData(); +}; + +} // End namespace kp diff --git a/src/include/kompute/Sequence.hpp b/src/include/kompute/Sequence.hpp index de9b9f69..a3b1be17 100644 --- a/src/include/kompute/Sequence.hpp +++ b/src/include/kompute/Sequence.hpp @@ -53,17 +53,18 @@ class Sequence : public std::enable_shared_from_this * function also requires the Sequence to be recording, otherwise it will * not be able to add the operation. * - * @param tensors Vector of tensors to use for the operation + * @param memObjects Vector of mem objects to use for the operation * @param TArgs Template parameters that are used to initialise operation * which allows for extensible configurations on initialisation. * @return shared_ptr of the Sequence class itself */ template std::shared_ptr record( - std::vector> tensors, + std::vector> memObjects, TArgs&&... params) { - std::shared_ptr op{ new T(tensors, std::forward(params)...) }; + std::shared_ptr op{ new T(memObjects, + std::forward(params)...) }; return this->record(op); } /** @@ -108,16 +109,18 @@ class Sequence : public std::enable_shared_from_this * Eval sends all the recorded and stored operations in the vector of * operations into the gpu as a submit job with a barrier. * - * @param tensors Vector of tensors to use for the operation + * @param memObjects Vector of memory objects to use for the operation * @param TArgs Template parameters that are used to initialise operation * which allows for extensible configurations on initialisation. * @return shared_ptr of the Sequence class itself */ template - std::shared_ptr eval(std::vector> tensors, + std::shared_ptr eval( + std::vector> memObjects, TArgs&&... params) { - std::shared_ptr op{ new T(tensors, std::forward(params)...) }; + std::shared_ptr op{ new T(memObjects, + std::forward(params)...) }; return this->eval(op); } /** @@ -161,17 +164,18 @@ class Sequence : public std::enable_shared_from_this * Eval sends all the recorded and stored operations in the vector of * operations into the gpu as a submit job with a barrier. 
* - * @param tensors Vector of tensors to use for the operation + * @param memObjects Vector of memory objects to use for the operation * @param TArgs Template parameters that are used to initialise operation * which allows for extensible configurations on initialisation. * @return shared_ptr of the Sequence class itself */ template std::shared_ptr evalAsync( - std::vector> tensors, + std::vector> memObjects, TArgs&&... params) { - std::shared_ptr op{ new T(tensors, std::forward(params)...) }; + std::shared_ptr op{ new T(memObjects, + std::forward(params)...) }; return this->evalAsync(op); } /** @@ -243,7 +247,7 @@ class Sequence : public std::enable_shared_from_this /** * Clears command buffer and triggers re-record of all the current - * operations saved, which is useful if the underlying kp::Tensors or + * operations saved, which is useful if the underlying kp::Memorys or * kp::Algorithms are modified and need to be re-recorded. */ void rerecord(); diff --git a/src/include/kompute/Tensor.hpp b/src/include/kompute/Tensor.hpp index e39cc64c..b4c9f7e3 100644 --- a/src/include/kompute/Tensor.hpp +++ b/src/include/kompute/Tensor.hpp @@ -2,12 +2,15 @@ #pragma once #include "kompute/Core.hpp" +#include "kompute/Memory.hpp" #include "logger/Logger.hpp" #include #include namespace kp { +// Forward-declare the Image class +class Image; /** * Structured data used in GPU operations. * @@ -16,21 +19,9 @@ namespace kp { * would be used to store their respective data. The tensors can be used for GPU * data storage or transfer. */ -class Tensor +class Tensor : public Memory { public: - /** - * Type for tensors created: Device allows memory to be transferred from - * staging buffers. Staging are host memory visible. Storage are device - * visible but are not set up to transfer or receive data (only for shader - * storage). - */ - enum class TensorTypes - { - eDevice = 0, ///< Type is device memory, source and destination - eHost = 1, ///< Type is host memory, source and destination - eStorage = 2, ///< Type is Device memory (only) - }; enum class TensorDataTypes { eBool = 0, @@ -42,7 +33,6 @@ class Tensor }; static std::string toString(TensorDataTypes dt); - static std::string toString(TensorTypes dt); /** * Constructor with data provided which would be used to create the @@ -52,7 +42,7 @@ class Tensor * @param device The device to use to create the buffer and memory from * @param data Non-zero-sized vector of data that will be used by the * tensor - * @param tensorTypes Type for the tensor which is of type TensorTypes + * @param tensorTypes Type for the tensor which is of type MemoryTypes */ Tensor(std::shared_ptr physicalDevice, std::shared_ptr device, @@ -60,7 +50,7 @@ class Tensor uint32_t elementTotalCount, uint32_t elementMemorySize, const TensorDataTypes& dataType, - const TensorTypes& tensorType = TensorTypes::eDevice); + const MemoryTypes& tensorType = MemoryTypes::eDevice); /** * Destructor which is in charge of freeing vulkan resources unless they @@ -92,22 +82,26 @@ class Tensor bool isInit(); /** - * Retrieve the tensor type of the Tensor + * Records a copy from the memory of the tensor provided to the current + * tensor. This is intended to pass memory into a processing, to perform + * a staging buffer transfer, or to gather output (between others). 
* - * @return Tensor type of tensor + * @param commandBuffer Vulkan Command Buffer to record the commands into + * @param copyFromTensor Tensor to copy the data from */ - TensorTypes tensorType(); + void recordCopyFrom(const vk::CommandBuffer& commandBuffer, + std::shared_ptr copyFromTensor); /** - * Records a copy from the memory of the tensor provided to the current - * thensor. This is intended to pass memory into a processing, to perform + * Records a copy from the memory of the image provided to the current + * tensor. This is intended to pass memory into a processing, to perform * a staging buffer transfer, or to gather output (between others). * * @param commandBuffer Vulkan Command Buffer to record the commands into - * @param copyFromTensor Tensor to copy the data from + * @param copyFromImage Image to copy the data from */ void recordCopyFrom(const vk::CommandBuffer& commandBuffer, - std::shared_ptr copyFromTensor); + std::shared_ptr copyFromImage); /** * Records a copy from the internal staging memory to the device memory @@ -128,7 +122,7 @@ class Tensor void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer); /** - * Records the buffer memory barrier into the primary buffer and command + * Records the memory barrier into the primary buffer and command * buffer which ensures that relevant data transfers are carried out * correctly. * @@ -138,14 +132,13 @@ class Tensor * @param scrStageMask Pipeline stage flags for source stage mask * @param dstStageMask Pipeline stage flags for destination stage mask */ - void recordPrimaryBufferMemoryBarrier( - const vk::CommandBuffer& commandBuffer, + void recordPrimaryMemoryBarrier(const vk::CommandBuffer& commandBuffer, vk::AccessFlagBits srcAccessMask, vk::AccessFlagBits dstAccessMask, vk::PipelineStageFlagBits srcStageMask, vk::PipelineStageFlagBits dstStageMask); /** - * Records the buffer memory barrier into the staging buffer and command + * Records the memory barrier into the staging buffer and command * buffer which ensures that relevant data transfers are carried out * correctly. * @@ -155,47 +148,22 @@ class Tensor * @param scrStageMask Pipeline stage flags for source stage mask * @param dstStageMask Pipeline stage flags for destination stage mask */ - void recordStagingBufferMemoryBarrier( - const vk::CommandBuffer& commandBuffer, + void recordStagingMemoryBarrier(const vk::CommandBuffer& commandBuffer, vk::AccessFlagBits srcAccessMask, vk::AccessFlagBits dstAccessMask, vk::PipelineStageFlagBits srcStageMask, vk::PipelineStageFlagBits dstStageMask); /** - * Constructs a vulkan descriptor buffer info which can be used to specify - * and reference the underlying buffer component of the tensor without - * exposing it. - * - * @return Descriptor buffer info with own buffer - */ - vk::DescriptorBufferInfo constructDescriptorBufferInfo(); - - /** - * Returns the size/magnitude of the Tensor, which will be the total number - * of elements across all dimensions - * - * @return Unsigned integer representing the total number of elements - */ - uint32_t size(); - - /** - * Returns the total size of a single element of the respective data type - * that this tensor holds. - * - * @return Unsigned integer representing the memory of a single element of - * the respective data type. 
- */ - uint32_t dataTypeMemorySize(); - - /** - * Returns the total memory size of the data contained by the Tensor object - * which would equate to (this->size() * this->dataTypeMemorySize()) + * Adds this object to a Vulkan descriptor set at \p binding. * - * @return Unsigned integer representing the memory of a single element of - * the respective data type. + * @param descriptorSet The descriptor set to add to. + * @param binding The binding number to use. + * @return Add this object to a descriptor set at \p binding. */ - uint32_t memorySize(); + vk::WriteDescriptorSet constructDescriptorSet( + vk::DescriptorSet descriptorSet, + uint32_t binding); /** * Retrieve the data type of the tensor (host, device, storage) @@ -204,69 +172,20 @@ class Tensor */ TensorDataTypes dataType(); - /** - * Retrieve the raw data via the pointer to the memory that contains the raw - * memory of this current tensor. This tensor gets changed to a nullptr when - * the Tensor is removed. - * - * @return Pointer to raw memory containing raw bytes data of Tensor. - */ - void* rawData(); - - /** - * Sets / resets the data of the tensor which is directly done on the GPU - * host visible memory available by the tensor. - */ - void setRawData(const void* data); - - /** - * Template to return the pointer data converted by specific type, which - * would be any of the supported types including float, double, int32, - * uint32 and bool. - * - * @return Pointer to raw memory containing raw bytes data of Tensor. - */ - template - T* data() - { - return (T*)this->mRawData; - } - - /** - * Template to get the data of the current tensor as a vector of specific - * type, which would be any of the supported types including float, double, - * int32, uint32 and bool. - * - * @return Vector of type provided by template. 
- */ - template - std::vector vector() - { - return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() }; - } + std::shared_ptr getPrimaryBuffer(); protected: // -------------- ALWAYS OWNED RESOURCES - TensorTypes mTensorType; TensorDataTypes mDataType; - uint32_t mSize; - uint32_t mDataTypeMemorySize; - void* mRawData; + vk::DescriptorBufferInfo mDescriptorBufferInfo; private: - // -------------- NEVER OWNED RESOURCES - std::shared_ptr mPhysicalDevice; - std::shared_ptr mDevice; // -------------- OPTIONALLY OWNED RESOURCES std::shared_ptr mPrimaryBuffer; bool mFreePrimaryBuffer = false; std::shared_ptr mStagingBuffer; bool mFreeStagingBuffer = false; - std::shared_ptr mPrimaryMemory; - bool mFreePrimaryMemory = false; - std::shared_ptr mStagingMemory; - bool mFreeStagingMemory = false; void allocateMemoryCreateGPUResources(); // Creates the vulkan buffer void createBuffer(std::shared_ptr buffer, @@ -279,6 +198,11 @@ class Tensor std::shared_ptr bufferTo, vk::DeviceSize bufferSize, vk::BufferCopy copyRegion); + void recordCopyBufferFromImage(const vk::CommandBuffer& commandBuffer, + std::shared_ptr imageFrom, + std::shared_ptr bufferTo, + vk::DeviceSize /*bufferSize*/, + vk::BufferImageCopy copyRegion); void recordBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer, const vk::Buffer& buffer, vk::AccessFlagBits srcAccessMask, @@ -292,8 +216,7 @@ class Tensor vk::BufferUsageFlags getStagingBufferUsageFlags(); vk::MemoryPropertyFlags getStagingMemoryPropertyFlags(); - void mapRawData(); - void unmapRawData(); + vk::DescriptorBufferInfo constructDescriptorBufferInfo(); }; template @@ -304,7 +227,7 @@ class TensorT : public Tensor TensorT(std::shared_ptr physicalDevice, std::shared_ptr device, const std::vector& data, - const TensorTypes& tensorType = TensorTypes::eDevice) + const MemoryTypes& tensorType = MemoryTypes::eDevice) : Tensor(physicalDevice, device, (void*)data.data(), @@ -319,14 +242,8 @@ class TensorT : public Tensor ~TensorT() { KP_LOG_DEBUG("Kompute TensorT destructor"); } - T* data() { return (T*)this->mRawData; } - - std::vector vector() - { - return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() }; - } + std::vector vector() { return Memory::vector(); } - T& operator[](int index) { return *(((T*)this->mRawData) + index); } void setData(const std::vector& data) { diff --git a/src/include/kompute/operations/OpBase.hpp b/src/include/kompute/operations/OpBase.hpp index 73767084..f411c5aa 100644 --- a/src/include/kompute/operations/OpBase.hpp +++ b/src/include/kompute/operations/OpBase.hpp @@ -3,6 +3,7 @@ #include "kompute/Algorithm.hpp" #include "kompute/Core.hpp" +#include "kompute/Image.hpp" #include "kompute/Tensor.hpp" namespace kp { diff --git a/src/include/kompute/operations/OpImageCopy.hpp b/src/include/kompute/operations/OpImageCopy.hpp new file mode 100644 index 00000000..424480a8 --- /dev/null +++ b/src/include/kompute/operations/OpImageCopy.hpp @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: Apache-2.0 +#pragma once + +#include "kompute/Core.hpp" + +#include "kompute/Image.hpp" + +#include "kompute/operations/OpBase.hpp" + +namespace kp { + +/** + * Operation that copies the data from the first image to the rest of the + * images provided, using a record command for all the vectors. This operation + * does not own/manage the memory of the images passed to it. 
+ */
+class OpImageCopy : public OpBase
+{
+  public:
+    /**
+     * Default constructor with parameters that provides the core vulkan
+     * resources and the images that will be used in the operation.
+     *
+     * @param images Images that will be used in the operation.
+     */
+    OpImageCopy(const std::vector<std::shared_ptr<Image>>& images);
+
+    /**
+     * Default destructor. This class does not manage memory so it won't be
+     * expecting the parent to perform a release.
+     */
+    ~OpImageCopy() override;
+
+    /**
+     * Records the copy commands from the first image into all the other
+     * images provided. Also optionally records a barrier.
+     *
+     * @param commandBuffer The command buffer to record the command into.
+     */
+    void record(const vk::CommandBuffer& commandBuffer) override;
+
+    /**
+     * Does not perform any preEval commands.
+     *
+     * @param commandBuffer The command buffer to record the command into.
+     */
+    virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
+
+    /**
+     * Copies the local vectors for all the images to sync the data with the
+     * gpu.
+     *
+     * @param commandBuffer The command buffer to record the command into.
+     */
+    virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
+
+  private:
+    // -------------- ALWAYS OWNED RESOURCES
+    std::vector<std::shared_ptr<Image>> mImages;
+};
+
+} // End namespace kp
diff --git a/src/include/kompute/operations/OpImageCopyToTensor.hpp b/src/include/kompute/operations/OpImageCopyToTensor.hpp
new file mode 100644
index 00000000..a503dbbb
--- /dev/null
+++ b/src/include/kompute/operations/OpImageCopyToTensor.hpp
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: Apache-2.0
+#pragma once
+
+#include "kompute/Core.hpp"
+
+#include "kompute/Image.hpp"
+
+#include "kompute/operations/OpBase.hpp"
+
+namespace kp {
+
+/**
+ * Operation that copies the data from the first image to the vector of
+ * tensors provided, using a record command for all the vectors. This
+ * operation does not own/manage the memory of the image/tensors passed to it.
+ */
+class OpImageCopyToTensor : public OpBase
+{
+  public:
+    /**
+     * Default constructor with parameters that provides the core vulkan
+     * resources and the image/tensors that will be used in the operation.
+     *
+     * @param mem_objects The image/tensors that will be used in the operation.
+     */
+    OpImageCopyToTensor(
+      const std::vector<std::shared_ptr<Memory>>& mem_objects);
+
+    /**
+     * Default destructor. This class does not manage memory so it won't be
+     * expecting the parent to perform a release.
+     */
+    ~OpImageCopyToTensor() override;
+
+    /**
+     * Records the copy commands from the image into all the tensors
+     * provided. Also optionally records a barrier.
+     *
+     * @param commandBuffer The command buffer to record the command into.
+     */
+    void record(const vk::CommandBuffer& commandBuffer) override;
+
+    /**
+     * Does not perform any preEval commands.
+     *
+     * @param commandBuffer The command buffer to record the command into.
+     */
+    virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
+
+    /**
+     * Copies the local vectors for all the tensors to sync the data with the
+     * gpu.
+     *
+     * @param commandBuffer The command buffer to record the command into.
+     */
+    virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
+
+  private:
+    // -------------- ALWAYS OWNED RESOURCES
+    std::shared_ptr<Image> mImage;
+    std::vector<std::shared_ptr<Tensor>> mTensors;
+};
+
+} // End namespace kp
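Illustrative usage sketch (not part of the patch): assuming a kp::Manager named `mgr`, the new image ops compose with the existing sequence API roughly as follows; names and values are hypothetical, and the pattern mirrors the tests later in this diff.

    auto image  = mgr.image({ 1.0f, 2.0f, 3.0f }, 3, 1, 1); // 3x1, 1 channel
    auto tensor = mgr.tensor({ 0.0f, 0.0f, 0.0f });
    mgr.sequence()
      ->eval<kp::OpImageSyncDevice>({ image })           // staging -> device
      ->eval<kp::OpImageCopyToTensor>({ image, tensor }) // image -> tensor
      ->eval<kp::OpTensorSyncLocal>({ tensor });         // device -> host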
diff --git a/src/include/kompute/operations/OpImageSyncDevice.hpp b/src/include/kompute/operations/OpImageSyncDevice.hpp
new file mode 100644
index 00000000..433d217d
--- /dev/null
+++ b/src/include/kompute/operations/OpImageSyncDevice.hpp
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: Apache-2.0
+#pragma once
+
+#include "kompute/Core.hpp"
+#include "kompute/Image.hpp"
+#include "kompute/operations/OpBase.hpp"
+
+namespace kp {
+
+/**
+ * Operation that syncs an image's device memory by mapping local data into
+ * the device memory. For ImageTypes::eDevice it will use a record operation
+ * for the memory to be synced into GPU memory, which means that the operation
+ * will be done in sync with GPU commands. For ImageTypes::eHost it will only
+ * map the data into host memory, which will happen during preEval before the
+ * recorded commands are dispatched.
+ */
+class OpImageSyncDevice : public OpBase
+{
+  public:
+    /**
+     * Default constructor with parameters that provides the core vulkan
+     * resources and the images that will be used in the operation. The images
+     * provided cannot be of type ImageTypes::eStorage.
+     *
+     * @param images Images that will be used in the operation.
+     */
+    OpImageSyncDevice(const std::vector<std::shared_ptr<Image>>& images);
+
+    /**
+     * Default destructor. This class does not manage memory so it won't be
+     * expecting the parent to perform a release.
+     */
+    ~OpImageSyncDevice() override;
+
+    /**
+     * For device images, it records the copy command for the image to copy
+     * the data from its staging to device memory.
+     *
+     * @param commandBuffer The command buffer to record the command into.
+     */
+    void record(const vk::CommandBuffer& commandBuffer) override;
+
+    /**
+     * Does not perform any preEval commands.
+     *
+     * @param commandBuffer The command buffer to record the command into.
+     */
+    virtual void preEval(const vk::CommandBuffer& commandBuffer) override;
+
+    /**
+     * Does not perform any postEval commands.
+     *
+     * @param commandBuffer The command buffer to record the command into.
+     */
+    virtual void postEval(const vk::CommandBuffer& commandBuffer) override;
+
+  private:
+    // -------------- ALWAYS OWNED RESOURCES
+    std::vector<std::shared_ptr<Image>> mImages;
+};
+
+} // End namespace kp
diff --git a/src/include/kompute/operations/OpImageSyncLocal.hpp b/src/include/kompute/operations/OpImageSyncLocal.hpp
new file mode 100644
index 00000000..e041af0f
--- /dev/null
+++ b/src/include/kompute/operations/OpImageSyncLocal.hpp
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: Apache-2.0
+#pragma once
+
+#include "kompute/Core.hpp"
+
+#include "kompute/Image.hpp"
+
+#include "kompute/operations/OpBase.hpp"
+
+namespace kp {
+
+/**
+ * Operation that syncs an image's local memory by mapping device data into
+ * the local CPU memory. For ImageTypes::eDevice it will use a record
+ * operation for the memory to be synced from GPU memory, which means that the
+ * operation will be done in sync with GPU commands. For ImageTypes::eHost it
+ * will only map the data into host memory, which will happen during preEval
+ * before the recorded commands are dispatched.
+ */
+class OpImageSyncLocal : public OpBase
+{
+  public:
+    /**
+     * Default constructor with parameters that provides the core vulkan
+     * resources and the images that will be used in the operation. The images
+     * provided cannot be of type ImageTypes::eStorage.
+ * + * @param images Images that will be used to create in operation. + */ + OpImageSyncLocal(const std::vector>& images); + + /** + * Default destructor. This class does not manage memory so it won't be + * expecting the parent to perform a release. + */ + ~OpImageSyncLocal() override; + + /** + * For device images, it records the copy command for the image to copy + * the data from its device to staging memory. + * + * @param commandBuffer The command buffer to record the command into. + */ + void record(const vk::CommandBuffer& commandBuffer) override; + + /** + * Does not perform any preEval commands. + * + * @param commandBuffer The command buffer to record the command into. + */ + virtual void preEval(const vk::CommandBuffer& commandBuffer) override; + + /** + * For host images it performs the map command from the host memory into + * local memory. + * + * @param commandBuffer The command buffer to record the command into. + */ + virtual void postEval(const vk::CommandBuffer& commandBuffer) override; + + private: + // -------------- ALWAYS OWNED RESOURCES + std::vector> mImages; +}; + +} // End namespace kp diff --git a/src/include/kompute/operations/OpMemoryBarrier.hpp b/src/include/kompute/operations/OpMemoryBarrier.hpp index 4a232232..55866538 100644 --- a/src/include/kompute/operations/OpMemoryBarrier.hpp +++ b/src/include/kompute/operations/OpMemoryBarrier.hpp @@ -19,11 +19,11 @@ class OpMemoryBarrier : public OpBase { public: /** - * Constructor that stores tensors as well as memory barrier parameters to - * be used to create a pipeline barrier on the respective primary or staging - * tensor. + * Constructor that stores mem objects as well as memory barrier parameters + * to be used to create a pipeline barrier on the respective primary or + * staging tensor. * - * @param tensors The tensors to apply the memory barriers on + * @param memObjects The mem objects to apply the memory barriers on * @param srcAccessMask The kp::AccessFlagBits for the source access mask * @param dstAccessMask The kp::AccessFlagBits for the destination access * mask @@ -32,9 +32,9 @@ class OpMemoryBarrier : public OpBase * @param dstStageMask The kp::PipelineStageFlagBits for the destination * stage mask * @param barrierOnPrimary Boolean to select primary or secondary buffers on - * tensors + * mem objects */ - OpMemoryBarrier(const std::vector>& tensors, + OpMemoryBarrier(const std::vector>& memObjects, const vk::AccessFlagBits& srcAccessMask, const vk::AccessFlagBits& dstAccessMask, const vk::PipelineStageFlagBits& srcStageMask, @@ -75,7 +75,7 @@ class OpMemoryBarrier : public OpBase const vk::PipelineStageFlagBits mSrcStageMask; const vk::PipelineStageFlagBits mDstStageMask; const bool mBarrierOnPrimary; - const std::vector> mTensors; + const std::vector> mMemObjects; }; } // End namespace kp diff --git a/src/include/kompute/operations/OpMult.hpp b/src/include/kompute/operations/OpMult.hpp index f75ccc4f..201754e2 100644 --- a/src/include/kompute/operations/OpMult.hpp +++ b/src/include/kompute/operations/OpMult.hpp @@ -26,26 +26,26 @@ class OpMult : public OpAlgoDispatch * requirements for the operations to be able to create and manage their * sub-components. 
* - * @param tensors Tensors that are to be used in this operation + * @param memObjects Memory objects that are to be used in this operation * @param algorithm An algorithm that will be overridden with the OpMult * shader data and the tensors provided which are expected to be 3 */ - OpMult(std::vector> tensors, + OpMult(std::vector> memObjects, std::shared_ptr algorithm) : OpAlgoDispatch(algorithm) { KP_LOG_DEBUG("Kompute OpMult constructor with params"); - if (tensors.size() != 3) { + if (memObjects.size() != 3) { throw std::runtime_error( - "Kompute OpMult expected 3 tensors but got " + - std::to_string(tensors.size())); + "Kompute OpMult expected 3 mem objects but got " + + std::to_string(memObjects.size())); } const std::vector spirv = std::vector( SHADEROPMULT_COMP_SPV.begin(), SHADEROPMULT_COMP_SPV.end()); - algorithm->rebuild<>(tensors, spirv); + algorithm->rebuild<>(memObjects, spirv); } /** diff --git a/src/include/kompute/operations/OpTensorCopy.hpp b/src/include/kompute/operations/OpTensorCopy.hpp index 968c1065..78bac369 100644 --- a/src/include/kompute/operations/OpTensorCopy.hpp +++ b/src/include/kompute/operations/OpTensorCopy.hpp @@ -24,7 +24,7 @@ class OpTensorCopy : public OpBase * * @param tensors Tensors that will be used to create in operation. */ - OpTensorCopy(const std::vector>& tensors); + OpTensorCopy(const std::vector>& tensors); /** * Default destructor. This class does not manage memory so it won't be diff --git a/src/include/kompute/operations/OpTensorCopyToImage.hpp b/src/include/kompute/operations/OpTensorCopyToImage.hpp new file mode 100644 index 00000000..8c009c4c --- /dev/null +++ b/src/include/kompute/operations/OpTensorCopyToImage.hpp @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: Apache-2.0 +#pragma once + +#include "kompute/Core.hpp" + +#include "kompute/Tensor.hpp" + +#include "kompute/operations/OpBase.hpp" + +namespace kp { + +/** + * Operation that copies the data from the first tensor to the vector of images + * provided, using a record command for all the vectors. This operation + * does not own/manage the memory of the tensor/images passed to it. + */ +class OpTensorCopyToImage : public OpBase +{ + public: + /** + * Default constructor with parameters that provides the core vulkan + * resources and the tensors/images that will be used in the operation. + * + * @param tensors Tensors that will be used to create in operation. + */ + OpTensorCopyToImage( + const std::vector>& mem_objects); + + /** + * Default destructor. This class does not manage memory so it won't be + * expecting the parent to perform a release. + */ + ~OpTensorCopyToImage() override; + + /** + * Records the copy commands from the first tensor into all the other + * tensors provided. Also optionally records a barrier. + * + * @param commandBuffer The command buffer to record the command into. + */ + void record(const vk::CommandBuffer& commandBuffer) override; + + /** + * Does not perform any preEval commands. + * + * @param commandBuffer The command buffer to record the command into. + */ + virtual void preEval(const vk::CommandBuffer& commandBuffer) override; + + /** + * Copies the local vectors for all the tensors to sync the data with the + * gpu. + * + * @param commandBuffer The command buffer to record the command into. 
+ */ + virtual void postEval(const vk::CommandBuffer& commandBuffer) override; + + private: + // -------------- ALWAYS OWNED RESOURCES + std::shared_ptr mTensor; + std::vector> mImages; +}; + +} // End namespace kp diff --git a/src/include/kompute/operations/OpTensorSyncDevice.hpp b/src/include/kompute/operations/OpTensorSyncDevice.hpp index 3a1792ac..c2859e79 100644 --- a/src/include/kompute/operations/OpTensorSyncDevice.hpp +++ b/src/include/kompute/operations/OpTensorSyncDevice.hpp @@ -25,7 +25,7 @@ class OpTensorSyncDevice : public OpBase * * @param tensors Tensors that will be used to create in operation. */ - OpTensorSyncDevice(const std::vector>& tensors); + OpTensorSyncDevice(const std::vector>& tensors); /** * Default destructor. This class does not manage memory so it won't be @@ -57,7 +57,7 @@ class OpTensorSyncDevice : public OpBase private: // -------------- ALWAYS OWNED RESOURCES - std::vector> mTensors; + std::vector> mTensors; }; } // End namespace kp diff --git a/src/include/kompute/operations/OpTensorSyncLocal.hpp b/src/include/kompute/operations/OpTensorSyncLocal.hpp index 4216003e..7564c0c1 100644 --- a/src/include/kompute/operations/OpTensorSyncLocal.hpp +++ b/src/include/kompute/operations/OpTensorSyncLocal.hpp @@ -10,10 +10,10 @@ namespace kp { /** - * Operation that syncs tensor's local memory by mapping device data into the - * local CPU memory. For TensorTypes::eDevice it will use a record operation + * Operation that syncs mem object's local memory by mapping device data into + * the local CPU memory. For MemoryTypes::eDevice it will use a record operation * for the memory to be syncd into GPU memory which means that the operation - * will be done in sync with GPU commands. For TensorTypes::eHost it will + * will be done in sync with GPU commands. For MemoryTypes::eHost it will * only map the data into host memory which will happen during preEval before * the recorded commands are dispatched. */ @@ -22,12 +22,12 @@ class OpTensorSyncLocal : public OpBase public: /** * Default constructor with parameters that provides the core vulkan - * resources and the tensors that will be used in the operation. The tensors - * provided cannot be of type TensorTypes::eStorage. + * resources and the memory that will be used in the operation. The memory + * provided cannot be of type MemoryTypes::eStorage. * * @param tensors Tensors that will be used to create in operation. */ - OpTensorSyncLocal(const std::vector>& tensors); + OpTensorSyncLocal(const std::vector>& tensors); /** * Default destructor. 
This class does not manage memory so it won't be @@ -60,7 +60,7 @@ class OpTensorSyncLocal : public OpBase private: // -------------- ALWAYS OWNED RESOURCES - std::vector> mTensors; + std::vector> mTensors; }; } // End namespace kp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e2dcab3b..c213be94 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -17,11 +17,19 @@ add_executable(kompute_tests TestAsyncOperations.cpp TestMultipleAlgoExecutions.cpp TestOpShadersFromStringAndFile.cpp TestOpTensorCopy.cpp + TestOpTensorCopyToImage.cpp TestOpTensorCreate.cpp + TestOpTensorSync.cpp TestPushConstant.cpp TestSequence.cpp TestSpecializationConstant.cpp - TestWorkgroup.cpp) + TestWorkgroup.cpp + TestTensor.cpp + TestImage.cpp + TestOpImageCreate.cpp + TestOpImageCopy.cpp + TestOpImageSync.cpp + TestOpImageCopyToTensor.cpp) target_link_libraries(kompute_tests PRIVATE GTest::gtest_main kompute::kompute diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp index d549dda4..24e37f33 100644 --- a/test/TestAsyncOperations.cpp +++ b/test/TestAsyncOperations.cpp @@ -50,7 +50,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) std::shared_ptr sq = mgr.sequence(); - std::vector> inputsSyncB; + std::vector> inputsSyncB; std::vector> algorithms; for (uint32_t i = 0; i < numParallel; i++) { @@ -81,7 +81,7 @@ TEST(TestAsyncOperations, TestManagerParallelExecution) kp::Manager mgrAsync(0, { 0, 2 }); - std::vector> inputsAsyncB; + std::vector> inputsAsyncB; std::vector> algosAsync; diff --git a/test/TestDestroy.cpp b/test/TestDestroy.cpp index f6b92cd9..63052e9e 100644 --- a/test/TestDestroy.cpp +++ b/test/TestDestroy.cpp @@ -36,14 +36,14 @@ TEST(TestDestroy, TestDestroyTensorSingle) mgr.algorithm({ tensorA }, spirv); // Sync values to and from device - mgr.sequence()->eval(algo->getTensors()); + mgr.sequence()->eval(algo->getMemObjects()); EXPECT_EQ(tensorA->vector(), initialValues); mgr.sequence() ->record(algo) ->eval() - ->eval(algo->getTensors()); + ->eval(algo->getMemObjects()); const std::vector expectedFinalValues = { 1.0f, 1.0f, 1.0f }; EXPECT_EQ(tensorA->vector(), expectedFinalValues); @@ -85,9 +85,9 @@ TEST(TestDestroy, TestDestroyTensorVector) mgr.algorithm({ tensorA, tensorB }, spirv); mgr.sequence() - ->record(algo->getTensors()) + ->record(algo->getMemObjects()) ->record(algo) - ->record(algo->getTensors()) + ->record(algo->getMemObjects()) ->eval(); EXPECT_EQ(tensorA->vector(), std::vector({ 2, 2, 2 })); diff --git a/test/TestImage.cpp b/test/TestImage.cpp new file mode 100644 index 00000000..6e191deb --- /dev/null +++ b/test/TestImage.cpp @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: Apache-2.0 + +#include "gtest/gtest.h" + +#include "kompute/Kompute.hpp" +#include "kompute/logger/Logger.hpp" + +TEST(TestImage, ConstructorData) +{ + kp::Manager mgr; + std::vector vec{ 0, 1, 2, 3, 4, 5, 6, 7, 8 }; + std::shared_ptr> image = mgr.image(vec, 3, 3, 1); + EXPECT_EQ(image->size(), vec.size()); + EXPECT_EQ(image->dataTypeMemorySize(), sizeof(float)); + EXPECT_EQ(image->vector(), vec); +} + +TEST(TestImage, DataTypes) +{ + kp::Manager mgr; + const int width = 3; + const int height = 3; + + for (int numChannels = 1; numChannels <= 4; numChannels++) { + { + std::vector vec(width * height * numChannels); + std::shared_ptr> image = + mgr.image(vec, width, height, numChannels); + EXPECT_EQ(image->dataType(), kp::Image::ImageDataTypes::eF32); + } + + { + std::vector vec(width * height * numChannels); + std::shared_ptr> image = + mgr.imageT(vec, width, height, 
numChannels);
+            EXPECT_EQ(image->dataType(), kp::Image::ImageDataTypes::eS32);
+        }
+
+        {
+            std::vector<uint32_t> vec(width * height * numChannels);
+            std::shared_ptr<kp::ImageT<uint32_t>> image =
+              mgr.imageT<uint32_t>(vec, width, height, numChannels);
+            EXPECT_EQ(image->dataType(), kp::Image::ImageDataTypes::eU32);
+        }
+
+        {
+            std::vector<int16_t> vec(width * height * numChannels);
+            std::shared_ptr<kp::ImageT<int16_t>> image =
+              mgr.imageT<int16_t>(vec, width, height, numChannels);
+            EXPECT_EQ(image->dataType(), kp::Image::ImageDataTypes::eS16);
+        }
+
+        {
+            std::vector<uint16_t> vec(width * height * numChannels);
+            std::shared_ptr<kp::ImageT<uint16_t>> image =
+              mgr.imageT<uint16_t>(vec, width, height, numChannels);
+            EXPECT_EQ(image->dataType(), kp::Image::ImageDataTypes::eU16);
+        }
+
+        {
+            std::vector<int8_t> vec(width * height * numChannels);
+            std::shared_ptr<kp::ImageT<int8_t>> image =
+              mgr.imageT<int8_t>(vec, width, height, numChannels);
+            EXPECT_EQ(image->dataType(), kp::Image::ImageDataTypes::eS8);
+        }
+
+        {
+            std::vector<uint8_t> vec(width * height * numChannels);
+            std::shared_ptr<kp::ImageT<uint8_t>> image =
+              mgr.imageT<uint8_t>(vec, width, height, numChannels);
+            EXPECT_EQ(image->dataType(), kp::Image::ImageDataTypes::eU8);
+        }
+    }
+}
+
+TEST(TestImage, InvalidVectorSize)
+{
+    kp::Manager mgr;
+    std::vector<float> vec{ 0, 1, 2, 3 };
+
+    // The vector is too small to initialise all the data in the image.
+    EXPECT_THROW(mgr.image(vec, 3, 3, 1), std::runtime_error);
+}
+
+TEST(TestImage, LargeVectorSize)
+{
+    kp::Manager mgr;
+    std::vector<float> vec{ 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+
+    // The same vector but only the first 4 elements
+    std::vector<float> result(vec.begin(), vec.begin() + 4);
+
+    // It's OK to initialise an image from a vector which is larger than the
+    // image.
+    std::shared_ptr<kp::ImageT<float>> image = mgr.image(vec, 2, 2, 1);
+    EXPECT_EQ(image->size(), 2 * 2 * 1);
+
+    // The output should be equal to the result vector, not the input vector.
+    EXPECT_EQ(image->vector(), result);
+}
+
+TEST(TestImage, InvalidNumberOfChannels)
+{
+    kp::Manager mgr;
+
+    // Make a vector big enough to store our theoretical 3x3 image with 5
+    // channels.
+    std::vector<float> vec(3 * 3 * 5);
+
+    // There should be between 1 and 4 channels.
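+    // (Illustrative note: the 1-4 channel restriction matches the mapping of
+    // channel counts to one-, two-, three- and four-channel Vulkan image
+    // formats inside kp::Image; the exact format selection is an assumption
+    // here.)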
+ EXPECT_THROW(mgr.image(vec, 3, 3, 0), std::runtime_error); + EXPECT_THROW(mgr.image(vec, 3, 3, 5), std::runtime_error); +} \ No newline at end of file diff --git a/test/TestLogisticRegression.cpp b/test/TestLogisticRegression.cpp index 6b417a94..955c5992 100644 --- a/test/TestLogisticRegression.cpp +++ b/test/TestLogisticRegression.cpp @@ -34,7 +34,7 @@ TEST(TestLogisticRegression, TestMainLogisticRegression) std::shared_ptr> lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::vector> params = { xI, xJ, y, + std::vector> params = { xI, xJ, y, wIn, wOutI, wOutJ, bIn, bOut, lOut }; @@ -97,21 +97,21 @@ TEST(TestLogisticRegression, TestMainLogisticRegressionManualCopy) std::shared_ptr> y = mgr.tensor({ 0, 0, 0, 1, 1 }); std::shared_ptr> wIn = - mgr.tensor({ 0.001, 0.001 }, kp::Tensor::TensorTypes::eHost); + mgr.tensor({ 0.001, 0.001 }, kp::Memory::MemoryTypes::eHost); std::shared_ptr> wOutI = mgr.tensor({ 0, 0, 0, 0, 0 }); std::shared_ptr> wOutJ = mgr.tensor({ 0, 0, 0, 0, 0 }); std::shared_ptr> bIn = - mgr.tensor({ 0 }, kp::Tensor::TensorTypes::eHost); + mgr.tensor({ 0 }, kp::Memory::MemoryTypes::eHost); std::shared_ptr> bOut = mgr.tensor({ 0, 0, 0, 0, 0 }); std::shared_ptr> lOut = mgr.tensor({ 0, 0, 0, 0, 0 }); - std::vector> params = { xI, xJ, y, + std::vector> params = { xI, xJ, y, wIn, wOutI, wOutJ, bIn, bOut, lOut }; diff --git a/test/TestManager.cpp b/test/TestManager.cpp index a7e488d1..188e44af 100644 --- a/test/TestManager.cpp +++ b/test/TestManager.cpp @@ -13,7 +13,7 @@ TEST(TestManager, EndToEndOpMultEvalFlow) std::shared_ptr> tensorRHS = mgr.tensor({ 2, 4, 6 }); std::shared_ptr> tensorOutput = mgr.tensor({ 0, 0, 0 }); - std::vector> params = { tensorLHS, + std::vector> params = { tensorLHS, tensorRHS, tensorOutput }; @@ -33,7 +33,7 @@ TEST(TestManager, EndToEndOpMultSeqFlow) std::shared_ptr> tensorRHS = mgr.tensor({ 2, 4, 6 }); std::shared_ptr> tensorOutput = mgr.tensor({ 0, 0, 0 }); - std::vector> params = { tensorLHS, + std::vector> params = { tensorLHS, tensorRHS, tensorOutput }; @@ -54,7 +54,7 @@ TEST(TestManager, TestMultipleSequences) std::shared_ptr> tensorRHS = mgr.tensor({ 2, 4, 6 }); std::shared_ptr> tensorOutput = mgr.tensor({ 0, 0, 0 }); - std::vector> params = { tensorLHS, + std::vector> params = { tensorLHS, tensorRHS, tensorOutput }; @@ -91,7 +91,7 @@ TEST(TestManager, TestClearDestroy) std::shared_ptr> tensorOutput = mgr.tensor({ 0, 0, 0 }); - std::vector> params = { tensorLHS, + std::vector> params = { tensorLHS, tensorRHS, tensorOutput }; diff --git a/test/TestMultipleAlgoExecutions.cpp b/test/TestMultipleAlgoExecutions.cpp index f3d7315b..1ef0fb94 100644 --- a/test/TestMultipleAlgoExecutions.cpp +++ b/test/TestMultipleAlgoExecutions.cpp @@ -45,7 +45,7 @@ TEST(TestMultipleAlgoExecutions, TestEndToEndFunctionality) } )"); - std::vector> params = { + std::vector> params = { tensorInA, tensorInB, tensorOutA, tensorOutB }; @@ -93,7 +93,7 @@ TEST(TestMultipleAlgoExecutions, SingleSequenceRecord) std::vector spirv = compileSource(shader); { - // A sharedMemoryBarrier is required as the shader is not thread-safe:w + // A sharedMemoryBarrier is required as the shader is not thread-safe std::shared_ptr shaderBarrier{ new kp::OpMemoryBarrier({ tensorA }, vk::AccessFlagBits::eTransferRead, @@ -256,7 +256,7 @@ TEST(TestMultipleAlgoExecutions, TestAlgorithmUtilFunctions) } )"); - std::vector> params = { + std::vector> params = { tensorInA, tensorInB, tensorOutA, tensorOutB }; diff --git a/test/TestOpImageCopy.cpp b/test/TestOpImageCopy.cpp new file mode 100644 index 
00000000..e71dce5e --- /dev/null +++ b/test/TestOpImageCopy.cpp @@ -0,0 +1,313 @@ +// SPDX-License-Identifier: Apache-2.0 + +#include "gtest/gtest.h" + +#include "kompute/Kompute.hpp" +#include "kompute/logger/Logger.hpp" + +#include "shaders/Utils.hpp" + +TEST(TestOpImageCopy, CopyDeviceToDeviceImage) +{ + kp::Manager mgr; + + std::vector testVecA{ 1, 2, 3 }; + std::vector testVecB{ 0, 0, 0 }; + + std::shared_ptr imageA = mgr.image(testVecA, 3, 1, 1); + std::shared_ptr imageB = mgr.image(testVecB, 3, 1, 1); + + EXPECT_TRUE(imageA->isInit()); + EXPECT_TRUE(imageB->isInit()); + + mgr.sequence() + ->eval({ imageA, imageB }) + ->eval({ imageA, imageB }) + ->eval({ imageA, imageB }); + + // Making sure the GPU holds the same vector + EXPECT_EQ(imageA->vector(), imageB->vector()); +} + +TEST(TestOpImageCopy, CopyDeviceToDeviceImage2D) +{ + kp::Manager mgr; + + std::vector testVecA; + std::vector testVecB; + + for (int i = 0; i < 256; i++) { + testVecA.push_back(i); + testVecB.push_back(0); + } + + std::shared_ptr imageA = mgr.image(testVecA, 16, 16, 1); + std::shared_ptr imageB = mgr.image(testVecB, 16, 16, 1); + + EXPECT_TRUE(imageA->isInit()); + EXPECT_TRUE(imageB->isInit()); + + mgr.sequence() + ->eval({ imageA, imageB }) + ->eval({ imageA, imageB }) + ->eval({ imageA, imageB }); + + // Making sure the GPU holds the same vector + EXPECT_EQ(imageA->vector(), imageB->vector()); + + // Make sure that the vector matches the input vector + for (int i = 0; i < 256; i++) { + EXPECT_EQ(imageA->vector()[i], testVecA[i]); + EXPECT_EQ(imageB->vector()[i], testVecA[i]); + } +} + +TEST(TestOpImageCopy, CopyDeviceToDeviceImageMulti) +{ + kp::Manager mgr; + + std::vector testVecA{ 2, 3, 4 }; + std::vector testVecB{ 0, 0, 0 }; + std::vector testVecC{ 0, 0, 0 }; + + std::shared_ptr imageA = mgr.image(testVecA, 3, 1, 1); + std::shared_ptr imageB = mgr.image(testVecB, 3, 1, 1); + std::shared_ptr tensorC = mgr.image(testVecC, 3, 1, 1); + + EXPECT_TRUE(imageA->isInit()); + EXPECT_TRUE(imageB->isInit()); + EXPECT_TRUE(tensorC->isInit()); + + mgr.sequence() + ->eval({ imageA, imageB, tensorC }) + ->eval({ imageA, imageB, tensorC }); + + EXPECT_EQ(imageA->vector(), imageB->vector()); + EXPECT_EQ(imageA->vector(), tensorC->vector()); + + // Making sure the GPU holds the same vector + mgr.sequence()->eval({ imageB, tensorC }); + + EXPECT_EQ(imageA->vector(), imageB->vector()); + EXPECT_EQ(imageA->vector(), tensorC->vector()); +} + +TEST(TestOpImageCopy, CopyDeviceToHostImage) +{ + kp::Manager mgr; + + std::vector testVecA{ 3, 4, 5 }; + std::vector testVecB{ 0, 0, 0 }; + + std::shared_ptr imageA = mgr.image(testVecA, 3, 1, 1); + std::shared_ptr imageB = + mgr.image(testVecB, 3, 1, 1, kp::Memory::MemoryTypes::eHost); + + // Only calling sync on device type tensor + mgr.sequence()->eval({ imageA }); + + EXPECT_TRUE(imageA->isInit()); + EXPECT_TRUE(imageB->isInit()); + + mgr.sequence()->eval({ imageA, imageB }); + + EXPECT_EQ(imageA->vector(), imageB->vector()); + + // Making sure the GPU holds the same vector + mgr.sequence()->eval({ imageB }); + EXPECT_EQ(imageA->vector(), imageB->vector()); +} + +TEST(TestOpImageCopy, CopyHostToDeviceImage) +{ + kp::Manager mgr; + + std::vector testVecA{ 4, 5, 6 }; + std::vector testVecB{ 0, 0, 0 }; + + std::shared_ptr imageA = + mgr.image(testVecA, 3, 1, 1, kp::Memory::MemoryTypes::eHost); + std::shared_ptr imageB = mgr.image(testVecB, 3, 1, 1); + + // Only calling sync on device type tensor + mgr.sequence()->eval({ imageA, imageB }); + + EXPECT_TRUE(imageA->isInit()); + 
EXPECT_TRUE(imageB->isInit()); + + mgr.sequence()->eval({ imageA, imageB }); + + EXPECT_EQ(imageA->vector(), imageB->vector()); + + // Making sure the GPU holds the same vector + mgr.sequence()->eval({ imageB }); + EXPECT_EQ(imageA->vector(), imageB->vector()); +} + +TEST(TestOpImageCopy, CopyHostToHostImage) +{ + kp::Manager mgr; + + std::vector testVecA{ 5, 6, 7 }; + std::vector testVecB{ 0, 0, 0 }; + + std::shared_ptr imageA = + mgr.image(testVecA, 3, 1, 1, kp::Memory::MemoryTypes::eHost); + std::shared_ptr imageB = + mgr.image(testVecB, 3, 1, 1, kp::Memory::MemoryTypes::eHost); + + EXPECT_TRUE(imageA->isInit()); + EXPECT_TRUE(imageB->isInit()); + + mgr.sequence() + ->eval({ imageA }) + ->eval({ imageA, imageB }); + + EXPECT_EQ(imageA->vector(), imageB->vector()); + + // Making sure the GPU holds the same vector + mgr.sequence()->eval({ imageB }); + EXPECT_EQ(imageA->vector(), imageB->vector()); +} + +TEST(TestOpImageCopy, CopyDeviceAndHostToDeviceAndHostImage) +{ + kp::Manager mgr; + + std::vector testVecA{ 1, 2, 3 }; + std::vector testVecB{ 0, 0, 0 }; + + std::shared_ptr imageA = + mgr.image(testVecA, 3, 1, 1, kp::Memory::MemoryTypes::eDeviceAndHost); + std::shared_ptr imageB = + mgr.image(testVecB, 3, 1, 1, kp::Memory::MemoryTypes::eDeviceAndHost); + + EXPECT_TRUE(imageA->isInit()); + EXPECT_TRUE(imageB->isInit()); + + mgr.sequence() + ->eval({ imageA, imageB }) + ->eval({ imageA, imageB }) + ->eval({ imageA, imageB }); + + // Making sure the GPU holds the same vector + EXPECT_EQ(imageA->vector(), imageB->vector()); +} + +TEST(TestOpImageCopy, SingleImageShouldFail) +{ + kp::Manager mgr; + + std::vector testVecA{ 6, 7, 8 }; + + std::shared_ptr imageA = + mgr.image(testVecA, 3, 1, 1, kp::Memory::MemoryTypes::eHost); + + EXPECT_TRUE(imageA->isInit()); + + EXPECT_THROW(mgr.sequence()->eval({ imageA }), + std::runtime_error); +} + +TEST(TestOpImageCopy, TensorShouldFail) +{ + kp::Manager mgr; + + std::vector testVecA{ 6, 7, 8 }; + + std::shared_ptr image = + mgr.image(testVecA, 3, 1, 1, kp::Memory::MemoryTypes::eHost); + + std::shared_ptr tensor = + mgr.tensor(testVecA, kp::Memory::MemoryTypes::eHost); + + EXPECT_THROW(mgr.sequence()->eval({ image, tensor }), + std::runtime_error); +} + +TEST(TestOpImageCopy, CopyThroughStorageImage) +{ + kp::Manager mgr; + + std::vector testVecIn{ 9, 1, 3 }; + std::vector testVecOut{ 0, 0, 0 }; + + std::shared_ptr ImageIn = mgr.image(testVecIn, 3, 1, 1); + std::shared_ptr ImageOut = mgr.image(testVecOut, 3, 1, 1); + // Image storage requires a vector to be passed only to reflect size + std::shared_ptr tensorStorage = + mgr.image({ 0, 0, 0 }, 3, 1, 1, kp::Memory::MemoryTypes::eStorage); + + mgr.sequence() + ->eval({ ImageIn, ImageOut }) + ->eval({ ImageIn, tensorStorage }) + ->eval({ tensorStorage, ImageOut }) + ->eval({ ImageIn, ImageOut }); + + // Making sure the GPU holds the same vector + EXPECT_EQ(ImageIn->vector(), ImageOut->vector()); +} + +TEST(TestOpImageCopy, CopyImageThroughStorageViaAlgorithms) +{ + kp::Manager mgr; + + std::vector testVecIn{ 9, 1, 3 }; + std::vector testVecOut{ 0, 0, 0 }; + + std::shared_ptr ImageIn = mgr.image(testVecIn, 3, 1, 1); + std::shared_ptr ImageOut = mgr.image(testVecOut, 3, 1, 1); + // Image storage requires a vector to be passed only to reflect size + std::shared_ptr tensorStorage = + mgr.image({ 0, 0, 0 }, 3, 1, 1, kp::Memory::MemoryTypes::eStorage); + + EXPECT_TRUE(ImageIn->isInit()); + EXPECT_TRUE(ImageOut->isInit()); + + // Copy to storage tensor through algorithm + std::string shaderA = (R"( + #version 450 + 
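+      // Illustrative note: r32f declares a single-channel 32-bit float image
+      // format, matching the one-channel float images used in this test;
+      // imageLoad/imageStore below address texels by ivec2 coordinate.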
+      layout (local_size_x = 1) in;
+
+      // The image binding index matches the position in the algorithm's parameters
+      layout(set = 0, binding = 0, r32f) uniform image2D image_in;
+      layout(set = 0, binding = 1, r32f) uniform image2D image_out;
+
+      void main() {
+          uint index = gl_GlobalInvocationID.x;
+          imageStore(image_out, ivec2(index, 0), imageLoad(image_in, ivec2(index, 0)));
+      }
+    )");
+
+    auto algoA =
+      mgr.algorithm({ imageIn, imageStorage }, compileSource(shaderA));
+
+    // Copy from the storage image to the output image
+    std::string shaderB = (R"(
+      #version 450
+
+      layout (local_size_x = 1) in;
+
+      // The image binding index matches the position in the algorithm's parameters
+      layout(set = 0, binding = 0, r32f) uniform image2D image_in;
+      layout(set = 0, binding = 1, r32f) uniform image2D image_out;
+
+      void main() {
+          uint index = gl_GlobalInvocationID.x;
+          imageStore(image_out, ivec2(index, 0), imageLoad(image_in, ivec2(index, 0)));
+      }
+    )");
+
+    auto algoB =
+      mgr.algorithm({ imageStorage, imageOut }, compileSource(shaderB));
+
+    mgr.sequence()
+      ->eval<kp::OpImageSyncDevice>({ imageIn })
+      ->eval<kp::OpAlgoDispatch>(algoA)
+      ->eval<kp::OpAlgoDispatch>(algoB)
+      ->eval<kp::OpImageSyncLocal>({ imageOut });
+
+    // Making sure the GPU holds the same vector
+    EXPECT_EQ(imageIn->vector(), imageOut->vector());
+}
diff --git a/test/TestOpImageCopyToTensor.cpp b/test/TestOpImageCopyToTensor.cpp
new file mode 100644
index 00000000..17c78f81
--- /dev/null
+++ b/test/TestOpImageCopyToTensor.cpp
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: Apache-2.0
+
+#include "gtest/gtest.h"
+
+#include "kompute/Kompute.hpp"
+#include "kompute/logger/Logger.hpp"
+
+#include "shaders/Utils.hpp"
+
+TEST(TestOpImageCopyToTensor, CopyDeviceToDeviceTensor)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecA{ 1, 2, 3 };
+    std::vector<float> testVecB{ 0, 0, 0 };
+
+    std::shared_ptr<kp::Tensor> tensor = mgr.tensor(testVecA);
+    std::shared_ptr<kp::Image> image =
+      mgr.image(testVecB, testVecB.size(), 1, 1);
+
+    EXPECT_TRUE(tensor->isInit());
+    EXPECT_TRUE(image->isInit());
+
+    mgr.sequence()
+      ->eval<kp::OpTensorSyncDevice>({ tensor })
+      ->eval<kp::OpImageSyncDevice>({ image })
+      ->eval<kp::OpImageCopyToTensor>({ image, tensor })
+      ->eval<kp::OpTensorSyncLocal>({ tensor })
+      ->eval<kp::OpImageSyncLocal>({ image });
+
+    // Making sure the GPU holds the same vector
+    EXPECT_EQ(tensor->vector(), image->vector());
+}
+
+TEST(TestOpImageCopyToTensor, CopyDeviceToDeviceTensorMulti)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecA{ 2, 3, 4 };
+    std::vector<float> testVecB{ 0, 0, 0 };
+    std::vector<float> testVecC{ 0, 0, 0 };
+
+    std::shared_ptr<kp::Image> imageA =
+      mgr.image(testVecA, testVecB.size(), 1, 1);
+    std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
+    std::shared_ptr<kp::Tensor> tensorC = mgr.tensor(testVecC);
+
+    EXPECT_TRUE(imageA->isInit());
+    EXPECT_TRUE(tensorB->isInit());
+    EXPECT_TRUE(tensorC->isInit());
+
+    mgr.sequence()
+      ->eval<kp::OpImageSyncDevice>({ imageA })
+      ->eval<kp::OpTensorSyncDevice>({ tensorB, tensorC })
+      ->eval<kp::OpImageCopyToTensor>({ imageA, tensorB, tensorC });
+
+    EXPECT_EQ(imageA->vector(), tensorB->vector());
+    EXPECT_EQ(imageA->vector(), tensorC->vector());
+
+    // Making sure the GPU holds the same vector
+    mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB, tensorC });
+
+    EXPECT_EQ(imageA->vector(), tensorB->vector());
+    EXPECT_EQ(imageA->vector(), tensorC->vector());
+}
+
+TEST(TestOpImageCopyToTensor, CopyDeviceToHostTensor)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecA{ 3, 4, 5 };
+    std::vector<float> testVecB{ 0, 0, 0 };
+
+    std::shared_ptr<kp::Image> imageA =
+      mgr.image(testVecA, testVecA.size(), 1, 1);
+    std::shared_ptr<kp::Tensor> tensorB =
+      mgr.tensor(testVecB, kp::Memory::MemoryTypes::eHost);
+
+    // Only calling sync on the device-type image
+    mgr.sequence()->eval<kp::OpImageSyncDevice>({ imageA });
+
+    EXPECT_TRUE(imageA->isInit());
+    EXPECT_TRUE(tensorB->isInit());
+
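+    // Copy the device image into the host-visible tensor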
+    mgr.sequence()->eval<kp::OpImageCopyToTensor>({ imageA, tensorB });
+
+    EXPECT_EQ(imageA->vector(), tensorB->vector());
+
+    // Making sure the GPU holds the same vector
+    mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB });
+    EXPECT_EQ(imageA->vector(), tensorB->vector());
+}
+
+TEST(TestOpImageCopyToTensor, CopyHostToDeviceTensor)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecA{ 4, 5, 6 };
+    std::vector<float> testVecB{ 0, 0, 0 };
+
+    std::shared_ptr<kp::Image> imageA = mgr.image(
+      testVecA, testVecA.size(), 1, 1, kp::Memory::MemoryTypes::eHost);
+    std::shared_ptr<kp::Tensor> tensorB = mgr.tensor(testVecB);
+
+    // Sync both the host image and the device tensor to the device
+    mgr.sequence()->eval<kp::OpImageSyncDevice>({ imageA });
+    mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensorB });
+
+    EXPECT_TRUE(imageA->isInit());
+    EXPECT_TRUE(tensorB->isInit());
+
+    mgr.sequence()->eval<kp::OpImageCopyToTensor>({ imageA, tensorB });
+
+    EXPECT_EQ(imageA->vector(), tensorB->vector());
+
+    // Making sure the GPU holds the same vector
+    mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB });
+    EXPECT_EQ(imageA->vector(), tensorB->vector());
+}
+
+TEST(TestOpImageCopyToTensor, CopyHostToHostTensor)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecA{ 5, 6, 7 };
+    std::vector<float> testVecB{ 0, 0, 0 };
+
+    std::shared_ptr<kp::Image> imageA = mgr.image(
+      testVecA, testVecA.size(), 1, 1, kp::Memory::MemoryTypes::eHost);
+    std::shared_ptr<kp::Tensor> tensorB =
+      mgr.tensor(testVecB, kp::Memory::MemoryTypes::eHost);
+
+    EXPECT_TRUE(imageA->isInit());
+    EXPECT_TRUE(tensorB->isInit());
+
+    mgr.sequence()
+      ->eval<kp::OpImageSyncDevice>({ imageA })
+      ->eval<kp::OpImageCopyToTensor>({ imageA, tensorB });
+
+    EXPECT_EQ(imageA->vector(), tensorB->vector());
+
+    // Making sure the GPU holds the same vector
+    mgr.sequence()->eval<kp::OpTensorSyncLocal>({ tensorB });
+    EXPECT_EQ(imageA->vector(), tensorB->vector());
+}
+
+TEST(TestOpImageCopyToTensor, SingleTensorShouldFail)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecA{ 6, 7, 8 };
+
+    std::shared_ptr<kp::Tensor> tensorA =
+      mgr.tensor(testVecA, kp::Memory::MemoryTypes::eHost);
+
+    EXPECT_TRUE(tensorA->isInit());
+
+    EXPECT_THROW(mgr.sequence()->eval<kp::OpImageCopyToTensor>({ tensorA }),
+                 std::runtime_error);
+}
+
+TEST(TestOpImageCopyToTensor, TensorsShouldFail)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecA{ 6, 7, 8 };
+
+    std::shared_ptr<kp::Tensor> tensorA =
+      mgr.tensor(testVecA, kp::Memory::MemoryTypes::eHost);
+
+    std::shared_ptr<kp::Tensor> tensorB =
+      mgr.tensor(testVecA, kp::Memory::MemoryTypes::eHost);
+
+    EXPECT_THROW(
+      mgr.sequence()->eval<kp::OpImageCopyToTensor>({ tensorA, tensorB }),
+      std::runtime_error);
+}
+
+TEST(TestOpImageCopyToTensor, ImagesShouldFail)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecA{ 6, 7, 8 };
+
+    std::shared_ptr<kp::Image> imageA = mgr.image(
+      testVecA, testVecA.size(), 1, 1, kp::Memory::MemoryTypes::eHost);
+
+    std::shared_ptr<kp::Image> imageB = mgr.image(
+      testVecA, testVecA.size(), 1, 1, kp::Memory::MemoryTypes::eHost);
+
+    EXPECT_THROW(
+      mgr.sequence()->eval<kp::OpImageCopyToTensor>({ imageA, imageB }),
+      std::runtime_error);
+}
+
+TEST(TestOpImageCopyToTensor, CopyThroughStorageTensor)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecIn{ 9, 1, 3 };
+    std::vector<float> testVecOut{ 0, 0, 0 };
+
+    std::shared_ptr<kp::Tensor> tensorOut = mgr.tensor(testVecOut);
+    std::shared_ptr<kp::Image> imageIn =
+      mgr.image(testVecIn, testVecIn.size(), 1, 1);
+    // Image storage requires a vector to be passed only to reflect size
+    std::shared_ptr<kp::Image> imageStorage = mgr.image(
+      testVecIn, testVecIn.size(), 1, 1, kp::Memory::MemoryTypes::eStorage);
+
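+    // Stage the copy through the device-only storage image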
+    mgr.sequence()
+      ->eval<kp::OpImageSyncDevice>({ imageIn })
+      ->eval<kp::OpTensorSyncDevice>({ tensorOut })
+      ->eval<kp::OpImageCopy>({ imageIn, imageStorage })
+      ->eval<kp::OpImageCopyToTensor>({ imageStorage, tensorOut })
+      ->eval<kp::OpImageSyncLocal>({ imageIn })
+      ->eval<kp::OpTensorSyncLocal>({ tensorOut });
+
+    // Making sure the GPU holds the same vector
+    EXPECT_EQ(imageIn->vector(), tensorOut->vector());
+}
diff --git a/test/TestOpImageCreate.cpp b/test/TestOpImageCreate.cpp
new file mode 100644
index 00000000..7eb3f1f0
--- /dev/null
+++ b/test/TestOpImageCreate.cpp
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: Apache-2.0
+
+#include "gtest/gtest.h"
+
+#include "kompute/Kompute.hpp"
+#include "kompute/logger/Logger.hpp"
+
+TEST(TestOpImageCreate, CreateSingleImageSingleOp)
+{
+    std::vector<float> testVecA{ 9, 8, 7 };
+    std::shared_ptr<kp::ImageT<float>> imageA = nullptr;
+
+    {
+        kp::Manager mgr;
+
+        imageA = mgr.image(testVecA, 3, 1, 1);
+
+        EXPECT_TRUE(imageA->isInit());
+        EXPECT_EQ(imageA->vector(), testVecA);
+    }
+
+    EXPECT_FALSE(imageA->isInit());
+}
+
+TEST(TestOpImageCreate, NoErrorIfImageFreedBefore)
+{
+    std::vector<float> testVecA{ 9, 8, 7 };
+    std::vector<float> testVecB{ 6, 5, 4 };
+
+    kp::Manager mgr;
+
+    std::shared_ptr<kp::ImageT<float>> imageA = mgr.image(testVecA, 1, 3, 1);
+    std::shared_ptr<kp::ImageT<float>> imageB = mgr.image(testVecB, 3, 1, 1);
+
+    EXPECT_EQ(imageA->vector(), testVecA);
+    EXPECT_EQ(imageB->vector(), testVecB);
+
+    imageA->destroy();
+    imageB->destroy();
+
+    EXPECT_FALSE(imageA->isInit());
+    EXPECT_FALSE(imageB->isInit());
+}
+
+TEST(TestOpImageCreate, ExceptionOnZeroSizeImage)
+{
+    std::vector<float> testVecA;
+
+    kp::Manager mgr;
+
+    try {
+        std::shared_ptr<kp::ImageT<float>> imageA =
+          mgr.image(testVecA, 1, 1, 1);
+    } catch (const std::runtime_error& err) {
+        // check exception
+        ASSERT_TRUE(std::string(err.what()).find("zero-sized") !=
+                    std::string::npos);
+    }
+}
+
+TEST(TestOpImageCreate, ExceptionOnInvalidTiledImage)
+{
+    std::vector<float> testVecA;
+
+    kp::Manager mgr;
+
+    try {
+        std::shared_ptr<kp::ImageT<float>> imageA =
+          mgr.image(testVecA,
+                    1,
+                    1,
+                    1,
+                    vk::ImageTiling::eOptimal,
+                    kp::Memory::MemoryTypes::eDeviceAndHost);
+    } catch (const std::runtime_error& err) {
+        // check exception
+        ASSERT_TRUE(std::string(err.what())
+                      .find("optimal tiling is only supported for") !=
+                    std::string::npos);
+    }
+
+    try {
+        std::shared_ptr<kp::ImageT<float>> imageA =
+          mgr.image(testVecA,
+                    1,
+                    1,
+                    1,
+                    vk::ImageTiling::eOptimal,
+                    kp::Memory::MemoryTypes::eHost);
+    } catch (const std::runtime_error& err) {
+        // check exception
+        ASSERT_TRUE(std::string(err.what())
+                      .find("optimal tiling is only supported for") !=
+                    std::string::npos);
+    }
+}
diff --git a/test/TestOpImageSync.cpp b/test/TestOpImageSync.cpp
new file mode 100644
index 00000000..1a39c5d0
--- /dev/null
+++ b/test/TestOpImageSync.cpp
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: Apache-2.0
+
+#include "gtest/gtest.h"
+
+#include "kompute/Kompute.hpp"
+#include "kompute/logger/Logger.hpp"
+
+TEST(TestOpImageSync, SyncToDeviceMemorySingleImage)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecPreA{ 0, 0, 0 };
+    std::vector<float> testVecPostA{ 9, 8, 7 };
+
+    std::shared_ptr<kp::ImageT<float>> imageA = mgr.image(testVecPreA, 3, 1, 1);
+
+    EXPECT_TRUE(imageA->isInit());
+
+    imageA->setData(testVecPostA);
+
+    mgr.sequence()->eval<kp::OpImageSyncDevice>({ imageA });
+
+    mgr.sequence()->eval<kp::OpImageSyncLocal>({ imageA });
+
+    EXPECT_EQ(imageA->vector(), testVecPostA);
+}
+
+TEST(TestOpImageSync, SyncToDeviceMemoryMultiImage)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVec{ 9, 8, 7 };
+
+    std::shared_ptr<kp::ImageT<float>> imageA = mgr.image({ 0, 0, 0 }, 3, 1, 1);
+    std::shared_ptr<kp::ImageT<float>> imageB = mgr.image({ 0, 0, 0 }, 3, 1, 1);
+    std::shared_ptr<kp::ImageT<float>> imageC = mgr.image({ 0, 0, 0 }, 3, 1, 1);
+
+    EXPECT_TRUE(imageA->isInit());
+    EXPECT_TRUE(imageB->isInit());
+    EXPECT_TRUE(imageC->isInit());
+
+    imageA->setData(testVec);
+
+    mgr.sequence()->eval<kp::OpImageSyncDevice>({ imageA });
+
+    mgr.sequence()->eval<kp::OpImageCopy>({ imageA, imageB, imageC });
+
+    mgr.sequence()->eval<kp::OpImageSyncLocal>({ imageA, imageB, imageC });
+
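+    // All three images should now hold the data originally set on imageA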
+    EXPECT_EQ(imageA->vector(), testVec);
+    EXPECT_EQ(imageB->vector(), testVec);
+    EXPECT_EQ(imageC->vector(), testVec);
+}
+
+TEST(TestOpImageSync, NegativeUnrelatedImageSync)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecIn{ 9, 1, 3 };
+    std::vector<float> testVecOut{ 0, 0, 0 };
+
+    std::shared_ptr<kp::Image> imageIn = mgr.image(testVecIn, 3, 1, 1);
+    std::shared_ptr<kp::Image> imageOut = mgr.image(testVecOut, 3, 1, 1);
+
+    EXPECT_TRUE(imageIn->isInit());
+    EXPECT_TRUE(imageOut->isInit());
+
+    // Syncing one image to the device should not update an unrelated one
+    mgr.sequence()
+      ->eval<kp::OpImageSyncDevice>({ imageIn })
+      ->eval<kp::OpImageSyncLocal>({ imageOut });
+
+    // The two images should still hold different data
+    EXPECT_NE(imageIn->vector(), imageOut->vector());
+}
+
+TEST(TestOpImageSync, TensorShouldFail)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecPreA{ 0, 0, 0 };
+
+    std::shared_ptr<kp::TensorT<float>> tensor = mgr.tensor(testVecPreA);
+
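+    // Image sync operations should reject tensor arguments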
+    EXPECT_THROW(mgr.sequence()->eval<kp::OpImageSyncDevice>({ tensor }),
+                 std::runtime_error);
+
+    EXPECT_THROW(mgr.sequence()->eval<kp::OpImageSyncLocal>({ tensor }),
+                 std::runtime_error);
+}
diff --git a/test/TestOpShadersFromStringAndFile.cpp b/test/TestOpShadersFromStringAndFile.cpp
index dc2a0ecb..07b000ff 100644
--- a/test/TestOpShadersFromStringAndFile.cpp
+++ b/test/TestOpShadersFromStringAndFile.cpp
@@ -64,7 +64,7 @@ TEST(TestShader, ShaderRawDataFromConstructorCustomDataType)
 
     std::vector<uint32_t> spirv = compileSource(shader);
 
-    std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA, tensorB };
+    std::vector<std::shared_ptr<kp::Memory>> params = { tensorA, tensorB };
 
     mgr.sequence()
       ->eval<kp::OpTensorSyncDevice>(params)
@@ -127,7 +127,7 @@ TEST(TestOpAlgoCreate, ShaderRawDataFromConstructor)
 
     std::vector<uint32_t> spirv = compileSource(shader);
 
-    std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA, tensorB };
+    std::vector<std::shared_ptr<kp::Memory>> params = { tensorA, tensorB };
 
     mgr.sequence()
       ->eval<kp::OpTensorSyncDevice>(params)
@@ -147,7 +147,7 @@ TEST(TestOpAlgoCreate, ShaderCompiledDataFromConstructor)
     std::vector<uint32_t> spirv(kp::TEST_OP_CUSTOM_SHADER_COMP_SPV.begin(),
                                 kp::TEST_OP_CUSTOM_SHADER_COMP_SPV.end());
 
-    std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA, tensorB };
+    std::vector<std::shared_ptr<kp::Memory>> params = { tensorA, tensorB };
 
     mgr.sequence()
       ->eval<kp::OpTensorSyncDevice>(params)
diff --git a/test/TestOpTensorCopy.cpp b/test/TestOpTensorCopy.cpp
index 60e0c485..5555e272 100644
--- a/test/TestOpTensorCopy.cpp
+++ b/test/TestOpTensorCopy.cpp
@@ -71,7 +71,7 @@ TEST(TestOpTensorCopy, CopyDeviceToHostTensor)
 
     std::shared_ptr<kp::TensorT<float>> tensorA = mgr.tensor(testVecA);
     std::shared_ptr<kp::TensorT<float>> tensorB =
-      mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost);
+      mgr.tensor(testVecB, kp::Memory::MemoryTypes::eHost);
 
     // Only calling sync on device type tensor
     mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensorA });
@@ -97,7 +97,7 @@ TEST(TestOpTensorCopy, CopyHostToDeviceTensor)
     std::vector<float> testVecB{ 0, 0, 0 };
 
     std::shared_ptr<kp::TensorT<float>> tensorA =
-      mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
+      mgr.tensor(testVecA, kp::Memory::MemoryTypes::eHost);
     std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor(testVecB);
 
     // Only calling sync on device type tensor
@@ -124,9 +124,9 @@ TEST(TestOpTensorCopy, CopyHostToHostTensor)
     std::vector<float> testVecB{ 0, 0, 0 };
 
     std::shared_ptr<kp::TensorT<float>> tensorA =
-      mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
+      mgr.tensor(testVecA, kp::Memory::MemoryTypes::eHost);
     std::shared_ptr<kp::TensorT<float>> tensorB =
-      mgr.tensor(testVecB, kp::Tensor::TensorTypes::eHost);
+      mgr.tensor(testVecB, kp::Memory::MemoryTypes::eHost);
 
     EXPECT_TRUE(tensorA->isInit());
     EXPECT_TRUE(tensorB->isInit());
@@ -150,7 +150,7 @@ TEST(TestOpTensorCopy, SingleTensorShouldFail)
     std::vector<float> testVecA{ 6, 7, 8 };
 
     std::shared_ptr<kp::TensorT<float>> tensorA =
-      mgr.tensor(testVecA, kp::Tensor::TensorTypes::eHost);
+      mgr.tensor(testVecA, kp::Memory::MemoryTypes::eHost);
 
     EXPECT_TRUE(tensorA->isInit());
 
@@ -169,7 +169,7 @@ TEST(TestOpTensorCopy, CopyThroughStorageTensor)
 
     std::shared_ptr<kp::TensorT<float>> tensorOut = mgr.tensor(testVecOut);
     // Tensor storage requires a vector to be passed only to reflect size
     std::shared_ptr<kp::TensorT<float>> tensorStorage =
-      mgr.tensor({ 0, 0, 0 }, kp::Tensor::TensorTypes::eStorage);
+      mgr.tensor({ 0, 0, 0 }, kp::Memory::MemoryTypes::eStorage);
 
     mgr.sequence()
       ->eval<kp::OpTensorSyncDevice>({ tensorIn, tensorOut })
@@ -192,7 +192,7 @@ TEST(TestOpTensorCopy, CopyTensorThroughStorageViaAlgorithms)
 
     std::shared_ptr<kp::TensorT<float>> tensorOut = mgr.tensor(testVecOut);
     // Tensor storage requires a vector to be passed only to reflect size
     std::shared_ptr<kp::TensorT<float>> tensorStorage =
-      mgr.tensor({ 0, 0, 0 }, kp::Tensor::TensorTypes::eStorage);
+      mgr.tensor({ 0, 0, 0 }, kp::Memory::MemoryTypes::eStorage);
 
     EXPECT_TRUE(tensorIn->isInit());
     EXPECT_TRUE(tensorOut->isInit());
diff --git a/test/TestOpTensorCopyToImage.cpp b/test/TestOpTensorCopyToImage.cpp
new file mode 100644
index 00000000..855c3e3b
--- /dev/null
+++ b/test/TestOpTensorCopyToImage.cpp
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: Apache-2.0
+
+#include "gtest/gtest.h"
+
+#include "kompute/Kompute.hpp"
+#include "kompute/logger/Logger.hpp"
+
+#include "shaders/Utils.hpp"
+
+TEST(TestOpTensorCopyToImage, CopyDeviceToDeviceTensor)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecA{ 1, 2, 3 };
+    std::vector<float> testVecB{ 0, 0, 0 };
+
+    std::shared_ptr<kp::Tensor> tensor = mgr.tensor(testVecA);
+    std::shared_ptr<kp::Image> image =
+      mgr.image(testVecB, testVecB.size(), 1, 1);
+
+    EXPECT_TRUE(tensor->isInit());
+    EXPECT_TRUE(image->isInit());
+
+    mgr.sequence()
+      ->eval<kp::OpTensorSyncDevice>({ tensor })
+      ->eval<kp::OpImageSyncDevice>({ image })
+      ->eval<kp::OpTensorCopyToImage>({ tensor, image })
+      ->eval<kp::OpTensorSyncLocal>({ tensor })
+      ->eval<kp::OpImageSyncLocal>({ image });
+
+    // Making sure the GPU holds the same vector
+    EXPECT_EQ(tensor->vector(), image->vector());
+}
+
+TEST(TestOpTensorCopyToImage, CopyDeviceToDeviceTensorMulti)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecA{ 2, 3, 4 };
+    std::vector<float> testVecB{ 0, 0, 0 };
+    std::vector<float> testVecC{ 0, 0, 0 };
+
+    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
+    std::shared_ptr<kp::Image> imageB =
+      mgr.image(testVecB, testVecB.size(), 1, 1);
+    std::shared_ptr<kp::Image> imageC =
+      mgr.image(testVecC, testVecC.size(), 1, 1);
+
+    EXPECT_TRUE(tensorA->isInit());
+    EXPECT_TRUE(imageB->isInit());
+    EXPECT_TRUE(imageC->isInit());
+
+    mgr.sequence()
+      ->eval<kp::OpTensorSyncDevice>({ tensorA })
+      ->eval<kp::OpImageSyncDevice>({ imageB, imageC })
+      ->eval<kp::OpTensorCopyToImage>({ tensorA, imageB, imageC });
+
+    EXPECT_EQ(tensorA->vector(), imageB->vector());
+    EXPECT_EQ(tensorA->vector(), imageC->vector());
+
+    // Making sure the GPU holds the same vector
+    mgr.sequence()->eval<kp::OpImageSyncLocal>({ imageB, imageC });
+
+    EXPECT_EQ(tensorA->vector(), imageB->vector());
+    EXPECT_EQ(tensorA->vector(), imageC->vector());
+}
+
+TEST(TestOpTensorCopyToImage, CopyDeviceToHostTensor)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecA{ 3, 4, 5 };
+    std::vector<float> testVecB{ 0, 0, 0 };
+
+    std::shared_ptr<kp::Tensor> tensorA = mgr.tensor(testVecA);
+    std::shared_ptr<kp::Image> imageB = mgr.image(
+      testVecB, testVecB.size(), 1, 1, kp::Memory::MemoryTypes::eHost);
+
+    // Only calling sync on the device-type tensor
+    mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensorA });
+
+    EXPECT_TRUE(tensorA->isInit());
+    EXPECT_TRUE(imageB->isInit());
+
+    mgr.sequence()->eval<kp::OpTensorCopyToImage>({ tensorA, imageB });
+
+    EXPECT_EQ(tensorA->vector(), imageB->vector());
+
+    // Making sure the GPU holds the same vector
+    mgr.sequence()->eval<kp::OpImageSyncLocal>({ imageB });
+    EXPECT_EQ(tensorA->vector(), imageB->vector());
+}
+
+TEST(TestOpTensorCopyToImage, CopyHostToDeviceTensor)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecA{ 4, 5, 6 };
+    std::vector<float> testVecB{ 0, 0, 0 };
+
+    std::shared_ptr<kp::Tensor> tensorA =
+      mgr.tensor(testVecA, kp::Memory::MemoryTypes::eHost);
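+    // Host-visible source tensor; the destination image below is device-local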
+    std::shared_ptr<kp::Image> imageB =
+      mgr.image(testVecB, testVecB.size(), 1, 1);
+
+    // Sync both the host tensor and the device image to the device
+    mgr.sequence()->eval<kp::OpTensorSyncDevice>({ tensorA });
+    mgr.sequence()->eval<kp::OpImageSyncDevice>({ imageB });
+
+    EXPECT_TRUE(tensorA->isInit());
+    EXPECT_TRUE(imageB->isInit());
+
+    mgr.sequence()->eval<kp::OpTensorCopyToImage>({ tensorA, imageB });
+
+    EXPECT_EQ(tensorA->vector(), imageB->vector());
+
+    // Making sure the GPU holds the same vector
+    mgr.sequence()->eval<kp::OpImageSyncLocal>({ imageB });
+    EXPECT_EQ(tensorA->vector(), imageB->vector());
+}
+
+TEST(TestOpTensorCopyToImage, CopyHostToHostTensor)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecA{ 5, 6, 7 };
+    std::vector<float> testVecB{ 0, 0, 0 };
+
+    std::shared_ptr<kp::Tensor> tensorA =
+      mgr.tensor(testVecA, kp::Memory::MemoryTypes::eHost);
+    std::shared_ptr<kp::Image> imageB = mgr.image(
+      testVecB, testVecB.size(), 1, 1, kp::Memory::MemoryTypes::eHost);
+
+    EXPECT_TRUE(tensorA->isInit());
+    EXPECT_TRUE(imageB->isInit());
+
+    mgr.sequence()
+      ->eval<kp::OpTensorSyncDevice>({ tensorA })
+      ->eval<kp::OpTensorCopyToImage>({ tensorA, imageB });
+
+    EXPECT_EQ(tensorA->vector(), imageB->vector());
+
+    // Making sure the GPU holds the same vector
+    mgr.sequence()->eval<kp::OpImageSyncLocal>({ imageB });
+    EXPECT_EQ(tensorA->vector(), imageB->vector());
+}
+
+TEST(TestOpTensorCopyToImage, SingleTensorShouldFail)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecA{ 6, 7, 8 };
+
+    std::shared_ptr<kp::Tensor> tensorA =
+      mgr.tensor(testVecA, kp::Memory::MemoryTypes::eHost);
+
+    EXPECT_TRUE(tensorA->isInit());
+
+    EXPECT_THROW(mgr.sequence()->eval<kp::OpTensorCopyToImage>({ tensorA }),
+                 std::runtime_error);
+}
+
+TEST(TestOpTensorCopyToImage, TensorsShouldFail)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecA{ 6, 7, 8 };
+
+    std::shared_ptr<kp::Tensor> tensorA =
+      mgr.tensor(testVecA, kp::Memory::MemoryTypes::eHost);
+
+    std::shared_ptr<kp::Tensor> tensorB =
+      mgr.tensor(testVecA, kp::Memory::MemoryTypes::eHost);
+
+    EXPECT_THROW(
+      mgr.sequence()->eval<kp::OpTensorCopyToImage>({ tensorA, tensorB }),
+      std::runtime_error);
+}
+
+TEST(TestOpTensorCopyToImage, ImagesShouldFail)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecA{ 6, 7, 8 };
+
+    std::shared_ptr<kp::Image> imageA = mgr.image(
+      testVecA, testVecA.size(), 1, 1, kp::Memory::MemoryTypes::eHost);
+
+    std::shared_ptr<kp::Image> imageB = mgr.image(
+      testVecA, testVecA.size(), 1, 1, kp::Memory::MemoryTypes::eHost);
+
+    EXPECT_THROW(
+      mgr.sequence()->eval<kp::OpTensorCopyToImage>({ imageA, imageB }),
+      std::runtime_error);
+}
+
+TEST(TestOpTensorCopyToImage, CopyThroughStorageTensor)
+{
+    kp::Manager mgr;
+
+    std::vector<float> testVecIn{ 9, 1, 3 };
+    std::vector<float> testVecOut{ 0, 0, 0 };
+
+    std::shared_ptr<kp::Tensor> tensorIn = mgr.tensor(testVecIn);
+    std::shared_ptr<kp::Image> imageOut =
+      mgr.image(testVecOut, testVecOut.size(), 1, 1);
+    // Tensor storage requires a vector to be passed only to reflect size
+    std::shared_ptr<kp::Tensor> tensorStorage =
+      mgr.tensor({ 0, 0, 0 }, kp::Memory::MemoryTypes::eStorage);
+
+    mgr.sequence()
+      ->eval<kp::OpTensorSyncDevice>({ tensorIn })
+      ->eval<kp::OpImageSyncDevice>({ imageOut })
+      ->eval<kp::OpTensorCopy>({ tensorIn, tensorStorage })
+      ->eval<kp::OpTensorCopyToImage>({ tensorStorage, imageOut })
+      ->eval<kp::OpTensorSyncLocal>({ tensorIn })
+      ->eval<kp::OpImageSyncLocal>({ imageOut });
+
+    // Making sure the GPU holds the same vector
+    EXPECT_EQ(tensorIn->vector(), imageOut->vector());
+}
diff --git a/test/TestSpecializationConstant.cpp b/test/TestSpecializationConstant.cpp
index 7f15f844..7de9e4ce 100644
--- a/test/TestSpecializationConstant.cpp
+++ b/test/TestSpecializationConstant.cpp
@@ -35,7 +35,7 @@ TEST(TestSpecializationConstants, TestTwoConstants)
     std::shared_ptr<kp::TensorT<float>> tensorB = mgr.tensor({ 0, 0, 0 });
 
-    std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA,
+    std::vector<std::shared_ptr<kp::Memory>> params = { tensorA,
                                                         tensorB };
 
     std::vector<float> spec = std::vector<float>({ 5.0, 0.3 });
@@ -83,7 +83,7 @@ TEST(TestSpecializationConstants, TestConstantsInt)
     std::shared_ptr<kp::TensorT<int32_t>> tensorB = mgr.tensorT<int32_t>({ 0, 0, 0 });
 
-    std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA,
+    std::vector<std::shared_ptr<kp::Memory>> params = { tensorA,
                                                         tensorB };
 
     std::vector<int32_t> spec({ -1, -2 });
diff --git a/test/TestWorkgroup.cpp b/test/TestWorkgroup.cpp
index 914ee721..e2a29ed4 100644
--- a/test/TestWorkgroup.cpp
+++ b/test/TestWorkgroup.cpp
@@ -20,7 +20,7 @@ TEST(TestWorkgroup, TestSimpleWorkgroup)
     tensorA = mgr.tensor(std::vector<float>(16 * 8));
     tensorB = mgr.tensor(std::vector<float>(16 * 8));
 
-    std::vector<std::shared_ptr<kp::Tensor>> params = { tensorA,
+    std::vector<std::shared_ptr<kp::Memory>> params = { tensorA,
                                                         tensorB };
 
     std::vector<uint32_t> spirv(
       kp::TEST_WORKGROUP_SHADER_COMP_SPV.begin(),