diff --git a/perception/autoware_lidar_centerpoint/lib/preprocess/preprocess_kernel.cu b/perception/autoware_lidar_centerpoint/lib/preprocess/preprocess_kernel.cu index 502ad04223ce9..56bbdd85c79b7 100644 --- a/perception/autoware_lidar_centerpoint/lib/preprocess/preprocess_kernel.cu +++ b/perception/autoware_lidar_centerpoint/lib/preprocess/preprocess_kernel.cu @@ -185,8 +185,12 @@ __global__ void generateBaseFeatures_kernel( unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size, unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs) { - unsigned int voxel_idx = blockIdx.x * blockDim.x + threadIdx.x; - unsigned int voxel_idy = blockIdx.y * blockDim.y + threadIdx.y; + // exchange x and y to process in a row-major order + // flip x axis direction to process front to back + unsigned int voxel_idx_inverted = blockIdx.y * blockDim.y + threadIdx.y; + unsigned int voxel_idy = blockIdx.x * blockDim.x + threadIdx.x; + if (voxel_idx_inverted >= grid_x_size || voxel_idy >= grid_y_size) return; + unsigned int voxel_idx = grid_x_size - 1 - voxel_idx_inverted; if (voxel_idx >= grid_x_size || voxel_idy >= grid_y_size) return; @@ -220,9 +224,10 @@ cudaError_t generateBaseFeatures_launch( unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs, cudaStream_t stream) { + // exchange x and y to process in a row-major order dim3 threads = {32, 32}; dim3 blocks = { - (grid_x_size + threads.x - 1) / threads.x, (grid_y_size + threads.y - 1) / threads.y}; + (grid_y_size + threads.x - 1) / threads.x, (grid_x_size + threads.y - 1) / threads.y}; generateBaseFeatures_kernel<<<blocks, threads, 0, stream>>>( mask, voxels, grid_y_size, grid_x_size, max_voxel_size, pillar_num, voxel_features, voxel_num,