Skip to content

Commit

Permalink
Improve temporal merging performance (#141)
Browse files Browse the repository at this point in the history
* Double capacity if is running out

* Format PCL point cloud on CPU

* After review
  • Loading branch information
msz-rai authored Apr 19, 2023
1 parent b52e65e commit ad39515
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 6 deletions.
7 changes: 7 additions & 0 deletions src/VArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@ void VArray::reserve(std::size_t newCapacity, bool preserveData)
current().elemCapacity = newCapacity;
}

/**
 * Doubles the element capacity once the array is filled beyond a given fraction.
 *
 * The array counts as "running out" when its element count is strictly greater than
 * (element capacity * runningOutThreshold). In that case reserve() is called with twice
 * the current element capacity and preserveData set to true. Otherwise nothing happens.
 *
 * @param runningOutThreshold Fraction of the current capacity above which the capacity is doubled.
 */
void VArray::doubleCapacityIfRunningOut(float runningOutThreshold)
{
	const std::size_t capacityNow = getElemCapacity();
	const bool stillHasRoom = !(capacityNow * runningOutThreshold < getElemCount());
	if (stillHasRoom) {
		return;
	}

	// Grow ahead of demand; the second argument asks reserve() to preserve existing data.
	reserve(capacityNow * 2, true);
}

VArray::~VArray()
{
for (auto&& [location, state] : instance) {
Expand Down
1 change: 1 addition & 0 deletions src/VArray.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ struct VArray : std::enable_shared_from_this<VArray>
void insertData(const void *src, std::size_t elements, std::size_t offset);
void resize(std::size_t newCount, bool zeroInit=true, bool preserveData=true);
void reserve(std::size_t newCapacity, bool preserveData=true);
// Calls reserve() with twice the current element capacity (preserving data) when the element count
// exceeds runningOutThreshold * element capacity; otherwise does nothing.
void doubleCapacityIfRunningOut(float runningOutThreshold=0.9);
// Returns the stored element size (sizeOfType). NOTE(review): presumably expressed in bytes - confirm.
std::size_t getElemSize() const { return sizeOfType; }
// Returns elemCount of the state returned by current().
std::size_t getElemCount() const { return current().elemCount; }
// Returns elemCapacity of the state returned by current().
std::size_t getElemCapacity() const { return current().elemCapacity; }
Expand Down
14 changes: 8 additions & 6 deletions src/api/apiPcl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,18 @@ rgl_graph_write_pcd_file(rgl_node_t node, const char* file_path)
throw InvalidAPIObject(fmt::format("Saving PCD file {} failed - requested node does not have field XYZ.", file_path));
}

// Get formatted data
VArray::Ptr rglCloud = VArray::create<char>();
// TODO(msz-rai): CudaStream for formatAsync: nullptr or pointCloudNode->getGraph()->getStream()?
FormatPointsNode::formatAsync(rglCloud, pointCloudNode, {XYZ_F32, PADDING_32}, nullptr);
// We are not using format node to avoid transferring huge point cloud to GPU (risk of cuda out of memory error)
// We are formatting manually on the CPU instead.
// TODO(msz-rai): CudaStream for getFieldDataTyped: nullptr or pointCloudNode->getGraph()->getStream()?
auto xyzTypedArray = pointCloudNode->getFieldDataTyped<XYZ_F32>(nullptr);
auto xyzData = xyzTypedArray->getReadPtr(MemLoc::Host);

// Convert to PCL cloud
pcl::PointCloud<pcl::PointXYZ> pclCloud;
const pcl::PointXYZ* data = reinterpret_cast<const pcl::PointXYZ*>(rglCloud->getReadPtr(MemLoc::Host));
pclCloud.resize(pointCloudNode->getWidth(), pointCloudNode->getHeight());
pclCloud.assign(data, data + pclCloud.size(), pointCloudNode->getWidth());
for (int i = 0; i < xyzTypedArray->getCount(); ++i) {
pclCloud[i] = pcl::PointXYZ(xyzData[i].x(), xyzData[i].y(), xyzData[i].z());
}
pclCloud.is_dense = pointCloudNode->isDense();

// Save to PCD file
Expand Down
2 changes: 2 additions & 0 deletions src/graph/TemporalMergePointsNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ void TemporalMergePointsNode::schedule(cudaStream_t stream)
size_t pointCount = input->getPointCount();
const auto toMergeData = input->getFieldData(field, stream);
data->insertData(toMergeData->getReadPtr(MemLoc::Device), pointCount, width);
// Double the capacity of the VArray when it is close to running out. This prevents reallocating memory on every insertion.
data->doubleCapacityIfRunningOut();
}
width += input->getWidth();
}

0 comments on commit ad39515

Please sign in to comment.