diff --git a/include/gtsam_points/factors/impl/integrated_color_consistency_factor_impl.hpp b/include/gtsam_points/factors/impl/integrated_color_consistency_factor_impl.hpp index a8138f8..de64acb 100644 --- a/include/gtsam_points/factors/impl/integrated_color_consistency_factor_impl.hpp +++ b/include/gtsam_points/factors/impl/integrated_color_consistency_factor_impl.hpp @@ -5,6 +5,12 @@ #include #include +#include +#include + +#ifdef GTSAM_POINTS_TBB +#include +#endif namespace gtsam_points { @@ -83,24 +89,40 @@ void IntegratedColorConsistencyFactor_knn_search(pt.data(), 1, &k_index, &k_sq_dist, max_correspondence_distance_sq); - correspondences[i] = (num_found && k_sq_dist < max_correspondence_distance_sq) ? k_index : -1; - } - } + size_t num_found = target_tree->knn_search(pt.data(), 1, &k_index, &k_sq_dist, max_correspondence_distance_sq); + correspondences[i] = (num_found && k_sq_dist < max_correspondence_distance_sq) ? k_index : -1; + }; - if (do_update) { - last_correspondence_point = delta; + if (is_omp_default()) { +#pragma omp parallel for num_threads(num_threads) schedule(guided, 8) + for (int i = 0; i < frame::size(*source); i++) { + perpoint_task(i); + } + } else { +#ifdef GTSAM_POINTS_TBB + tbb::parallel_for(tbb::blocked_range(0, frame::size(*source), 8), [&](const tbb::blocked_range& range) { + for (int i = range.begin(); i < range.end(); i++) { + perpoint_task(i); + } + }); +#else + std::cerr << "error: TBB is not available" << std::endl; + abort(); +#endif } } @@ -117,27 +139,16 @@ double IntegratedColorConsistencyFactor_> Hs_target; - std::vector> Hs_source; - std::vector> Hs_target_source; - std::vector> bs_target; - std::vector> bs_source; - - if (H_target && H_source && H_target_source && b_target && b_source) { - Hs_target.resize(num_threads, Eigen::Matrix::Zero()); - Hs_source.resize(num_threads, Eigen::Matrix::Zero()); - Hs_target_source.resize(num_threads, Eigen::Matrix::Zero()); - bs_target.resize(num_threads, Eigen::Matrix::Zero()); - bs_source.resize(num_threads, Eigen::Matrix::Zero()); - } - -#pragma omp parallel for num_threads(num_threads) reduction(+ : sum_errors_photo) schedule(guided, 8) - for (int i = 0; i < frame::size(*source); i++) { + const auto perpoint_task = [&]( + int i, + Eigen::Matrix* H_target, + Eigen::Matrix* H_source, + Eigen::Matrix* H_target_source, + Eigen::Matrix* b_target, + Eigen::Matrix* b_source) { const long target_index = correspondences[i]; if (target_index < 0) { - continue; + return 0.0; } // source atributes @@ -157,17 +168,12 @@ double IntegratedColorConsistencyFactor_ J_transed_target = Eigen::Matrix::Zero(); J_transed_target.block<3, 3>(0, 0) = gtsam::SO3::Hat(transed_A.head<3>()); J_transed_target.block<3, 3>(0, 3) = -Eigen::Matrix3d::Identity(); @@ -187,30 +193,36 @@ double IntegratedColorConsistencyFactor_ J_ephoto_target = J_ephoto_transed * J_transed_target; Eigen::Matrix J_ephoto_source = J_ephoto_transed * J_transed_source; - Hs_target[thread_num] += J_ephoto_target.transpose() * photometric_term_weight * J_ephoto_target; - Hs_source[thread_num] += J_ephoto_source.transpose() * photometric_term_weight * J_ephoto_source; - Hs_target_source[thread_num] += J_ephoto_target.transpose() * photometric_term_weight * J_ephoto_source; - bs_target[thread_num] += J_ephoto_target.transpose() * photometric_term_weight * error_photo; - bs_source[thread_num] += J_ephoto_source.transpose() * photometric_term_weight * error_photo; - } + *H_target += J_ephoto_target.transpose() * photometric_term_weight * J_ephoto_target; + *H_source += J_ephoto_source.transpose() * photometric_term_weight * J_ephoto_source; + *H_target_source += J_ephoto_target.transpose() * photometric_term_weight * J_ephoto_source; + *b_target += J_ephoto_target.transpose() * photometric_term_weight * error_photo; + *b_source += J_ephoto_source.transpose() * photometric_term_weight * error_photo; + + return err; + }; + + double sum_errors_photo = 0.0; + + std::vector> Hs_target; + std::vector> Hs_source; + std::vector> Hs_target_source; + std::vector> bs_target; + std::vector> bs_source; if (H_target && H_source && H_target_source && b_target && b_source) { - H_target->setZero(); - H_source->setZero(); - H_target_source->setZero(); - b_target->setZero(); - b_source->setZero(); - - for (int i = 0; i < num_threads; i++) { - (*H_target) += Hs_target[i]; - (*H_source) += Hs_source[i]; - (*H_target_source) += Hs_target_source[i]; - (*b_target) += bs_target[i]; - (*b_source) += bs_source[i]; - } + Hs_target.resize(num_threads, Eigen::Matrix::Zero()); + Hs_source.resize(num_threads, Eigen::Matrix::Zero()); + Hs_target_source.resize(num_threads, Eigen::Matrix::Zero()); + bs_target.resize(num_threads, Eigen::Matrix::Zero()); + bs_source.resize(num_threads, Eigen::Matrix::Zero()); } - return sum_errors_photo; + if (is_omp_default()) { + return scan_matching_reduce_omp(perpoint_task, frame::size(*source), num_threads, H_target, H_source, H_target_source, b_target, b_source); + } else { + return scan_matching_reduce_tbb(perpoint_task, frame::size(*source), H_target, H_source, H_target_source, b_target, b_source); + } } } // namespace gtsam_points \ No newline at end of file diff --git a/include/gtsam_points/factors/impl/integrated_colored_gicp_factor_impl.hpp b/include/gtsam_points/factors/impl/integrated_colored_gicp_factor_impl.hpp index ba1e76f..ee54e17 100644 --- a/include/gtsam_points/factors/impl/integrated_colored_gicp_factor_impl.hpp +++ b/include/gtsam_points/factors/impl/integrated_colored_gicp_factor_impl.hpp @@ -5,6 +5,12 @@ #include #include +#include +#include + +#ifdef GTSAM_POINTS_TBB +#include +#endif namespace gtsam_points { @@ -83,11 +89,14 @@ void IntegratedColoredGICPFactor_: } } + if (do_update) { + last_correspondence_point = delta; + } + correspondences.resize(frame::size(*source)); mahalanobis.resize(frame::size(*source)); -#pragma omp parallel for num_threads(num_threads) schedule(guided, 8) - for (int i = 0; i < frame::size(*source); i++) { + const auto perpoint_task = [&](int i) { if (do_update) { Eigen::Vector4d pt = delta * frame::point(*source, i); pt[3] = frame::intensity(*source, i); @@ -108,9 +117,25 @@ void IntegratedColoredGICPFactor_: mahalanobis[i] = RCR.inverse(); mahalanobis[i](3, 3) = 0.0; } - } + }; - last_correspondence_point = delta; + if (is_omp_default()) { +#pragma omp parallel for num_threads(num_threads) schedule(guided, 8) + for (int i = 0; i < frame::size(*source); i++) { + perpoint_task(i); + } + } else { +#ifdef GTSAM_POINTS_TBB + tbb::parallel_for(tbb::blocked_range(0, frame::size(*source), 8), [&](const tbb::blocked_range& range) { + for (int i = range.begin(); i < range.end(); i++) { + perpoint_task(i); + } + }); +#else + std::cerr << "error: TBB is not available" << std::endl; + abort(); +#endif + } } template @@ -128,28 +153,16 @@ double IntegratedColoredGICPFactor_> Hs_target; - std::vector> Hs_source; - std::vector> Hs_target_source; - std::vector> bs_target; - std::vector> bs_source; - - if (H_target && H_source && H_target_source && b_target && b_source) { - Hs_target.resize(num_threads, Eigen::Matrix::Zero()); - Hs_source.resize(num_threads, Eigen::Matrix::Zero()); - Hs_target_source.resize(num_threads, Eigen::Matrix::Zero()); - bs_target.resize(num_threads, Eigen::Matrix::Zero()); - bs_source.resize(num_threads, Eigen::Matrix::Zero()); - } - -#pragma omp parallel for num_threads(num_threads) reduction(+ : sum_errors_geom) reduction(+ : sum_errors_photo) schedule(guided, 8) - for (int i = 0; i < frame::size(*source); i++) { + const auto perpoint_task = [&]( + int i, + Eigen::Matrix* H_target, + Eigen::Matrix* H_source, + Eigen::Matrix* H_target_source, + Eigen::Matrix* b_target, + Eigen::Matrix* b_source) { const long target_index = correspondences[i]; if (target_index < 0) { - continue; + return 0.0; } // source atributes @@ -167,25 +180,20 @@ double IntegratedColoredGICPFactor_ J_transed_target = Eigen::Matrix::Zero(); J_transed_target.block<3, 3>(0, 0) = gtsam::SO3::Hat(transed_A.head<3>()); J_transed_target.block<3, 3>(0, 3) = -Eigen::Matrix3d::Identity(); @@ -198,11 +206,11 @@ double IntegratedColoredGICPFactor_ J_projected_transed = Eigen::Matrix4d::Identity() - normal_B * normal_B.transpose(); @@ -215,30 +223,20 @@ double IntegratedColoredGICPFactor_ J_ephoto_target = J_ephoto_transed * J_transed_target; Eigen::Matrix J_ephoto_source = J_ephoto_transed * J_transed_source; - Hs_target[thread_num] += J_ephoto_target.transpose() * photometric_term_weight * J_ephoto_target; - Hs_source[thread_num] += J_ephoto_source.transpose() * photometric_term_weight * J_ephoto_source; - Hs_target_source[thread_num] += J_ephoto_target.transpose() * photometric_term_weight * J_ephoto_source; - bs_target[thread_num] += J_ephoto_target.transpose() * photometric_term_weight * error_photo; - bs_source[thread_num] += J_ephoto_source.transpose() * photometric_term_weight * error_photo; - } + *H_target += J_ephoto_target.transpose() * photometric_term_weight * J_ephoto_target; + *H_source += J_ephoto_source.transpose() * photometric_term_weight * J_ephoto_source; + *H_target_source += J_ephoto_target.transpose() * photometric_term_weight * J_ephoto_source; + *b_target += J_ephoto_target.transpose() * photometric_term_weight * residual_photo; + *b_source += J_ephoto_source.transpose() * photometric_term_weight * residual_photo; - if (H_target && H_source && H_target_source && b_target && b_source) { - H_target->setZero(); - H_source->setZero(); - H_target_source->setZero(); - b_target->setZero(); - b_source->setZero(); - - for (int i = 0; i < num_threads; i++) { - (*H_target) += Hs_target[i]; - (*H_source) += Hs_source[i]; - (*H_target_source) += Hs_target_source[i]; - (*b_target) += bs_target[i]; - (*b_source) += bs_source[i]; - } - } + return error_geom + error_photo; + }; - return sum_errors_geom + sum_errors_photo; + if (is_omp_default()) { + return scan_matching_reduce_omp(perpoint_task, frame::size(*source), num_threads, H_target, H_source, H_target_source, b_target, b_source); + } else { + return scan_matching_reduce_tbb(perpoint_task, frame::size(*source), H_target, H_source, H_target_source, b_target, b_source); + } } } // namespace gtsam_points \ No newline at end of file diff --git a/src/gtsam_points/factors/intensity_gradients.cpp b/src/gtsam_points/factors/intensity_gradients.cpp index 9951340..2eb79a8 100644 --- a/src/gtsam_points/factors/intensity_gradients.cpp +++ b/src/gtsam_points/factors/intensity_gradients.cpp @@ -7,6 +7,12 @@ #include #include +#include +#include + +#ifdef GTSAM_POINTS_TBB +#include +#endif namespace gtsam_points { @@ -72,8 +78,7 @@ IntensityGradients::Ptr IntensityGradients::estimate(const PointCloud::ConstPtr& IntensityGradients::Ptr gradients(new IntensityGradients); gradients->intensity_gradients.resize(frame::size(*frame)); -#pragma omp parallel for num_threads(num_threads) schedule(guided, 8) - for (int i = 0; i < frame::size(*frame); i++) { + const auto perpoint_task = [&](int i) { std::vector k_indices(k_neighbors); std::vector k_sq_dists(k_neighbors); kdtree.knn_search(frame::point(*frame, i).data(), k_neighbors, k_indices.data(), k_sq_dists.data()); @@ -103,6 +108,24 @@ IntensityGradients::Ptr IntensityGradients::estimate(const PointCloud::ConstPtr& Eigen::Matrix3d H = (A.transpose() * A).block<3, 3>(0, 0); Eigen::Vector3d e = (A.transpose() * b).head<3>(); gradients->intensity_gradients[i] << H.inverse() * e, 0.0; + }; + + if (is_omp_default()) { +#pragma omp parallel for num_threads(num_threads) schedule(guided, 8) + for (int i = 0; i < frame->size(); i++) { + perpoint_task(i); + } + } else { +#ifdef GTSAM_POINTS_TBB + tbb::parallel_for(tbb::blocked_range(0, frame->size(), 8), [&](const tbb::blocked_range& range) { + for (int i = range.begin(); i < range.end(); i++) { + perpoint_task(i); + } + }); +#else + std::cerr << "error: TBB is not available" << std::endl; + abort(); +#endif } return gradients; @@ -130,8 +153,7 @@ IntensityGradients::estimate(const gtsam_points::PointCloudCPU::Ptr& frame, int const int k_neighbors = std::max(k_geom_neighbors, k_photo_neighbors); -#pragma omp parallel for num_threads(num_threads) schedule(guided, 8) - for (int i = 0; i < frame->size(); i++) { + const auto perpoint_task = [&](int i) { std::vector k_indices(k_neighbors); std::vector k_sq_dists(k_neighbors); kdtree.knn_search(frame->points[i].data(), k_neighbors, k_indices.data(), k_sq_dists.data()); @@ -195,6 +217,24 @@ IntensityGradients::estimate(const gtsam_points::PointCloudCPU::Ptr& frame, int Eigen::Matrix3d H = (A.transpose() * A).block<3, 3>(0, 0); Eigen::Vector3d e = (A.transpose() * b).head<3>(); gradients->intensity_gradients[i] << H.inverse() * e, 0.0; + }; + + if (is_omp_default()) { +#pragma omp parallel for num_threads(num_threads) schedule(guided, 8) + for (int i = 0; i < frame->size(); i++) { + perpoint_task(i); + } + } else { +#ifdef GTSAM_POINTS_TBB + tbb::parallel_for(tbb::blocked_range(0, frame->size(), 8), [&](const tbb::blocked_range& range) { + for (int i = range.begin(); i < range.end(); i++) { + perpoint_task(i); + } + }); +#else + std::cerr << "error: TBB is not available" << std::endl; + abort(); +#endif } return gradients; diff --git a/src/test/test_colored_gicp.cpp b/src/test/test_colored_gicp.cpp index 7b16446..5cc8168 100644 --- a/src/test/test_colored_gicp.cpp +++ b/src/test/test_colored_gicp.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -104,16 +105,35 @@ struct ColoredGICPTestBase : public testing::Test { std::vector source_points; }; -TEST_F(ColoredGICPTestBase, Check) { +class ColoredGICPTest : public ColoredGICPTestBase, public testing::WithParamInterface> {}; + +INSTANTIATE_TEST_SUITE_P( + gtsam_points, + ColoredGICPTest, + testing::Combine(testing::Values("GICP"), testing::Values("NONE", "OMP", "TBB")), + [](const auto& info) { return std::get<0>(info.param) + "_" + std::get<1>(info.param); }); + +TEST_P(ColoredGICPTest, AlignmentTest) { + const auto param = GetParam(); + const std::string method = std::get<0>(param); + const std::string parallelism = std::get<1>(param); + const int num_threads = parallelism == "OMP" ? 2 : 1; + + if (parallelism == "TBB") { + gtsam_points::set_tbb_as_default(); + } else { + gtsam_points::set_omp_as_default(); + } + gtsam_points::PointCloudCPU::Ptr target(new gtsam_points::PointCloudCPU(target_points)); target->add_intensities(target_intensities); - auto target_gradients = gtsam_points::IntensityGradients::estimate(target, 10, 50, 1); + auto target_gradients = gtsam_points::IntensityGradients::estimate(target, 10, 50, num_threads); EXPECT_NE(target->normals, nullptr); EXPECT_NE(target->covs, nullptr); gtsam_points::PointCloud::Ptr target_ = target; - auto target_gradients2 = gtsam_points::IntensityGradients::estimate(target_, 50, 1); + auto target_gradients2 = gtsam_points::IntensityGradients::estimate(target_, 50, num_threads); gtsam_points::PointCloudCPU::Ptr source(new gtsam_points::PointCloudCPU(source_points)); source->add_intensities(source_intensities); @@ -123,13 +143,17 @@ TEST_F(ColoredGICPTestBase, Check) { std::shared_ptr target_intensity_tree( new gtsam_points::IntensityKdTree(target->points, target->intensities, target->size())); - test_factor(gtsam::make_shared(0, 1, target, source, target_tree, target_gradients), "DEFAULT"); - test_factor( - gtsam::make_shared(0, 1, target, source, target_intensity_tree, target_gradients), - "ESTIMATE_PHOTO_AND_GEOM"); - test_factor( - gtsam::make_shared(0, 1, target, source, target_intensity_tree, target_gradients2), - "ESTIMATE_PHOTO_ONLY"); + auto f1 = gtsam::make_shared(0, 1, target, source, target_tree, target_gradients); + f1->set_num_threads(num_threads); + test_factor(f1, "DEFAULT"); + + auto f2 = gtsam::make_shared(0, 1, target, source, target_intensity_tree, target_gradients); + f2->set_num_threads(num_threads); + test_factor(f2, "ESTIMATE_PHOTO_AND_GEOM"); + + auto f3 = gtsam::make_shared(0, 1, target, source, target_intensity_tree, target_gradients2); + f3->set_num_threads(num_threads); + test_factor(f3, "ESTIMATE_PHOTO_ONLY"); } int main(int argc, char** argv) {