diff --git a/include/robin/bitset.hpp b/include/robin/bitset.hpp new file mode 100644 index 0000000..f5f4a97 --- /dev/null +++ b/include/robin/bitset.hpp @@ -0,0 +1,86 @@ +// Copyright (c) 2020, Massachusetts Institute of Technology, +// Cambridge, MA 02139 +// All Rights Reserved +// Authors: Jingnan Shi, et al. (see THANKS for the full author list) +// See LICENSE for the license information + +#pragma once + +#include +#include +#include + +#include + +#include + +namespace robin { + +inline void bitset_set(uint64_t* bits, size_t k) { bits[k / 64] |= (1ULL << (k % 64)); } + +inline bool bitset_test(const uint64_t* bits, size_t k) { + return (bits[k / 64] >> (k % 64)) & 1ULL; +} + +/** + * @brief Count set bits in a bitset using parallel popcount + */ +inline size_t bitset_popcount(const uint64_t* bits, size_t num_words) { + size_t count = 0; +#pragma omp parallel for reduction(+ : count) + for (size_t w = 0; w < num_words; ++w) { + count += __builtin_popcountll(bits[w]); + } + return count; +} + +/** + * @brief Build CSR graph from edge bitset. + * The bitset has one bit per pair (i,j) in linear order. + * Builds undirected graph: each edge stored in both directions. + * @param bits Edge bitset + * @param N Number of vertices + * @param num_pairs N*(N-1)/2 + * @param offsets_out Output: CSR offsets array (size N+1), caller must delete[] + * @param edges_out Output: CSR edges array (size 2*edge_count), caller must delete[] + * @param edge_count_out Output: number of undirected edges + */ +inline void bitset_to_csr(const uint64_t* bits, size_t N, size_t num_pairs, size_t** offsets_out, + size_t** edges_out, size_t* edge_count_out) { + // Step 1: Compute per-vertex degree + auto* degrees = new size_t[N](); + for (size_t k = 0; k < num_pairs; ++k) { + if (bitset_test(bits, k)) { + auto [i, j] = pair_from_linear_index(k, N); + degrees[i]++; + degrees[j]++; + } + } + + // Step 2: Prefix sum to get offsets + auto* offsets = new size_t[N + 1]; + offsets[0] = 0; + for (size_t v = 0; v < N; ++v) { + offsets[v + 1] = offsets[v] + degrees[v]; + } + size_t total_entries = offsets[N]; // = 2 * edge_count + *edge_count_out = total_entries / 2; + + // Step 3: Fill edges + auto* edges = new size_t[total_entries]; + auto* pos = new size_t[N](); + for (size_t k = 0; k < num_pairs; ++k) { + if (bitset_test(bits, k)) { + auto [i, j] = pair_from_linear_index(k, N); + edges[offsets[i] + pos[i]++] = j; + edges[offsets[j] + pos[j]++] = i; + } + } + + *offsets_out = offsets; + *edges_out = edges; + delete[] degrees; + delete[] pos; +} + +} // namespace robin diff --git a/include/robin/distance.hpp b/include/robin/distance.hpp new file mode 100644 index 0000000..f7e78ab --- /dev/null +++ b/include/robin/distance.hpp @@ -0,0 +1,65 @@ +// Copyright (c) 2020, Massachusetts Institute of Technology, +// Cambridge, MA 02139 +// All Rights Reserved +// Authors: Jingnan Shi, et al. (see THANKS for the full author list) +// See LICENSE for the license information + +#pragma once + +#include +#include + +#include + +#include + +namespace robin { + +/** + * @brief Precompute all N*(N-1)/2 pairwise Euclidean distances for a set of points. + * Points are stored column-major in a (dim x N) matrix as raw double*. + * Output is float* for SIMD throughput (8 floats vs 4 doubles per 256-bit register). + */ +inline void precompute_pairwise_euclidean_f(const double* points, size_t dim, size_t N, float* out) { + size_t num_pairs = N * (N - 1) / 2; +#pragma omp parallel for schedule(static) + for (size_t k = 0; k < num_pairs; ++k) { + auto [i, j] = pair_from_linear_index(k, N); + const double* pi = points + i * dim; + const double* pj = points + j * dim; + double sum_sq = 0.0; + for (size_t d = 0; d < dim; ++d) { + double diff = pi[d] - pj[d]; + sum_sq += diff * diff; + } + out[k] = static_cast(std::sqrt(sum_sq)); + } +} + +/** + * @brief Precompute all N*(N-1)/2 pairwise cosine similarities. + * Vectors are stored column-major in a (dim x N) matrix as raw double*. + */ +inline void precompute_pairwise_cosine_f(const double* vectors, size_t dim, size_t N, float* out) { + size_t num_pairs = N * (N - 1) / 2; +#pragma omp parallel for schedule(static) + for (size_t k = 0; k < num_pairs; ++k) { + auto [i, j] = pair_from_linear_index(k, N); + const double* vi = vectors + i * dim; + const double* vj = vectors + j * dim; + double dot = 0.0, norm_i = 0.0, norm_j = 0.0; + for (size_t d = 0; d < dim; ++d) { + dot += vi[d] * vj[d]; + norm_i += vi[d] * vi[d]; + norm_j += vj[d] * vj[d]; + } + double cosine = dot / (std::sqrt(norm_i) * std::sqrt(norm_j)); + if (cosine > 1.0) + cosine = 1.0; + else if (cosine < -1.0) + cosine = -1.0; + out[k] = static_cast(cosine); + } +} + +} // namespace robin diff --git a/include/robin/math.hpp b/include/robin/math.hpp index f2d9cba..1e6e8ae 100644 --- a/include/robin/math.hpp +++ b/include/robin/math.hpp @@ -6,6 +6,8 @@ #pragma once +#include + #include #include #include @@ -14,6 +16,21 @@ namespace robin { +/** + * @brief Decode a linear index to a pair of indices for upper-triangular iteration. + * Maps k in [0, N*(N-1)/2) to unique pair (i,j) where 0 <= i < j < N. + * This is the same mapping used inline in core.hpp for pairwise iteration. + */ +inline std::pair pair_from_linear_index(size_t k, size_t N) { + size_t i = k / N; + size_t j = k % N; + if (j <= i) { + i = N - i - 2; + j = N - j - 1; + } + return {i, j}; +} + /** * Calculate (n choose k), also known as the binomial coefficient * @param n diff --git a/include/robin/robin.hpp b/include/robin/robin.hpp index ea45817..c406509 100644 --- a/include/robin/robin.hpp +++ b/include/robin/robin.hpp @@ -97,4 +97,27 @@ IGraph* Make3dNormalRegInvGraph( const Eigen::Vector2d& noise_bound, GraphsStorageType graph_storage_type = robin::GraphsStorageType::ATOMIC_CSR); +// +// Precomputed distance pipeline (optimized paths) +// + +/** + * @brief Build a CSR compatibility graph for 3D point registration using precomputed distances. + * Returns nullptr if memory budget exceeds 512MB, in which case caller falls back to existing path. + */ +CSRGraph* BuildCSRCompGraph_3dReg_precomputed(const double* src_points, const double* model_points, + size_t N, double noise_bound); + +/** + * @brief Build a CSR compatibility graph for 3D point+normal registration using precomputed + * distances. + * Returns nullptr if memory budget exceeds 512MB, in which case caller falls back to existing path. + */ +CSRGraph* BuildCSRCompGraph_3dNormalReg_precomputed(const double* src_points, + const double* src_normals, + const double* model_points, + const double* model_normals, size_t N, + double point_noise_bound, + double normal_noise_bound); + } // namespace robin diff --git a/src/robin.cpp b/src/robin.cpp index da6cbcf..d5253d2 100644 --- a/src/robin.cpp +++ b/src/robin.cpp @@ -4,6 +4,10 @@ // Authors: Jingnan Shi, et al. (see THANKS for the full author list) // See LICENSE for the license information +#include + +#include +#include #include std::vector robin::FindInlierStructure(const IGraph* g, @@ -91,6 +95,15 @@ robin::IGraph* robin::MakeRotAvgInvGraph(const std::vector& mea robin::IGraph* robin::Make3dRegInvGraph(const Eigen::Matrix3Xd& src_3d_points, const Eigen::Matrix3Xd& dst_3d_points, double noise_bound, GraphsStorageType graph_storage_type) { + // Try optimized precomputed path + size_t N = dst_3d_points.cols(); + auto* g_precomputed = + BuildCSRCompGraph_3dReg_precomputed(dst_3d_points.data(), src_3d_points.data(), N, noise_bound); + if (g_precomputed != nullptr) { + return g_precomputed; + } + + // Fall back to existing path // Measurement 3D points set Points3d measurements(dst_3d_points); @@ -119,6 +132,27 @@ robin::IGraph* robin::Make3dNormalRegInvGraph( assert(noise_bound.size() == 2); assert(src_3d_points_with_normals.cols() == dst_3d_points_with_normals.cols()); + size_t N = dst_3d_points_with_normals.cols(); + + // Transform the normal noise bound the same way the existing path does + double normal_noise_bound = std::cos(noise_bound(1)); + + // Extract contiguous point and normal data from the 6xN matrices. + // The 6xN matrix is column-major, so rows are interleaved. We need separate 3xN buffers. + Eigen::Matrix3Xd dst_points = dst_3d_points_with_normals.topRows<3>(); + Eigen::Matrix3Xd dst_normals = dst_3d_points_with_normals.bottomRows<3>(); + Eigen::Matrix3Xd src_points = src_3d_points_with_normals.topRows<3>(); + Eigen::Matrix3Xd src_normals = src_3d_points_with_normals.bottomRows<3>(); + + // Try optimized precomputed path + auto* g_precomputed = BuildCSRCompGraph_3dNormalReg_precomputed( + dst_points.data(), dst_normals.data(), src_points.data(), src_normals.data(), N, + noise_bound[0], normal_noise_bound); + if (g_precomputed != nullptr) { + return g_precomputed; + } + + // Fall back to existing path // Measurements Points3dWithNormals measurements(dst_3d_points_with_normals); @@ -126,9 +160,7 @@ robin::IGraph* robin::Make3dNormalRegInvGraph( Points3dWithNormals models(src_3d_points_with_normals); // Compatibility check function - Eigen::Vector2d updated_noise_bound = noise_bound; - updated_noise_bound(1) = std::cos(updated_noise_bound(1)); - PointsNormals3dRegCompCheck comp_check(&models, updated_noise_bound[0], updated_noise_bound[1]); + PointsNormals3dRegCompCheck comp_check(&models, noise_bound[0], normal_noise_bound); // InvGraphConstructor robin::CompGraphConstructor @@ -141,3 +173,119 @@ robin::IGraph* robin::Make3dNormalRegInvGraph( return g; } + +// +// Precomputed distance pipeline implementations +// + +robin::CSRGraph* robin::BuildCSRCompGraph_3dReg_precomputed(const double* src_points, + const double* model_points, size_t N, + double noise_bound) { + size_t num_pairs = N * (N - 1) / 2; + + // Memory budget check: if > 512MB for the two distance arrays, fall back + size_t mem_needed = num_pairs * sizeof(float) * 2; + if (mem_needed > 512ULL * 1024 * 1024) { + return nullptr; + } + + // Precompute distances + auto* src_dists = new float[num_pairs]; + auto* model_dists = new float[num_pairs]; + precompute_pairwise_euclidean_f(src_points, 3, N, src_dists); + precompute_pairwise_euclidean_f(model_points, 3, N, model_dists); + + // Bitset scan + size_t num_words = (num_pairs + 63) / 64; + auto* edge_bits = new uint64_t[num_words](); + float threshold_f = static_cast(noise_bound); + +#pragma omp parallel for schedule(static) + for (size_t k = 0; k < num_pairs; ++k) { + if (std::fabsf(src_dists[k] - model_dists[k]) <= threshold_f) { + size_t word = k / 64; + size_t bit = k % 64; + __atomic_or_fetch(&edge_bits[word], 1ULL << bit, __ATOMIC_RELAXED); + } + } + + delete[] src_dists; + delete[] model_dists; + + // Build CSR from bitset + size_t* offsets = nullptr; + size_t* edges = nullptr; + size_t edge_count = 0; + bitset_to_csr(edge_bits, N, num_pairs, &offsets, &edges, &edge_count); + delete[] edge_bits; + + auto* g = new CSRGraph(); + g->SetEdges(edge_count, edges); + g->SetOffsets(N, offsets); + return g; +} + +robin::CSRGraph* robin::BuildCSRCompGraph_3dNormalReg_precomputed( + const double* src_points, const double* src_normals, const double* model_points, + const double* model_normals, size_t N, double point_noise_bound, double normal_noise_bound) { + size_t num_pairs = N * (N - 1) / 2; + + // Memory budget check: 4 float arrays (src_dists, model_dists, src_cosines, model_cosines) + size_t mem_needed = num_pairs * sizeof(float) * 4; + if (mem_needed > 512ULL * 1024 * 1024) { + return nullptr; + } + + // Precompute point distances + auto* src_dists = new float[num_pairs]; + auto* model_dists = new float[num_pairs]; + precompute_pairwise_euclidean_f(src_points, 3, N, src_dists); + precompute_pairwise_euclidean_f(model_points, 3, N, model_dists); + + // Precompute normal cosine similarities + auto* src_cosines = new float[num_pairs]; + auto* model_cosines = new float[num_pairs]; + precompute_pairwise_cosine_f(src_normals, 3, N, src_cosines); + precompute_pairwise_cosine_f(model_normals, 3, N, model_cosines); + + // Bitset scan with short-circuit: point check first, then normal check + size_t num_words = (num_pairs + 63) / 64; + auto* edge_bits = new uint64_t[num_words](); + float pt_threshold_f = static_cast(point_noise_bound); + float normal_threshold_f = static_cast(normal_noise_bound); + +#pragma omp parallel for schedule(static) + for (size_t k = 0; k < num_pairs; ++k) { + // Point compatibility check + if (std::fabsf(src_dists[k] - model_dists[k]) > pt_threshold_f) { + continue; + } + // Normal compatibility check (same formula as PointsNormals3dRegCompCheck) + float ca = src_cosines[k]; + float cb = model_cosines[k]; + float lhs = ca * cb + std::sqrtf((1.0f - ca * ca) * (1.0f - cb * cb)); + if (lhs < normal_threshold_f) { + continue; + } + size_t word = k / 64; + size_t bit = k % 64; + __atomic_or_fetch(&edge_bits[word], 1ULL << bit, __ATOMIC_RELAXED); + } + + delete[] src_dists; + delete[] model_dists; + delete[] src_cosines; + delete[] model_cosines; + + // Build CSR from bitset + size_t* offsets = nullptr; + size_t* edges = nullptr; + size_t edge_count = 0; + bitset_to_csr(edge_bits, N, num_pairs, &offsets, &edges, &edge_count); + delete[] edge_bits; + + auto* g = new CSRGraph(); + g->SetEdges(edge_count, edges); + g->SetOffsets(N, offsets); + return g; +} diff --git a/tests/core_test.cpp b/tests/core_test.cpp index 08614e1..a9507ca 100644 --- a/tests/core_test.cpp +++ b/tests/core_test.cpp @@ -283,6 +283,74 @@ TEST_CASE("large compatibility graph adj list") { std::cout << "Core time (ms): " << core_finding_time / trials << std::endl; } +TEST_CASE("precomputed 3d reg graph matches existing path") { + // an arbitrary transformation matrix + Eigen::Matrix4d T; + // clang-format off + T << 9.96926560e-01, 6.68735757e-02, -4.06664421e-02, -1.15576939e-01, + -6.61289946e-02, 9.97617877e-01, 1.94008687e-02, -3.87705398e-02, + 4.18675510e-02, -1.66517807e-02, 9.98977765e-01, 1.14874890e-01, + 0, 0, 0, 1; + // clang-format on + + size_t N = 50; + + // random 3d points + Eigen::Matrix src_points = + Eigen::Matrix::Random(3, N); + Eigen::Matrix src_h; + src_h.resize(4, src_points.cols()); + src_h.topRows(3) = src_points; + src_h.bottomRows(1) = Eigen::Matrix::Ones(N); + + // apply transformation + Eigen::Matrix tgt_h = T * src_h; + Eigen::Matrix tgt_points = tgt_h.topRows(3); + + // noise bound + double noise_bound = 0.1; + + // add noise + Eigen::Matrix noise = + Eigen::Matrix::Random(3, N) * noise_bound; + tgt_points = tgt_points + noise; + + SECTION("precomputed produces same edge set as atomic CSR") { + // Build graph via existing atomic CSR path + robin::Points3d measurements(tgt_points); + robin::Points3d model(src_points); + robin::Points3dRegCompCheck comp_check(&model, noise_bound); + robin::CompGraphConstructor graph_constructor; + graph_constructor.SetCompCheckFunction(&comp_check); + graph_constructor.SetMeasurements(&measurements); + robin::AtomicCSRGraph existing_g; + graph_constructor.BuildCSRCompGraph_parallel_two_passes_atomic(&existing_g); + + // Build graph via precomputed path + auto* precomputed_g = robin::BuildCSRCompGraph_3dReg_precomputed( + tgt_points.data(), src_points.data(), N, noise_bound); + REQUIRE(precomputed_g != nullptr); + + // Compare: extract sorted edge sets from both graphs + REQUIRE(precomputed_g->VertexCount() == existing_g.VertexCount()); + + // Compare per-vertex neighbor sets + for (size_t v = 0; v < N; ++v) { + std::set existing_neighbors; + for (size_t e = 0; e < existing_g.GetVertexDegree(v); ++e) { + existing_neighbors.insert(existing_g.GetVertexEdge(v, e)); + } + std::set precomputed_neighbors; + for (size_t e = 0; e < precomputed_g->GetVertexDegree(v); ++e) { + precomputed_neighbors.insert(precomputed_g->GetVertexEdge(v, e)); + } + REQUIRE(existing_neighbors == precomputed_neighbors); + } + + delete precomputed_g; + } +} + TEST_CASE("sample comp graph construction") { SECTION("2d vector averaging") { // diff --git a/tests/math_test.cpp b/tests/math_test.cpp index 50a873d..6fd4b19 100644 --- a/tests/math_test.cpp +++ b/tests/math_test.cpp @@ -51,6 +51,33 @@ TEST_CASE("n choose k") { } } +TEST_CASE("pair_from_linear_index") { + SECTION("N=5: all 10 pairs unique and valid") { + size_t N = 5; + std::set> pairs; + for (size_t k = 0; k < N * (N - 1) / 2; ++k) { + auto [i, j] = robin::pair_from_linear_index(k, N); + REQUIRE(i < j); + REQUIRE(j < N); + pairs.insert({i, j}); + } + REQUIRE(pairs.size() == N * (N - 1) / 2); + } + + SECTION("round-trip for various N") { + for (size_t N : {10, 50, 100}) { + std::set> pairs; + for (size_t k = 0; k < N * (N - 1) / 2; ++k) { + auto [i, j] = robin::pair_from_linear_index(k, N); + REQUIRE(i < j); + REQUIRE(j < N); + pairs.insert({i, j}); + } + REQUIRE(pairs.size() == N * (N - 1) / 2); + } + } +} + TEST_CASE("Combination decode") { SECTION("2 choose 2") { size_t n = 2; diff --git a/tests/robin_test.cpp b/tests/robin_test.cpp index 6a8fa57..8362b51 100644 --- a/tests/robin_test.cpp +++ b/tests/robin_test.cpp @@ -351,6 +351,86 @@ TEST_CASE("3d reg") { } } +TEST_CASE("3d reg precomputed pipeline") { + // Verify the optimized precomputed path (used by Make3dRegInvGraph) produces correct results + Eigen::Matrix4d T; + // clang-format off + T << 9.96926560e-01, 6.68735757e-02, -4.06664421e-02, -1.15576939e-01, + -6.61289946e-02, 9.97617877e-01, 1.94008687e-02, -3.87705398e-02, + 4.18675510e-02, -1.66517807e-02, 9.98977765e-01, 1.14874890e-01, + 0, 0, 0, 1; + // clang-format on + + SECTION("no outliers") { + size_t N = 20; + Eigen::Matrix src_points = + Eigen::Matrix::Random(3, N); + Eigen::Matrix src_h; + src_h.resize(4, src_points.cols()); + src_h.topRows(3) = src_points; + src_h.bottomRows(1) = Eigen::Matrix::Ones(N); + + Eigen::Matrix tgt_h = T * src_h; + Eigen::Matrix tgt_points = tgt_h.topRows(3); + + double noise_bound = 0.1; + Eigen::Matrix noise = + Eigen::Matrix::Random(3, N) * noise_bound; + tgt_points = tgt_points + noise; + + // Make3dRegInvGraph now uses precomputed path internally + auto* g = robin::Make3dRegInvGraph(src_points, tgt_points, 2 * noise_bound); + + auto actual_max_clique_indices = + robin::FindInlierStructure(g, robin::InlierGraphStructure::MAX_CLIQUE); + std::sort(actual_max_clique_indices.begin(), actual_max_clique_indices.end()); + + // All points should be inliers + std::vector expected_inliers; + for (size_t i = 0; i < N; ++i) + expected_inliers.push_back(i); + REQUIRE_THAT(actual_max_clique_indices, Catch::Equals(expected_inliers)); + + delete g; + } + + SECTION("with outliers") { + size_t N = 20; + Eigen::Matrix src_points = + Eigen::Matrix::Random(3, N); + Eigen::Matrix src_h; + src_h.resize(4, src_points.cols()); + src_h.topRows(3) = src_points; + src_h.bottomRows(1) = Eigen::Matrix::Ones(N); + + Eigen::Matrix tgt_h = T * src_h; + Eigen::Matrix tgt_points = tgt_h.topRows(3); + + double noise_bound = 0.1; + Eigen::Matrix noise = + Eigen::Matrix::Random(3, N) * noise_bound; + tgt_points = tgt_points + noise; + + // Create outliers at indices 2 and 7 + tgt_points.col(2) *= 100; + tgt_points.col(7) *= 100; + + auto* g = robin::Make3dRegInvGraph(src_points, tgt_points, 2 * noise_bound); + + auto actual_max_clique_indices = + robin::FindInlierStructure(g, robin::InlierGraphStructure::MAX_CLIQUE); + std::sort(actual_max_clique_indices.begin(), actual_max_clique_indices.end()); + + // Outlier indices should not appear in clique + std::set clique_set(actual_max_clique_indices.begin(), actual_max_clique_indices.end()); + REQUIRE(clique_set.count(2) == 0); + REQUIRE(clique_set.count(7) == 0); + REQUIRE(actual_max_clique_indices.size() == N - 2); + + delete g; + } +} + TEST_CASE("3d reg large instance") { // For benchmarking / profiling // an arbitrary transformation matrix