Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions include/robin/bitset.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright (c) 2020, Massachusetts Institute of Technology,
// Cambridge, MA 02139
// All Rights Reserved
// Authors: Jingnan Shi, et al. (see THANKS for the full author list)
// See LICENSE for the license information

#pragma once

#include <cstddef>
#include <cstdint>
#include <cstring>

#include <omp.h>

#include <robin/math.hpp>

namespace robin {

inline void bitset_set(uint64_t* bits, size_t k) { bits[k / 64] |= (1ULL << (k % 64)); }

inline bool bitset_test(const uint64_t* bits, size_t k) {
return (bits[k / 64] >> (k % 64)) & 1ULL;
}

/**
* @brief Count set bits in a bitset using parallel popcount
*/
inline size_t bitset_popcount(const uint64_t* bits, size_t num_words) {
size_t count = 0;
#pragma omp parallel for reduction(+ : count)
for (size_t w = 0; w < num_words; ++w) {
count += __builtin_popcountll(bits[w]);
}
return count;
}

/**
* @brief Build CSR graph from edge bitset.
* The bitset has one bit per pair (i,j) in linear order.
* Builds undirected graph: each edge stored in both directions.
* @param bits Edge bitset
* @param N Number of vertices
* @param num_pairs N*(N-1)/2
* @param offsets_out Output: CSR offsets array (size N+1), caller must delete[]
* @param edges_out Output: CSR edges array (size 2*edge_count), caller must delete[]
* @param edge_count_out Output: number of undirected edges
*/
inline void bitset_to_csr(const uint64_t* bits, size_t N, size_t num_pairs, size_t** offsets_out,
size_t** edges_out, size_t* edge_count_out) {
// Step 1: Compute per-vertex degree
auto* degrees = new size_t[N]();
for (size_t k = 0; k < num_pairs; ++k) {
if (bitset_test(bits, k)) {
auto [i, j] = pair_from_linear_index(k, N);
degrees[i]++;
degrees[j]++;
}
}

// Step 2: Prefix sum to get offsets
auto* offsets = new size_t[N + 1];
offsets[0] = 0;
for (size_t v = 0; v < N; ++v) {
offsets[v + 1] = offsets[v] + degrees[v];
}
size_t total_entries = offsets[N]; // = 2 * edge_count
*edge_count_out = total_entries / 2;

// Step 3: Fill edges
auto* edges = new size_t[total_entries];
auto* pos = new size_t[N]();
for (size_t k = 0; k < num_pairs; ++k) {
if (bitset_test(bits, k)) {
auto [i, j] = pair_from_linear_index(k, N);
edges[offsets[i] + pos[i]++] = j;
edges[offsets[j] + pos[j]++] = i;
}
}

*offsets_out = offsets;
*edges_out = edges;
delete[] degrees;
delete[] pos;
}

} // namespace robin
65 changes: 65 additions & 0 deletions include/robin/distance.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright (c) 2020, Massachusetts Institute of Technology,
// Cambridge, MA 02139
// All Rights Reserved
// Authors: Jingnan Shi, et al. (see THANKS for the full author list)
// See LICENSE for the license information

#pragma once

#include <cmath>
#include <cstddef>

#include <omp.h>

#include <robin/math.hpp>

namespace robin {

/**
* @brief Precompute all N*(N-1)/2 pairwise Euclidean distances for a set of points.
* Points are stored column-major in a (dim x N) matrix as raw double*.
* Output is float* for SIMD throughput (8 floats vs 4 doubles per 256-bit register).
*/
inline void precompute_pairwise_euclidean_f(const double* points, size_t dim, size_t N, float* out) {
size_t num_pairs = N * (N - 1) / 2;
#pragma omp parallel for schedule(static)
for (size_t k = 0; k < num_pairs; ++k) {
auto [i, j] = pair_from_linear_index(k, N);
const double* pi = points + i * dim;
const double* pj = points + j * dim;
double sum_sq = 0.0;
for (size_t d = 0; d < dim; ++d) {
double diff = pi[d] - pj[d];
sum_sq += diff * diff;
}
out[k] = static_cast<float>(std::sqrt(sum_sq));
}
}

/**
* @brief Precompute all N*(N-1)/2 pairwise cosine similarities.
* Vectors are stored column-major in a (dim x N) matrix as raw double*.
*/
inline void precompute_pairwise_cosine_f(const double* vectors, size_t dim, size_t N, float* out) {
size_t num_pairs = N * (N - 1) / 2;
#pragma omp parallel for schedule(static)
for (size_t k = 0; k < num_pairs; ++k) {
auto [i, j] = pair_from_linear_index(k, N);
const double* vi = vectors + i * dim;
const double* vj = vectors + j * dim;
double dot = 0.0, norm_i = 0.0, norm_j = 0.0;
for (size_t d = 0; d < dim; ++d) {
dot += vi[d] * vj[d];
norm_i += vi[d] * vi[d];
norm_j += vj[d] * vj[d];
}
double cosine = dot / (std::sqrt(norm_i) * std::sqrt(norm_j));
if (cosine > 1.0)
cosine = 1.0;
else if (cosine < -1.0)
cosine = -1.0;
out[k] = static_cast<float>(cosine);
}
}

} // namespace robin
17 changes: 17 additions & 0 deletions include/robin/math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

#pragma once

#include <utility>

#include <Eigen/Core>
#include <Eigen/Dense>
#include <iostream>
Expand All @@ -14,6 +16,21 @@

namespace robin {

/**
* @brief Decode a linear index to a pair of indices for upper-triangular iteration.
* Maps k in [0, N*(N-1)/2) to unique pair (i,j) where 0 <= i < j < N.
* This is the same mapping used inline in core.hpp for pairwise iteration.
*/
inline std::pair<size_t, size_t> pair_from_linear_index(size_t k, size_t N) {
size_t i = k / N;
size_t j = k % N;
if (j <= i) {
i = N - i - 2;
j = N - j - 1;
}
return {i, j};
}

/**
* Calculate (n choose k), also known as the binomial coefficient
* @param n
Expand Down
23 changes: 23 additions & 0 deletions include/robin/robin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,4 +97,27 @@ IGraph* Make3dNormalRegInvGraph(
const Eigen::Vector2d& noise_bound,
GraphsStorageType graph_storage_type = robin::GraphsStorageType::ATOMIC_CSR);

//
// Precomputed distance pipeline (optimized paths)
//

/**
* @brief Build a CSR compatibility graph for 3D point registration using precomputed distances.
* Returns nullptr if memory budget exceeds 512MB, in which case caller falls back to existing path.
*/
CSRGraph* BuildCSRCompGraph_3dReg_precomputed(const double* src_points, const double* model_points,
size_t N, double noise_bound);

/**
* @brief Build a CSR compatibility graph for 3D point+normal registration using precomputed
* distances.
* Returns nullptr if memory budget exceeds 512MB, in which case caller falls back to existing path.
*/
CSRGraph* BuildCSRCompGraph_3dNormalReg_precomputed(const double* src_points,
const double* src_normals,
const double* model_points,
const double* model_normals, size_t N,
double point_noise_bound,
double normal_noise_bound);

} // namespace robin
154 changes: 151 additions & 3 deletions src/robin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
// Authors: Jingnan Shi, et al. (see THANKS for the full author list)
// See LICENSE for the license information

#include <cmath>

#include <robin/bitset.hpp>
#include <robin/distance.hpp>
#include <robin/robin.hpp>

std::vector<size_t> robin::FindInlierStructure(const IGraph* g,
Expand Down Expand Up @@ -91,6 +95,15 @@ robin::IGraph* robin::MakeRotAvgInvGraph(const std::vector<Eigen::Matrix3d>& mea
robin::IGraph* robin::Make3dRegInvGraph(const Eigen::Matrix3Xd& src_3d_points,
const Eigen::Matrix3Xd& dst_3d_points, double noise_bound,
GraphsStorageType graph_storage_type) {
// Try optimized precomputed path
size_t N = dst_3d_points.cols();
auto* g_precomputed =
BuildCSRCompGraph_3dReg_precomputed(dst_3d_points.data(), src_3d_points.data(), N, noise_bound);
if (g_precomputed != nullptr) {
return g_precomputed;
}

// Fall back to existing path
// Measurement 3D points set
Points3d measurements(dst_3d_points);

Expand Down Expand Up @@ -119,16 +132,35 @@ robin::IGraph* robin::Make3dNormalRegInvGraph(
assert(noise_bound.size() == 2);
assert(src_3d_points_with_normals.cols() == dst_3d_points_with_normals.cols());

size_t N = dst_3d_points_with_normals.cols();

// Transform the normal noise bound the same way the existing path does
double normal_noise_bound = std::cos(noise_bound(1));

// Extract contiguous point and normal data from the 6xN matrices.
// The 6xN matrix is column-major, so rows are interleaved. We need separate 3xN buffers.
Eigen::Matrix3Xd dst_points = dst_3d_points_with_normals.topRows<3>();
Eigen::Matrix3Xd dst_normals = dst_3d_points_with_normals.bottomRows<3>();
Eigen::Matrix3Xd src_points = src_3d_points_with_normals.topRows<3>();
Eigen::Matrix3Xd src_normals = src_3d_points_with_normals.bottomRows<3>();

// Try optimized precomputed path
auto* g_precomputed = BuildCSRCompGraph_3dNormalReg_precomputed(
dst_points.data(), dst_normals.data(), src_points.data(), src_normals.data(), N,
noise_bound[0], normal_noise_bound);
if (g_precomputed != nullptr) {
return g_precomputed;
}

// Fall back to existing path
// Measurements
Points3dWithNormals measurements(dst_3d_points_with_normals);

// Models
Points3dWithNormals models(src_3d_points_with_normals);

// Compatibility check function
Eigen::Vector2d updated_noise_bound = noise_bound;
updated_noise_bound(1) = std::cos(updated_noise_bound(1));
PointsNormals3dRegCompCheck comp_check(&models, updated_noise_bound[0], updated_noise_bound[1]);
PointsNormals3dRegCompCheck comp_check(&models, noise_bound[0], normal_noise_bound);

// InvGraphConstructor
robin::CompGraphConstructor<Points3dWithNormals, robin::PointsNormals3dRegCompCheck, 2>
Expand All @@ -141,3 +173,119 @@ robin::IGraph* robin::Make3dNormalRegInvGraph(

return g;
}

//
// Precomputed distance pipeline implementations
//

robin::CSRGraph* robin::BuildCSRCompGraph_3dReg_precomputed(const double* src_points,
const double* model_points, size_t N,
double noise_bound) {
size_t num_pairs = N * (N - 1) / 2;

// Memory budget check: if > 512MB for the two distance arrays, fall back
size_t mem_needed = num_pairs * sizeof(float) * 2;
if (mem_needed > 512ULL * 1024 * 1024) {
return nullptr;
}

// Precompute distances
auto* src_dists = new float[num_pairs];
auto* model_dists = new float[num_pairs];
precompute_pairwise_euclidean_f(src_points, 3, N, src_dists);
precompute_pairwise_euclidean_f(model_points, 3, N, model_dists);

// Bitset scan
size_t num_words = (num_pairs + 63) / 64;
auto* edge_bits = new uint64_t[num_words]();
float threshold_f = static_cast<float>(noise_bound);

#pragma omp parallel for schedule(static)
for (size_t k = 0; k < num_pairs; ++k) {
if (std::fabsf(src_dists[k] - model_dists[k]) <= threshold_f) {
size_t word = k / 64;
size_t bit = k % 64;
__atomic_or_fetch(&edge_bits[word], 1ULL << bit, __ATOMIC_RELAXED);
}
}

delete[] src_dists;
delete[] model_dists;

// Build CSR from bitset
size_t* offsets = nullptr;
size_t* edges = nullptr;
size_t edge_count = 0;
bitset_to_csr(edge_bits, N, num_pairs, &offsets, &edges, &edge_count);
delete[] edge_bits;

auto* g = new CSRGraph();
g->SetEdges(edge_count, edges);
g->SetOffsets(N, offsets);
return g;
}

robin::CSRGraph* robin::BuildCSRCompGraph_3dNormalReg_precomputed(
const double* src_points, const double* src_normals, const double* model_points,
const double* model_normals, size_t N, double point_noise_bound, double normal_noise_bound) {
size_t num_pairs = N * (N - 1) / 2;

// Memory budget check: 4 float arrays (src_dists, model_dists, src_cosines, model_cosines)
size_t mem_needed = num_pairs * sizeof(float) * 4;
if (mem_needed > 512ULL * 1024 * 1024) {
return nullptr;
}

// Precompute point distances
auto* src_dists = new float[num_pairs];
auto* model_dists = new float[num_pairs];
precompute_pairwise_euclidean_f(src_points, 3, N, src_dists);
precompute_pairwise_euclidean_f(model_points, 3, N, model_dists);

// Precompute normal cosine similarities
auto* src_cosines = new float[num_pairs];
auto* model_cosines = new float[num_pairs];
precompute_pairwise_cosine_f(src_normals, 3, N, src_cosines);
precompute_pairwise_cosine_f(model_normals, 3, N, model_cosines);

// Bitset scan with short-circuit: point check first, then normal check
size_t num_words = (num_pairs + 63) / 64;
auto* edge_bits = new uint64_t[num_words]();
float pt_threshold_f = static_cast<float>(point_noise_bound);
float normal_threshold_f = static_cast<float>(normal_noise_bound);

#pragma omp parallel for schedule(static)
for (size_t k = 0; k < num_pairs; ++k) {
// Point compatibility check
if (std::fabsf(src_dists[k] - model_dists[k]) > pt_threshold_f) {
continue;
}
// Normal compatibility check (same formula as PointsNormals3dRegCompCheck)
float ca = src_cosines[k];
float cb = model_cosines[k];
float lhs = ca * cb + std::sqrtf((1.0f - ca * ca) * (1.0f - cb * cb));
if (lhs < normal_threshold_f) {
continue;
}
size_t word = k / 64;
size_t bit = k % 64;
__atomic_or_fetch(&edge_bits[word], 1ULL << bit, __ATOMIC_RELAXED);
}

delete[] src_dists;
delete[] model_dists;
delete[] src_cosines;
delete[] model_cosines;

// Build CSR from bitset
size_t* offsets = nullptr;
size_t* edges = nullptr;
size_t edge_count = 0;
bitset_to_csr(edge_bits, N, num_pairs, &offsets, &edges, &edge_count);
delete[] edge_bits;

auto* g = new CSRGraph();
g->SetEdges(edge_count, edges);
g->SetOffsets(N, offsets);
return g;
}
Loading