Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cpp/cmake_modules/DefineOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ takes precedence over ccache if a storage backend is configured" ON)
"SSE4_2"
"AVX2"
"AVX512"
"SVE128" # fixed size SVE
Comment thread
pitrou marked this conversation as resolved.
"SVE256" # "
"SVE512" # "
Comment thread
pitrou marked this conversation as resolved.
"MAX")

define_option(ARROW_ALTIVEC "Build with Altivec if compiler has support" ON)
Expand Down
32 changes: 28 additions & 4 deletions cpp/cmake_modules/SetupCxxFlags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,31 @@ elseif(ARROW_CPU_FLAG STREQUAL "ppc")
elseif(ARROW_CPU_FLAG STREQUAL "aarch64")
# Arm64 compiler flags, gcc/clang only
set(ARROW_ARMV8_MARCH "armv8-a")
Comment thread
pitrou marked this conversation as resolved.
check_cxx_compiler_flag("-march=${ARROW_ARMV8_MARCH}+sve" CXX_SUPPORTS_SVE)
set(ARROW_SVE_FLAGS "-march=${ARROW_ARMV8_MARCH}+sve")
set(ARROW_SVE128_FLAGS "${ARROW_SVE_FLAGS}" "-msve-vector-bits=128")
set(ARROW_SVE256_FLAGS "${ARROW_SVE_FLAGS}" "-msve-vector-bits=256")
set(ARROW_SVE512_FLAGS "${ARROW_SVE_FLAGS}" "-msve-vector-bits=512")
# We only have a way to do SVE dynamic dispatch on Linux (BSD may be possible
# but is currently not implemented).
# We still support explicitly setting runtime SIMD level to some SVE values
# on these platforms as this can be useful in development for building SVE
# code locally. The compiler supports it but the code won't run.
if((APPLE OR WIN32) AND ARROW_RUNTIME_SIMD_LEVEL STREQUAL "MAX")
set(ARROW_RUNTIME_SIMD_LEVEL "NONE")
endif()
check_cxx_compiler_flag("${ARROW_SVE_FLAGS}" CXX_SUPPORTS_SVE)
if(CXX_SUPPORTS_SVE AND ARROW_RUNTIME_SIMD_LEVEL MATCHES "^(SVE128|SVE256|SVE512|MAX)$")
set(ARROW_HAVE_RUNTIME_SVE128 ON)
add_definitions(-DARROW_HAVE_RUNTIME_SVE128)
endif()
if(CXX_SUPPORTS_SVE AND ARROW_RUNTIME_SIMD_LEVEL MATCHES "^(SVE256|SVE512|MAX)$")
set(ARROW_HAVE_RUNTIME_SVE256 ON)
add_definitions(-DARROW_HAVE_RUNTIME_SVE256)
endif()
if(CXX_SUPPORTS_SVE AND ARROW_RUNTIME_SIMD_LEVEL MATCHES "^(SVE512|MAX)$")
set(ARROW_HAVE_RUNTIME_SVE512 ON)
add_definitions(-DARROW_HAVE_RUNTIME_SVE512)
endif()
if(ARROW_SIMD_LEVEL STREQUAL "DEFAULT")
set(ARROW_SIMD_LEVEL "NEON")
endif()
Expand Down Expand Up @@ -528,8 +552,7 @@ if(ARROW_CPU_FLAG STREQUAL "aarch64")
if(NOT CXX_SUPPORTS_SVE)
message(FATAL_ERROR "SVE required but compiler doesn't support it.")
endif()
# -march=armv8-a+sve
set(ARROW_ARMV8_MARCH "${ARROW_ARMV8_MARCH}+sve")
Comment thread
AntoinePrv marked this conversation as resolved.
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} ${ARROW_SVE_FLAGS}")
string(REGEX MATCH "[0-9]+" SVE_VECTOR_BITS ${ARROW_SIMD_LEVEL})
if(SVE_VECTOR_BITS)
set(ARROW_HAVE_SVE${SVE_VECTOR_BITS} ON)
Expand All @@ -540,8 +563,9 @@ if(ARROW_CPU_FLAG STREQUAL "aarch64")
set(ARROW_HAVE_SVE_SIZELESS ON)
add_definitions(-DARROW_HAVE_SVE_SIZELESS)
endif()
else() # ARM v8 without SVE
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -march=${ARROW_ARMV8_MARCH}")
endif()
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -march=${ARROW_ARMV8_MARCH}")
elseif(NOT ARROW_SIMD_LEVEL STREQUAL "NONE")
message(WARNING "ARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL} not supported by Arm.")
endif()
Expand Down
28 changes: 26 additions & 2 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,27 @@ macro(append_runtime_avx512_src SRCS SRC)
endif()
endmacro()

# Append SRC to the list variable named by SRCS and mark it to be compiled
# with the SVE (128-bit fixed vector length) flags, but only when runtime
# SVE128 dispatch is enabled (ARROW_HAVE_RUNTIME_SVE128 set in
# SetupCxxFlags.cmake). A macro (not a function) so the list(APPEND) takes
# effect in the caller's scope.
macro(append_runtime_sve128_src SRCS SRC)
if(ARROW_HAVE_RUNTIME_SVE128)
list(APPEND ${SRCS} ${SRC})
# Per-source compile options so only this file is built with SVE codegen.
set_source_files_properties(${SRC} PROPERTIES COMPILE_OPTIONS "${ARROW_SVE128_FLAGS}")
endif()
endmacro()

# Append SRC to the list variable named by SRCS and mark it to be compiled
# with the SVE (256-bit fixed vector length) flags, but only when runtime
# SVE256 dispatch is enabled (ARROW_HAVE_RUNTIME_SVE256). A macro so the
# list(APPEND) takes effect in the caller's scope.
macro(append_runtime_sve256_src SRCS SRC)
if(ARROW_HAVE_RUNTIME_SVE256)
list(APPEND ${SRCS} ${SRC})
# Per-source compile options so only this file is built with SVE codegen.
set_source_files_properties(${SRC} PROPERTIES COMPILE_OPTIONS "${ARROW_SVE256_FLAGS}")
endif()
endmacro()

# Append SRC to the list variable named by SRCS and mark it to be compiled
# with the SVE (512-bit fixed vector length) flags, but only when runtime
# SVE512 dispatch is enabled (ARROW_HAVE_RUNTIME_SVE512). A macro so the
# list(APPEND) takes effect in the caller's scope.
macro(append_runtime_sve512_src SRCS SRC)
if(ARROW_HAVE_RUNTIME_SVE512)
list(APPEND ${SRCS} ${SRC})
# Per-source compile options so only this file is built with SVE codegen.
set_source_files_properties(${SRC} PROPERTIES COMPILE_OPTIONS "${ARROW_SVE512_FLAGS}")
endif()
endmacro()

# Write out compile-time configuration constants
string(REPLACE "${CMAKE_SOURCE_DIR}" "<CMAKE_SOURCE_DIR>" REDACTED_CXX_FLAGS
${CMAKE_CXX_FLAGS})
Expand Down Expand Up @@ -498,7 +519,7 @@ set(ARROW_UTIL_SRCS
util/bitmap_ops.cc
util/bpacking.cc
util/bpacking_scalar.cc
util/bpacking_simd_default.cc
util/bpacking_simd_128.cc
util/byte_size.cc
util/byte_stream_split_internal.cc
util/cancel.cc
Expand Down Expand Up @@ -543,9 +564,12 @@ set(ARROW_UTIL_SRCS

append_runtime_avx2_src(ARROW_UTIL_SRCS util/byte_stream_split_internal_avx2.cc)

append_runtime_avx2_src(ARROW_UTIL_SRCS util/bpacking_simd_avx2.cc)
append_runtime_avx2_src(ARROW_UTIL_SRCS util/bpacking_simd_256.cc)
append_runtime_avx512_src(ARROW_UTIL_SRCS util/bpacking_simd_avx512.cc)

append_runtime_sve128_src(ARROW_UTIL_SRCS util/bpacking_simd_128_alt.cc)
append_runtime_sve256_src(ARROW_UTIL_SRCS util/bpacking_simd_256.cc)

if(ARROW_WITH_BROTLI)
list(APPEND ARROW_UTIL_SRCS util/compression_brotli.cc)
endif()
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ arrow_util_srcs = [
'util/bitmap_ops.cc',
'util/bpacking.cc',
'util/bpacking_scalar.cc',
'util/bpacking_simd_default.cc',
'util/bpacking_simd_128.cc',
'util/byte_size.cc',
'util/byte_stream_split_internal.cc',
'util/cancel.cc',
Expand Down
39 changes: 27 additions & 12 deletions cpp/src/arrow/util/bpacking.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,29 @@ struct UnpackDynamicFunction {

// Build the compile-time table of available unpack implementations for this
// platform, from the unconditional baseline up to the most specialized
// SIMD level that may be selected by runtime dispatch.
//
// NOTE(review): the pasted source contained interleaved PR-review chrome
// ("Comment thread", "… marked this conversation as resolved.") and stale
// removed-diff lines inside the function body; they are dropped here to
// restore valid C++. Branch structure follows the visible post-change hunk.
static constexpr auto implementations() {
  return std::array{
  // x86 implementations: SSE4.2 baseline plus optional AVX2/AVX512
  // runtime-dispatched entries.
#if defined(ARROW_HAVE_SSE4_2)
      Implementation{DispatchLevel::NONE, &bpacking::unpack_sse4_2<Uint>},
# if defined(ARROW_HAVE_RUNTIME_AVX2)
      Implementation{DispatchLevel::AVX2, &bpacking::unpack_avx2<Uint>},
# endif
# if defined(ARROW_HAVE_RUNTIME_AVX512)
      Implementation{DispatchLevel::AVX512, &bpacking::unpack_avx512<Uint>},
# endif

  // ARM implementations: Neon baseline plus optional SVE128/SVE256
  // runtime-dispatched entries.
#elif defined(ARROW_HAVE_NEON)
      Implementation{DispatchLevel::NONE, &bpacking::unpack_neon<Uint>},
# if defined(ARROW_HAVE_RUNTIME_SVE128)
      Implementation{DispatchLevel::SVE128, &bpacking::unpack_sve128<Uint>},
# endif
# if defined(ARROW_HAVE_RUNTIME_SVE256)
      Implementation{DispatchLevel::SVE256, &bpacking::unpack_sve256<Uint>},
# endif

  // Other platforms: portable scalar fallback only.
#else
      Implementation{DispatchLevel::NONE, &bpacking::unpack_scalar<Uint>},
#endif
  };
}
Expand All @@ -52,12 +65,14 @@ struct UnpackDynamicFunction {

// Unpack bit-packed values from `in` into `out` according to `opts`.
//
// When the implementation table has a single entry (e.g. macOS ARM builds
// with Neon only and no SVE), the dispatch is resolved entirely at compile
// time and the static DynamicDispatch object is never instantiated; this
// replaces the previous explicit `#ifdef ARROW_HAVE_NEON` shortcut.
//
// NOTE(review): the pasted source interleaved the removed pre-change body
// and ~40 lines of PR review discussion inside this function; both are
// dropped here to restore valid C++.
template <typename Uint>
void unpack(const uint8_t* in, Uint* out, const UnpackOptions& opts) {
  constexpr auto kImplementations = UnpackDynamicFunction<Uint>::implementations();
  if constexpr (kImplementations.size() == 1) {
    // Only one candidate: call it directly, no runtime dispatch needed.
    constexpr auto func = kImplementations.front().second;
    func(in, out, opts);
  } else {
    // Lazily-initialized dispatcher picks the best implementation for the
    // CPU the process is actually running on.
    static DynamicDispatch<UnpackDynamicFunction<Uint> > dispatch;
    return dispatch.func(in, out, opts);
  }
}

template void unpack<bool>(const uint8_t*, bool*, const UnpackOptions&);
Expand Down
166 changes: 55 additions & 111 deletions cpp/src/arrow/util/bpacking_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#include "arrow/util/bpacking_scalar_internal.h"
#include "arrow/util/bpacking_simd_internal.h"

#if defined(ARROW_HAVE_RUNTIME_AVX2)
#if defined(ARROW_HAVE_RUNTIME_AVX2) || defined(ARROW_HAVE_RUNTIME_SVE128)
# include "arrow/util/cpu_info.h"
#endif

Expand Down Expand Up @@ -107,10 +107,10 @@ void BM_Unpack(benchmark::State& state, bool aligned, UnpackFunc<Int> unpack, bo
// will not emit runs larger than 512 (though other implementation might), so we biased
// the benchmarks towards a rather small scale.
static const auto kNumValuesRange = benchmark::CreateRange(32, 512, 2);
constexpr std::initializer_list<int64_t> kBitWidths8 = {1, 2, 8};
constexpr std::initializer_list<int64_t> kBitWidths16 = {1, 2, 8, 13};
constexpr std::initializer_list<int64_t> kBitWidths32 = {1, 2, 8, 20};
constexpr std::initializer_list<int64_t> kBitWidths64 = {1, 2, 8, 20, 47};
constexpr auto kBitWidths8 = std::initializer_list<int64_t>{1, 2, 8};
constexpr auto kBitWidths16 = std::initializer_list<int64_t>{1, 2, 8, 13};
constexpr auto kBitWidths32 = std::initializer_list<int64_t>{1, 2, 8, 20};
constexpr auto kBitWidths64 = std::initializer_list<int64_t>{1, 2, 8, 20, 47};

static const std::vector<std::vector<int64_t>> kBitWidthsNumValuesBool = {
{0, 1},
Expand Down Expand Up @@ -159,125 +159,69 @@ void BM_UnpackUint64(benchmark::State& state, bool aligned, UnpackFunc<uint64_t>
return BM_Unpack<uint64_t>(state, aligned, unpack, skip, std::move(skip_msg));
}

BENCHMARK_CAPTURE(BM_UnpackBool, ScalarUnaligned, false, &bpacking::unpack_scalar<bool>)
->ArgsProduct(kBitWidthsNumValuesBool);
BENCHMARK_CAPTURE(BM_UnpackUint8, ScalarUnaligned, false,
&bpacking::unpack_scalar<uint8_t>)
->ArgsProduct(kBitWidthsNumValues8);
BENCHMARK_CAPTURE(BM_UnpackUint16, ScalarUnaligned, false,
&bpacking::unpack_scalar<uint16_t>)
->ArgsProduct(kBitWidthsNumValues16);
BENCHMARK_CAPTURE(BM_UnpackUint32, ScalarUnaligned, false,
&bpacking::unpack_scalar<uint32_t>)
->ArgsProduct(kBitWidthsNumValues32);
BENCHMARK_CAPTURE(BM_UnpackUint64, ScalarUnaligned, false,
&bpacking::unpack_scalar<uint64_t>)
->ArgsProduct(kBitWidthsNumValues64);
// Register BM_Unpack{Bool,Uint8,Uint16,Uint32,Uint64} benchmarks for a given
// UNPACK_FUNC templated on each of those types, with explicit skip args.
//
// LABEL    - benchmark name suffix (e.g. ScalarUnaligned).
// ALIGNED  - whether the output buffer is aligned (forwarded to BM_Unpack).
// SKIP     - boolean expression; when true the benchmark is skipped at run
//            time with SKIP_MSG as the reason (used for CPU-feature gating).
// Each registration uses the per-width {bit width, num values} argument
// grid defined above (kBitWidthsNumValues*).
#define BENCHMARK_UNPACK_ALL_TYPES_SKIP(LABEL, ALIGNED, UNPACK_FUNC, SKIP, SKIP_MSG) \
BENCHMARK_CAPTURE(BM_UnpackBool, LABEL, ALIGNED, &UNPACK_FUNC<bool>, SKIP, SKIP_MSG) \
->ArgsProduct(kBitWidthsNumValuesBool); \
BENCHMARK_CAPTURE(BM_UnpackUint8, LABEL, ALIGNED, &UNPACK_FUNC<uint8_t>, SKIP, \
SKIP_MSG) \
->ArgsProduct(kBitWidthsNumValues8); \
BENCHMARK_CAPTURE(BM_UnpackUint16, LABEL, ALIGNED, &UNPACK_FUNC<uint16_t>, SKIP, \
SKIP_MSG) \
->ArgsProduct(kBitWidthsNumValues16); \
BENCHMARK_CAPTURE(BM_UnpackUint32, LABEL, ALIGNED, &UNPACK_FUNC<uint32_t>, SKIP, \
SKIP_MSG) \
->ArgsProduct(kBitWidthsNumValues32); \
BENCHMARK_CAPTURE(BM_UnpackUint64, LABEL, ALIGNED, &UNPACK_FUNC<uint64_t>, SKIP, \
SKIP_MSG) \
->ArgsProduct(kBitWidthsNumValues64)

// Convenience wrapper: register benchmarks for all integer types with no
// run-time skip condition (implementation is always executable here).
#define BENCHMARK_UNPACK_ALL_TYPES(LABEL, ALIGNED, UNPACK_FUNC) \
BENCHMARK_UNPACK_ALL_TYPES_SKIP(LABEL, ALIGNED, UNPACK_FUNC, false, "")

// Convenience wrapper for runtime-dispatched SIMD implementations: skip the
// benchmarks (with SKIP_MSG) when the current CPU does not report support
// for CPU_FEATURE (a CpuInfo enumerator, e.g. AVX2 or SVE128).
#define BENCHMARK_UNPACK_ALL_TYPES_RUNTIME(LABEL, ALIGNED, UNPACK_FUNC, CPU_FEATURE, \
SKIP_MSG) \
BENCHMARK_UNPACK_ALL_TYPES_SKIP( \
LABEL, ALIGNED, UNPACK_FUNC, \
!CpuInfo::GetInstance()->IsSupported(CpuInfo::CPU_FEATURE), SKIP_MSG)

BENCHMARK_UNPACK_ALL_TYPES(ScalarUnaligned, false, bpacking::unpack_scalar);

#if defined(ARROW_HAVE_SSE4_2)
BENCHMARK_CAPTURE(BM_UnpackBool, Sse42Unaligned, false, &bpacking::unpack_sse4_2<bool>)
->ArgsProduct(kBitWidthsNumValuesBool);
BENCHMARK_CAPTURE(BM_UnpackUint8, Sse42Unaligned, false,
&bpacking::unpack_sse4_2<uint8_t>)
->ArgsProduct(kBitWidthsNumValues8);
BENCHMARK_CAPTURE(BM_UnpackUint16, Sse42Unaligned, false,
&bpacking::unpack_sse4_2<uint16_t>)
->ArgsProduct(kBitWidthsNumValues16);
BENCHMARK_CAPTURE(BM_UnpackUint32, Sse42Unaligned, false,
&bpacking::unpack_sse4_2<uint32_t>)
->ArgsProduct(kBitWidthsNumValues32);
BENCHMARK_CAPTURE(BM_UnpackUint64, Sse42Unaligned, false,
&bpacking::unpack_sse4_2<uint64_t>)
->ArgsProduct(kBitWidthsNumValues64);
BENCHMARK_UNPACK_ALL_TYPES(Sse42Unaligned, false, bpacking::unpack_sse4_2);
#endif

#if defined(ARROW_HAVE_RUNTIME_AVX2)
BENCHMARK_CAPTURE(BM_UnpackBool, Avx2Unaligned, false, &bpacking::unpack_avx2<bool>,
!CpuInfo::GetInstance()->IsSupported(CpuInfo::AVX2),
"Avx2 not available")
->ArgsProduct(kBitWidthsNumValuesBool);
BENCHMARK_CAPTURE(BM_UnpackUint8, Avx2Unaligned, false, &bpacking::unpack_avx2<uint8_t>,
!CpuInfo::GetInstance()->IsSupported(CpuInfo::AVX2),
"Avx2 not available")
->ArgsProduct(kBitWidthsNumValues8);
BENCHMARK_CAPTURE(BM_UnpackUint16, Avx2Unaligned, false, &bpacking::unpack_avx2<uint16_t>,
!CpuInfo::GetInstance()->IsSupported(CpuInfo::AVX2),
"Avx2 not available")
->ArgsProduct(kBitWidthsNumValues16);
BENCHMARK_CAPTURE(BM_UnpackUint32, Avx2Unaligned, false, &bpacking::unpack_avx2<uint32_t>,
!CpuInfo::GetInstance()->IsSupported(CpuInfo::AVX2),
"Avx2 not available")
->ArgsProduct(kBitWidthsNumValues32);
BENCHMARK_CAPTURE(BM_UnpackUint64, Avx2Unaligned, false, &bpacking::unpack_avx2<uint64_t>,
!CpuInfo::GetInstance()->IsSupported(CpuInfo::AVX2),
"Avx2 not available")
->ArgsProduct(kBitWidthsNumValues64);
BENCHMARK_UNPACK_ALL_TYPES_RUNTIME(Avx2Unaligned, false, bpacking::unpack_avx2, AVX2,
"Avx2 not available");
#endif

#if defined(ARROW_HAVE_RUNTIME_AVX512)
BENCHMARK_CAPTURE(BM_UnpackBool, Avx512Unaligned, false, &bpacking::unpack_avx512<bool>,
!CpuInfo::GetInstance()->IsSupported(CpuInfo::AVX512),
"Avx512 not available")
->ArgsProduct(kBitWidthsNumValuesBool);
BENCHMARK_CAPTURE(BM_UnpackUint8, Avx512Unaligned, false,
&bpacking::unpack_avx512<uint8_t>,
!CpuInfo::GetInstance()->IsSupported(CpuInfo::AVX512),
"Avx512 not available")
->ArgsProduct(kBitWidthsNumValues8);
BENCHMARK_CAPTURE(BM_UnpackUint16, Avx512Unaligned, false,
&bpacking::unpack_avx512<uint16_t>,
!CpuInfo::GetInstance()->IsSupported(CpuInfo::AVX512),
"Avx512 not available")
->ArgsProduct(kBitWidthsNumValues16);
BENCHMARK_CAPTURE(BM_UnpackUint32, Avx512Unaligned, false,
&bpacking::unpack_avx512<uint32_t>,
!CpuInfo::GetInstance()->IsSupported(CpuInfo::AVX512),
"Avx512 not available")
->ArgsProduct(kBitWidthsNumValues32);
BENCHMARK_CAPTURE(BM_UnpackUint64, Avx512Unaligned, false,
&bpacking::unpack_avx512<uint64_t>,
!CpuInfo::GetInstance()->IsSupported(CpuInfo::AVX512),
"Avx512 not available")
->ArgsProduct(kBitWidthsNumValues64);
BENCHMARK_UNPACK_ALL_TYPES_RUNTIME(Avx512Unaligned, false, bpacking::unpack_avx512,
AVX512, "Avx512 not available");
#endif

#if defined(ARROW_HAVE_NEON)
BENCHMARK_CAPTURE(BM_UnpackBool, NeonUnaligned, false, &bpacking::unpack_neon<bool>)
->ArgsProduct(kBitWidthsNumValuesBool);
BENCHMARK_CAPTURE(BM_UnpackUint8, NeonUnaligned, false, &bpacking::unpack_neon<uint8_t>)
->ArgsProduct(kBitWidthsNumValues8);
BENCHMARK_CAPTURE(BM_UnpackUint16, NeonUnaligned, false, &bpacking::unpack_neon<uint16_t>)
->ArgsProduct(kBitWidthsNumValues16);
BENCHMARK_CAPTURE(BM_UnpackUint32, NeonUnaligned, false, &bpacking::unpack_neon<uint32_t>)
->ArgsProduct(kBitWidthsNumValues32);
BENCHMARK_CAPTURE(BM_UnpackUint64, NeonUnaligned, false, &bpacking::unpack_neon<uint64_t>)
->ArgsProduct(kBitWidthsNumValues64);
BENCHMARK_UNPACK_ALL_TYPES(NeonUnaligned, false, bpacking::unpack_neon);
#endif

#if defined(ARROW_HAVE_RUNTIME_SVE128)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if there's an easy way to reduce the duplication we're doing for each runtime SIMD level?

For example if we could write something like:

BENCHMARK_SIMD_UNPACK(Bool, bool, SVE128, Sve128, sve128);

and it would expand to:

BENCHMARK_CAPTURE(BM_UnpackBool, Sve128Unaligned, false, &bpacking::unpack_sve128<bool>,
                  !CpuInfo::GetInstance()->IsSupported(CpuInfo::SVE128),
                  "Sve128 not available")
    ->ArgsProduct(kBitWidthsNumValues<bool>);

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You mean with a macro?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes!

BENCHMARK_UNPACK_ALL_TYPES_RUNTIME(Sve128Unaligned, false, bpacking::unpack_sve128,
SVE128, "Sve128 not available");
#endif

BENCHMARK_CAPTURE(BM_UnpackBool, DynamicAligned, true, &unpack<bool>)
->ArgsProduct(kBitWidthsNumValuesBool);
BENCHMARK_CAPTURE(BM_UnpackBool, DynamicUnaligned, false, &unpack<bool>)
->ArgsProduct(kBitWidthsNumValuesBool);

BENCHMARK_CAPTURE(BM_UnpackUint8, DynamicAligned, true, &unpack<uint8_t>)
->ArgsProduct(kBitWidthsNumValues8);
BENCHMARK_CAPTURE(BM_UnpackUint8, DynamicUnaligned, false, &unpack<uint8_t>)
->ArgsProduct(kBitWidthsNumValues8);

BENCHMARK_CAPTURE(BM_UnpackUint16, DynamicAligned, true, &unpack<uint16_t>)
->ArgsProduct(kBitWidthsNumValues16);
BENCHMARK_CAPTURE(BM_UnpackUint16, DynamicUnaligned, false, &unpack<uint16_t>)
->ArgsProduct(kBitWidthsNumValues16);

BENCHMARK_CAPTURE(BM_UnpackUint32, DynamicAligned, true, &unpack<uint32_t>)
->ArgsProduct(kBitWidthsNumValues32);
BENCHMARK_CAPTURE(BM_UnpackUint32, DynamicUnaligned, false, &unpack<uint32_t>)
->ArgsProduct(kBitWidthsNumValues32);

BENCHMARK_CAPTURE(BM_UnpackUint64, DynamicAligned, true, &unpack<uint64_t>)
->ArgsProduct(kBitWidthsNumValues64);
BENCHMARK_CAPTURE(BM_UnpackUint64, DynamicUnaligned, false, &unpack<uint64_t>)
->ArgsProduct(kBitWidthsNumValues64);
#if defined(ARROW_HAVE_RUNTIME_SVE256)
BENCHMARK_UNPACK_ALL_TYPES_RUNTIME(Sve256Unaligned, false, bpacking::unpack_sve256,
SVE256, "Sve256 not available");
#endif

BENCHMARK_UNPACK_ALL_TYPES(DynamicAligned, true, unpack);
BENCHMARK_UNPACK_ALL_TYPES(DynamicUnaligned, false, unpack);

#undef BENCHMARK_UNPACK_ALL_TYPES_RUNTIME
#undef BENCHMARK_UNPACK_ALL_TYPES
#undef BENCHMARK_UNPACK_ALL_TYPES_SKIP

} // namespace
} // namespace arrow::internal
Loading
Loading