Skip to content

Commit 504595e

Browse files
committed
Add SVE128
1 parent 40d40f8 commit 504595e

9 files changed

Lines changed: 154 additions & 4 deletions

File tree

cpp/cmake_modules/SetupCxxFlags.cmake

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,10 @@ elseif(ARROW_CPU_FLAG STREQUAL "aarch64")
145145
else()
146146
check_cxx_compiler_flag("${ARROW_SVE_FLAGS}" CXX_SUPPORTS_SVE)
147147
endif()
# Enable the runtime SVE128 code path when the compiler accepts the SVE flags
# (CXX_SUPPORTS_SVE) and the requested ARROW_RUNTIME_SIMD_LEVEL is SVE128 or
# one of the other levels accepted by the pattern (SVE256, SVE512, MAX).
# Sets the CMake variable and the matching preprocessor definition.
148+
if(CXX_SUPPORTS_SVE AND ARROW_RUNTIME_SIMD_LEVEL MATCHES "^(SVE128|SVE256|SVE512|MAX)$")
149+
set(ARROW_HAVE_RUNTIME_SVE128 ON)
150+
add_definitions(-DARROW_HAVE_RUNTIME_SVE128)
151+
endif()
148152
if(CXX_SUPPORTS_SVE AND ARROW_RUNTIME_SIMD_LEVEL MATCHES "^(SVE256|SVE512|MAX)$")
149153
set(ARROW_HAVE_RUNTIME_SVE256 ON)
150154
add_definitions(-DARROW_HAVE_RUNTIME_SVE256)

cpp/src/arrow/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,7 @@ append_runtime_avx2_src(ARROW_UTIL_SRCS util/byte_stream_split_internal_avx2.cc)
567567
append_runtime_avx2_src(ARROW_UTIL_SRCS util/bpacking_simd_256.cc)
568568
append_runtime_avx512_src(ARROW_UTIL_SRCS util/bpacking_simd_avx512.cc)
569569

# SVE variants of the bit-unpacking sources. SVE256 reuses
# util/bpacking_simd_256.cc (the same file passed to the AVX2 helper above),
# while SVE128 uses its own file, util/bpacking_simd_128_alt.cc.
# NOTE(review): the append_runtime_sve*_src helpers are defined outside this
# hunk; presumably they append the file to ARROW_UTIL_SRCS only when the
# corresponding runtime SVE level is enabled -- confirm.
570+
append_runtime_sve128_src(ARROW_UTIL_SRCS util/bpacking_simd_128_alt.cc)
570571
append_runtime_sve256_src(ARROW_UTIL_SRCS util/bpacking_simd_256.cc)
571572

572573
if(ARROW_WITH_BROTLI)

cpp/src/arrow/util/bpacking_benchmark.cc

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
#include "arrow/util/bpacking_scalar_internal.h"
2727
#include "arrow/util/bpacking_simd_internal.h"
2828

29-
#if defined(ARROW_HAVE_RUNTIME_AVX2) || defined(ARROW_HAVE_RUNTIME_SVE256)
29+
#if defined(ARROW_HAVE_RUNTIME_AVX2) || defined(ARROW_HAVE_RUNTIME_SVE128)
3030
# include "arrow/util/cpu_info.h"
3131
#endif
3232

@@ -254,6 +254,33 @@ BENCHMARK_CAPTURE(BM_UnpackUint64, NeonUnaligned, false, &bpacking::unpack_neon<
254254
->ArgsProduct(kBitWidthsNumValues64);
255255
#endif
256256

// Benchmarks for the SVE128 unpack kernels, compiled only when
// ARROW_HAVE_RUNTIME_SVE128 is defined. One capture is registered per output
// type (bool, uint8_t, uint16_t, uint32_t, uint64_t), each bound to the
// matching bpacking::unpack_sve128 instantiation and argument set.
// Every capture also passes the negated CpuInfo::IsSupported(CpuInfo::SVE128)
// result and the message "Sve128 not available".
// NOTE(review): the BM_Unpack* functions are outside this hunk; presumably
// they use that flag and message to skip on CPUs without SVE128 -- confirm.
257+
#if defined(ARROW_HAVE_RUNTIME_SVE128)
258+
BENCHMARK_CAPTURE(BM_UnpackBool, Sve128Unaligned, false, &bpacking::unpack_sve128<bool>,
259+
!CpuInfo::GetInstance()->IsSupported(CpuInfo::SVE128),
260+
"Sve128 not available")
261+
->ArgsProduct(kBitWidthsNumValuesBool);
262+
BENCHMARK_CAPTURE(BM_UnpackUint8, Sve128Unaligned, false,
263+
&bpacking::unpack_sve128<uint8_t>,
264+
!CpuInfo::GetInstance()->IsSupported(CpuInfo::SVE128),
265+
"Sve128 not available")
266+
->ArgsProduct(kBitWidthsNumValues8);
267+
BENCHMARK_CAPTURE(BM_UnpackUint16, Sve128Unaligned, false,
268+
&bpacking::unpack_sve128<uint16_t>,
269+
!CpuInfo::GetInstance()->IsSupported(CpuInfo::SVE128),
270+
"Sve128 not available")
271+
->ArgsProduct(kBitWidthsNumValues16);
272+
BENCHMARK_CAPTURE(BM_UnpackUint32, Sve128Unaligned, false,
273+
&bpacking::unpack_sve128<uint32_t>,
274+
!CpuInfo::GetInstance()->IsSupported(CpuInfo::SVE128),
275+
"Sve128 not available")
276+
->ArgsProduct(kBitWidthsNumValues32);
277+
BENCHMARK_CAPTURE(BM_UnpackUint64, Sve128Unaligned, false,
278+
&bpacking::unpack_sve128<uint64_t>,
279+
!CpuInfo::GetInstance()->IsSupported(CpuInfo::SVE128),
280+
"Sve128 not available")
281+
->ArgsProduct(kBitWidthsNumValues64);
282+
#endif
283+
257284
#if defined(ARROW_HAVE_RUNTIME_SVE256)
258285
BENCHMARK_CAPTURE(BM_UnpackBool, Sve256Unaligned, false, &bpacking::unpack_sve256<bool>,
259286
!CpuInfo::GetInstance()->IsSupported(CpuInfo::SVE256),
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
// Pick the name of the function defined by this translation unit. When
// ARROW_HAVE_RUNTIME_SVE128 is not defined, UNPACK_PLATFORM stays undefined
// and the `#if defined(UNPACK_PLATFORM)` section that follows is compiled out.
18+
#if defined(ARROW_HAVE_RUNTIME_SVE128)
19+
# define UNPACK_PLATFORM unpack_sve128
20+
#endif
21+
22+
#if defined(UNPACK_PLATFORM)
23+
24+
# include <xsimd/xsimd.hpp>
25+
26+
# include "arrow/util/bpacking_dispatch_internal.h"
27+
# include "arrow/util/bpacking_simd_internal.h"
28+
# include "arrow/util/bpacking_simd_kernel_internal.h"
29+
30+
namespace arrow::internal::bpacking {
31+
// Alias that fixes the third Kernel template argument to 128 while leaving
// the unpacked integer type and the packed bit size as parameters.
// NOTE(review): Kernel is declared outside this view; the 128 is presumably
// the SIMD register width in bits -- confirm.
32+
template <typename UnpackedUint, int kPackedBitSize>
33+
using Simd128Kernel = Kernel<UnpackedUint, kPackedBitSize, 128>;
34+
// Unpack entry point for the SVE128 build (UNPACK_PLATFORM expands to
// unpack_sve128 in this file). It forwards `in`, `out` and `opts` unchanged to
// unpack_jump instantiated with Simd128Kernel.
// The static_assert is a compile-time guard: the build fails unless
// xsimd::default_arch is exactly xsimd::detail::sve<128> in this translation
// unit.
35+
template <typename Uint>
36+
void UNPACK_PLATFORM(const uint8_t* in, Uint* out, const UnpackOptions& opts) {
37+
static_assert(std::is_same_v<xsimd::default_arch, xsimd::detail::sve<128>>);
38+
return unpack_jump<Simd128Kernel>(in, out, opts);
39+
}
40+
// Explicit instantiations of the unpack function for bool, uint8_t, uint16_t,
// uint32_t and uint64_t outputs, so these specializations are emitted in this
// translation unit.
41+
template void UNPACK_PLATFORM<bool>(const uint8_t*, bool*, const UnpackOptions&);
42+
template void UNPACK_PLATFORM<uint8_t>(const uint8_t*, uint8_t*, const UnpackOptions&);
43+
template void UNPACK_PLATFORM<uint16_t>(const uint8_t*, uint16_t*, const UnpackOptions&);
44+
template void UNPACK_PLATFORM<uint32_t>(const uint8_t*, uint32_t*, const UnpackOptions&);
45+
template void UNPACK_PLATFORM<uint64_t>(const uint8_t*, uint64_t*, const UnpackOptions&);
46+
47+
} // namespace arrow::internal::bpacking
48+
49+
# undef UNPACK_PLATFORM
50+
#endif // UNPACK_PLATFORM

cpp/src/arrow/util/bpacking_simd_internal.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,34 @@ extern template ARROW_TEMPLATE_EXPORT void UNPACK_ARCH128<uint64_t>(
5353
#endif // UNPACK_ARCH128
5454
#undef UNPACK_ARCH128
5555

// Name of the SVE128 unpack function. The macro is only defined when
// ARROW_HAVE_RUNTIME_SVE128 is set, so the `#if defined(UNPACK_ARCH128_ALT)`
// section that follows is skipped otherwise.
56+
#if defined(ARROW_HAVE_RUNTIME_SVE128)
57+
# define UNPACK_ARCH128_ALT unpack_sve128
58+
#endif
59+
60+
#if defined(UNPACK_ARCH128_ALT)
61+
// Declaration of the exported SVE128 unpack function template
// (UNPACK_ARCH128_ALT expands to unpack_sve128). Only the declaration lives
// here; no body is provided in this header.
// NOTE(review): `in` is presumably the packed input bytes and `out` the
// destination for the unpacked values of type Uint -- confirm against the
// UnpackOptions documentation.
62+
template <typename Uint>
63+
ARROW_EXPORT void UNPACK_ARCH128_ALT(const uint8_t* in, Uint* out,
64+
const UnpackOptions& opts);
65+
// Explicit instantiation declarations for bool, uint8_t, uint16_t, uint32_t
// and uint64_t. `extern template` tells includers that these specializations
// are instantiated in another translation unit, so they are not implicitly
// instantiated here.
66+
extern template ARROW_TEMPLATE_EXPORT void UNPACK_ARCH128_ALT<bool>( //
67+
const uint8_t* in, bool* out, const UnpackOptions& opts);
68+
69+
extern template ARROW_TEMPLATE_EXPORT void UNPACK_ARCH128_ALT<uint8_t>(
70+
const uint8_t* in, uint8_t* out, const UnpackOptions& opts);
71+
72+
extern template ARROW_TEMPLATE_EXPORT void UNPACK_ARCH128_ALT<uint16_t>(
73+
const uint8_t* in, uint16_t* out, const UnpackOptions& opts);
74+
75+
extern template ARROW_TEMPLATE_EXPORT void UNPACK_ARCH128_ALT<uint32_t>(
76+
const uint8_t* in, uint32_t* out, const UnpackOptions& opts);
77+
78+
extern template ARROW_TEMPLATE_EXPORT void UNPACK_ARCH128_ALT<uint64_t>(
79+
const uint8_t* in, uint64_t* out, const UnpackOptions& opts);
80+
81+
#endif // UNPACK_ARCH128_ALT
82+
#undef UNPACK_ARCH128_ALT
83+
5684
#if defined(ARROW_HAVE_SVE256) || defined(ARROW_HAVE_RUNTIME_SVE256)
5785
# define UNPACK_ARCH256 unpack_sve256
5886
#elif defined(UNPACK_ARCH256) || defined(ARROW_HAVE_RUNTIME_AVX2)

cpp/src/arrow/util/bpacking_test.cc

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
#include "arrow/util/bpacking_scalar_internal.h"
2828
#include "arrow/util/bpacking_simd_internal.h"
2929

30-
#if defined(ARROW_HAVE_RUNTIME_AVX2) || defined(ARROW_HAVE_RUNTIME_SVE256)
30+
#if defined(ARROW_HAVE_RUNTIME_AVX2) || defined(ARROW_HAVE_RUNTIME_SVE128)
3131
# include "arrow/util/cpu_info.h"
3232
#endif
3333

@@ -349,6 +349,39 @@ TEST_P(TestUnpack, Unpack32Neon) { this->TestAll(&bpacking::unpack_neon<uint32_t
349349
TEST_P(TestUnpack, Unpack64Neon) { this->TestAll(&bpacking::unpack_neon<uint64_t>); }
350350
#endif
351351

// SVE128 variants of the unpack tests, compiled only when
// ARROW_HAVE_RUNTIME_SVE128 is defined. Each test first asks CpuInfo whether
// SVE128 is supported and skips itself via GTEST_SKIP() if it is not;
// otherwise it passes the matching bpacking::unpack_sve128 instantiation
// (bool, uint8_t, uint16_t, uint32_t, uint64_t) to TestAll.
352+
#if defined(ARROW_HAVE_RUNTIME_SVE128)
353+
TEST_P(TestUnpack, UnpackBoolSve128) {
354+
if (!CpuInfo::GetInstance()->IsSupported(CpuInfo::SVE128)) {
355+
GTEST_SKIP() << "Test requires SVE128";
356+
}
357+
this->TestAll(&bpacking::unpack_sve128<bool>);
358+
}
359+
TEST_P(TestUnpack, Unpack8Sve128) {
360+
if (!CpuInfo::GetInstance()->IsSupported(CpuInfo::SVE128)) {
361+
GTEST_SKIP() << "Test requires SVE128";
362+
}
363+
this->TestAll(&bpacking::unpack_sve128<uint8_t>);
364+
}
365+
TEST_P(TestUnpack, Unpack16Sve128) {
366+
if (!CpuInfo::GetInstance()->IsSupported(CpuInfo::SVE128)) {
367+
GTEST_SKIP() << "Test requires SVE128";
368+
}
369+
this->TestAll(&bpacking::unpack_sve128<uint16_t>);
370+
}
371+
TEST_P(TestUnpack, Unpack32Sve128) {
372+
if (!CpuInfo::GetInstance()->IsSupported(CpuInfo::SVE128)) {
373+
GTEST_SKIP() << "Test requires SVE128";
374+
}
375+
this->TestAll(&bpacking::unpack_sve128<uint32_t>);
376+
}
377+
TEST_P(TestUnpack, Unpack64Sve128) {
378+
if (!CpuInfo::GetInstance()->IsSupported(CpuInfo::SVE128)) {
379+
GTEST_SKIP() << "Test requires SVE128";
380+
}
381+
this->TestAll(&bpacking::unpack_sve128<uint64_t>);
382+
}
383+
#endif
384+
352385
#if defined(ARROW_HAVE_RUNTIME_SVE256)
353386
TEST_P(TestUnpack, UnpackBoolSve256) {
354387
if (!CpuInfo::GetInstance()->IsSupported(CpuInfo::SVE256)) {

cpp/src/arrow/util/cpu_info.cc

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,7 @@ void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
431431
assert(vl >= 0);
432432
// prctl returns vector length in bytes; mask off status flags
433433
const int vl_bytes = vl & PR_SVE_VL_LEN_MASK;
434+
if (vl_bytes >= 16) *hardware_flags |= CpuInfo::SVE128; // 128 bits
434435
if (vl_bytes >= 32) *hardware_flags |= CpuInfo::SVE256; // 256 bits
435436
if (vl_bytes >= 64) *hardware_flags |= CpuInfo::SVE512; // 512 bits
436437
# endif // PR_SVE_GET_VL
@@ -500,6 +501,7 @@ bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t* hardware_fla
500501
enum {
501502
USER_SIMD_NONE,
502503
USER_SIMD_SVE,
504+
USER_SIMD_SVE128,
503505
USER_SIMD_SVE256,
504506
USER_SIMD_SVE512,
505507
USER_SIMD_MAX,
@@ -508,10 +510,12 @@ bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t* hardware_fla
508510
int level = USER_SIMD_MAX;
509511
if (simd_level == "SVE") {
510512
level = USER_SIMD_SVE;
511-
} else if (simd_level == "SVE512") {
512-
level = USER_SIMD_SVE512;
513+
} else if (simd_level == "SVE128") {
514+
level = USER_SIMD_SVE128;
513515
} else if (simd_level == "SVE256") {
514516
level = USER_SIMD_SVE256;
517+
} else if (simd_level == "SVE512") {
518+
level = USER_SIMD_SVE512;
515519
} else if (simd_level == "NONE") {
516520
level = USER_SIMD_NONE;
517521
} else {
@@ -520,6 +524,7 @@ bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t* hardware_fla
520524

521525
if (level < USER_SIMD_SVE512) *hardware_flags &= ~CpuInfo::SVE512;
522526
if (level < USER_SIMD_SVE256) *hardware_flags &= ~CpuInfo::SVE256;
527+
if (level < USER_SIMD_SVE128) *hardware_flags &= ~CpuInfo::SVE128;
523528
if (level < USER_SIMD_SVE) *hardware_flags &= ~CpuInfo::SVE;
524529
return true;
525530
}

cpp/src/arrow/util/cpu_info.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ class ARROW_EXPORT CpuInfo {
5757
/// Arm features
5858
static constexpr int64_t ASIMD = (1LL << 32);
5959
static constexpr int64_t SVE = (1LL << 33);
60+
static constexpr int64_t SVE128 = (1LL << 36);
6061
static constexpr int64_t SVE256 = (1LL << 34);
6162
static constexpr int64_t SVE512 = (1LL << 35);
6263

cpp/src/arrow/util/dispatch_internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ enum class DispatchLevel : int {
3333
AVX2,
3434
AVX512,
3535
NEON,
36+
SVE128,
3637
SVE256,
3738
SVE512,
3839
MAX

0 commit comments

Comments
 (0)