Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
170 changes: 168 additions & 2 deletions hwy/contrib/algo/minmax-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
// limitations under the License.

// Per-target include guard
#include <cstddef>
#include <utility>
#if defined(HIGHWAY_HWY_CONTRIB_ALGO_MINMAX_INL_H_) == \
defined(HWY_TARGET_TOGGLE) // NOLINT
#ifdef HIGHWAY_HWY_CONTRIB_ALGO_MINMAX_INL_H_
Expand All @@ -28,7 +30,8 @@ HWY_BEFORE_NAMESPACE();
namespace hwy {
namespace HWY_NAMESPACE {

// Returns the minimum value in `in[0, count)` or PositiveInfOrHighestValue<T>() if count == 0.
// Returns the minimum value in `in[0, count)` or
// PositiveInfOrHighestValue<T>() if count == 0.
template <class D, typename T = TFromD<D>>
T MinValue(D d, const T* HWY_RESTRICT in, size_t count) {
const size_t N = Lanes(d);
Expand Down Expand Up @@ -61,7 +64,8 @@ T MinValue(D d, const T* HWY_RESTRICT in, size_t count) {
return ReduceMin(d, acc0);
}

// Returns the maximum value in `in[0, count)` or NegativeInfOrLowestValue<T>() if count == 0.
// Returns the maximum value in `in[0, count)` or
// NegativeInfOrLowestValue<T>() if count == 0.
template <class D, typename T = TFromD<D>>
T MaxValue(D d, const T* HWY_RESTRICT in, size_t count) {
const size_t N = Lanes(d);
Expand Down Expand Up @@ -94,6 +98,168 @@ T MaxValue(D d, const T* HWY_RESTRICT in, size_t count) {
return ReduceMax(d, acc0);
}

template <class DF, typename T = TFromD<DF>,
class DF2 = CappedTag<T, 2>, class VF2 = Vec<DF2>,
class VF = Vec<DF>, class HRed, class VRed>
static HWY_INLINE std::pair<T, T> Reduce2X(DF df, VF x_0, VF x_1, HRed h_red,
VRed v_red) {
const DF2 df2;
if constexpr (MaxLanes(df) < 2) {
return {h_red(df, x_0), h_red(df, x_1)};
} else {
constexpr size_t kMaxLanes = MaxLanes(df);
HWY_LANES_CONSTEXPR size_t kLanes = Lanes(df);
HWY_ALIGN T x_transposed[2 * kMaxLanes];
StoreInterleaved2(x_0, x_1, df, x_transposed);
VF x01 = v_red(Load(df, x_transposed), Load(df, x_transposed + kLanes));
Store(x01, df, x_transposed);

VF2 result = Load(df2, x_transposed);
for (size_t i = 1; i < kLanes / 2; ++i) {
result = v_red(result, Load(df2, x_transposed + i * 2));
}
HWY_ALIGN T temp[2];
Store(result, df2, temp);
return {temp[0], temp[1]};
}
}

template <class DF, typename T = TFromD<DF>,
class DF4 = CappedTag<T, 4>, class VF4 = Vec<DF4>,
class VF = Vec<DF>, class HRed, class VRed>
static HWY_INLINE VF4 Reduce4X(DF df, VF x_0, VF x_1, VF x_2, VF x_3,
HRed h_red, VRed v_red) {
const DF4 df4;
if constexpr ((HWY_TARGET & (HWY_ALL_NEON | HWY_ALL_SVE | HWY_RVV)) != 0 ||
MaxLanes(df) < 4) {
HWY_ALIGN T temp[4] = {h_red(df, x_0), h_red(df, x_1), h_red(df, x_2),
h_red(df, x_3)};
return Load(df4, temp);
} else {
constexpr size_t kMaxLanes = MaxLanes(df);
HWY_LANES_CONSTEXPR size_t kLanes = Lanes(df);
HWY_ALIGN T x_transposed[4 * kMaxLanes];
StoreInterleaved4(x_0, x_1, x_2, x_3, df, x_transposed);
VF x01 = v_red(Load(df, x_transposed), Load(df, x_transposed + kLanes));
VF x23 = v_red(Load(df, x_transposed + 2 * kLanes),
Load(df, x_transposed + 3 * kLanes));
VF x0123 = v_red(x01, x23);
Store(x0123, df, x_transposed);

VF4 result = Load(df4, x_transposed);
for (size_t i = 1; i < kLanes / 4; ++i) {
result = v_red(result, Load(df4, x_transposed + i * 4));
}
return result;
}
}

template <class DF, typename T = TFromD<DF>,
class DF8 = CappedTag<T, 8>, class VF8 = Vec<DF8>,
class VF = Vec<DF>, class HRed, class VRed>
static HWY_INLINE VF8 Reduce8X(DF df, VF x_0, VF x_1, VF x_2, VF x_3, VF x_4,
VF x_5, VF x_6, VF x_7, HRed h_red, VRed v_red) {
const DF8 df8;
if constexpr ((HWY_TARGET & (HWY_ALL_NEON | HWY_ALL_SVE | HWY_RVV)) != 0 ||
MaxLanes(df) < 8) {
HWY_ALIGN T temp[8] = {h_red(df, x_0), h_red(df, x_1), h_red(df, x_2),
h_red(df, x_3), h_red(df, x_4), h_red(df, x_5),
h_red(df, x_6), h_red(df, x_7)};
return Load(df8, temp);
} else {
auto res0123 = Reduce4X(df, x_0, x_1, x_2, x_3, h_red, v_red);
auto res4567 = Reduce4X(df, x_4, x_5, x_6, x_7, h_red, v_red);

return Combine(df8, res4567, res0123);
}
}



template <class DF, typename T = TFromD<DF>, class VF = Vec<DF>>
static HWY_INLINE std::pair<T, T> Reduce2Min(DF df, VF x_0, VF x_1) {
return Reduce2X(
df, x_0, x_1, [](auto d, auto v) HWY_ATTR { return ReduceMin(d, v); },
[](auto a, auto b) HWY_ATTR { return Min(a, b); });
}

template <class DF, typename T = TFromD<DF>, class VF = Vec<DF>>
static HWY_INLINE std::pair<T, T> Reduce2Max(DF df, VF x_0, VF x_1) {
return Reduce2X(
df, x_0, x_1, [](auto d, auto v) HWY_ATTR { return ReduceMax(d, v); },
[](auto a, auto b) HWY_ATTR { return Max(a, b); });
}

template <class DF, typename T = TFromD<DF>, class VF = Vec<DF>>
static HWY_INLINE std::pair<T, T> Reduce2Sum(DF df, VF x_0, VF x_1) {
return Reduce2X(
df, x_0, x_1, [](auto d, auto v) HWY_ATTR { return ReduceSum(d, v); },
[](auto a, auto b) HWY_ATTR { return Add(a, b); });
}

template <class DF, typename T = TFromD<DF>,
class DF4 = CappedTag<T, 4>, class VF4 = Vec<DF4>,
class VF = Vec<DF>>
static HWY_INLINE VF4 Reduce4Min(DF df, VF x_0, VF x_1, VF x_2, VF x_3) {
return Reduce4X(
df, x_0, x_1, x_2, x_3,
[](auto d, auto v) HWY_ATTR { return ReduceMin(d, v); },
[](auto a, auto b) HWY_ATTR { return Min(a, b); });
}

template <class DF, typename T = TFromD<DF>,
class DF4 = CappedTag<T, 4>, class VF4 = Vec<DF4>,
class VF = Vec<DF>>
static HWY_INLINE VF4 Reduce4Max(DF df, VF x_0, VF x_1, VF x_2, VF x_3) {
return Reduce4X(
df, x_0, x_1, x_2, x_3,
[](auto d, auto v) HWY_ATTR { return ReduceMax(d, v); },
[](auto a, auto b) HWY_ATTR { return Max(a, b); });
}

template <class DF, typename T = TFromD<DF>,
class DF4 = CappedTag<T, 4>, class VF4 = Vec<DF4>,
class VF = Vec<DF>>
static HWY_INLINE VF4 Reduce4Sum(DF df, VF x_0, VF x_1, VF x_2, VF x_3) {
return Reduce4X(
df, x_0, x_1, x_2, x_3,
[](auto d, auto v) HWY_ATTR { return ReduceSum(d, v); },
[](auto a, auto b) HWY_ATTR { return Add(a, b); });
}

template <class DF, typename T = TFromD<DF>,
class DF8 = CappedTag<T, 8>, class VF8 = Vec<DF8>,
class VF = Vec<DF>>
static HWY_INLINE VF8 Reduce8Min(DF df, VF x_0, VF x_1, VF x_2, VF x_3, VF x_4,
VF x_5, VF x_6, VF x_7) {
return Reduce8X(
df, x_0, x_1, x_2, x_3, x_4, x_5, x_6, x_7,
[](auto d, auto v) HWY_ATTR { return ReduceMin(d, v); },
[](auto a, auto b) HWY_ATTR { return Min(a, b); });
}

template <class DF, typename T = TFromD<DF>,
class DF8 = CappedTag<T, 8>, class VF8 = Vec<DF8>,
class VF = Vec<DF>>
static HWY_INLINE VF8 Reduce8Max(DF df, VF x_0, VF x_1, VF x_2, VF x_3, VF x_4,
VF x_5, VF x_6, VF x_7) {
return Reduce8X(
df, x_0, x_1, x_2, x_3, x_4, x_5, x_6, x_7,
[](auto d, auto v) HWY_ATTR { return ReduceMax(d, v); },
[](auto a, auto b) HWY_ATTR { return Max(a, b); });
}

template <class DF, typename T = TFromD<DF>,
class DF8 = CappedTag<T, 8>, class VF8 = Vec<DF8>,
class VF = Vec<DF>>
static HWY_INLINE VF8 Reduce8Sum(DF df, VF x_0, VF x_1, VF x_2, VF x_3, VF x_4,
VF x_5, VF x_6, VF x_7) {
return Reduce8X(
df, x_0, x_1, x_2, x_3, x_4, x_5, x_6, x_7,
[](auto d, auto v) HWY_ATTR { return ReduceSum(d, v); },
[](auto a, auto b) HWY_ATTR { return Add(a, b); });
}

// NOLINTNEXTLINE(google-readability-namespace-comments)
} // namespace HWY_NAMESPACE
} // namespace hwy
Expand Down
96 changes: 96 additions & 0 deletions hwy/contrib/algo/minmax_value_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,101 @@ void TestAllMaxValue() {
ForAllTypes(ForPartialVectors<ForeachCountAndMisalign<TestMaxValue>>());
}

struct TestReduce {
template <typename T, class D>
HWY_NOINLINE void operator()(T /*unused*/, D d) const {
const size_t N = Lanes(d);
AlignedFreeUniquePtr<T[]> storage = AllocateAligned<T>(8 * N);
HWY_ASSERT(storage);
T* in = storage.get();
for (size_t i = 0; i < 8 * N; ++i) {
in[i] = static_cast<T>((i % 13) + 1);
}

auto x0 = Load(d, in + 0 * N);
auto x1 = Load(d, in + 1 * N);
auto x2 = Load(d, in + 2 * N);
auto x3 = Load(d, in + 3 * N);
auto x4 = Load(d, in + 4 * N);
auto x5 = Load(d, in + 5 * N);
auto x6 = Load(d, in + 6 * N);
auto x7 = Load(d, in + 7 * N);

T min_s[8];
T max_s[8];
T sum_s[8];
for (int v = 0; v < 8; ++v) {
min_s[v] = hwy::PositiveInfOrHighestValue<T>();
max_s[v] = hwy::NegativeInfOrLowestValue<T>();
sum_s[v] = 0;
for (size_t i = 0; i < N; ++i) {
T val = in[v * N + i];
if (val < min_s[v]) min_s[v] = val;
if (val > max_s[v]) max_s[v] = val;
sum_s[v] = AddWithWraparound(sum_s[v], val);
}
}

using D2 = CappedTag<T, 2>;
const D2 d2;
if (Lanes(d2) == 2) {
HWY_ALIGN T actual_min[2];
HWY_ALIGN T actual_max[2];
HWY_ALIGN T actual_sum[2];
auto p_min = Reduce2Min(d, x0, x1);
actual_min[0] = p_min.first;
actual_min[1] = p_min.second;
auto p_max = Reduce2Max(d, x0, x1);
actual_max[0] = p_max.first;
actual_max[1] = p_max.second;
auto p_sum = Reduce2Sum(d, x0, x1);
actual_sum[0] = p_sum.first;
actual_sum[1] = p_sum.second;
for (int i = 0; i < 2; ++i) {
HWY_ASSERT_EQ(min_s[i], actual_min[i]);
HWY_ASSERT_EQ(max_s[i], actual_max[i]);
HWY_ASSERT_EQ(sum_s[i], actual_sum[i]);
}
}

using D4 = CappedTag<T, 4>;
const D4 d4;
if (Lanes(d4) == 4) {
HWY_ALIGN T actual_min[4];
HWY_ALIGN T actual_max[4];
HWY_ALIGN T actual_sum[4];
Store(Reduce4Min(d, x0, x1, x2, x3), d4, actual_min);
Store(Reduce4Max(d, x0, x1, x2, x3), d4, actual_max);
Store(Reduce4Sum(d, x0, x1, x2, x3), d4, actual_sum);
for (int i = 0; i < 4; ++i) {
HWY_ASSERT_EQ(min_s[i], actual_min[i]);
HWY_ASSERT_EQ(max_s[i], actual_max[i]);
HWY_ASSERT_EQ(sum_s[i], actual_sum[i]);
}
}

using D8 = CappedTag<T, 8>;
const D8 d8;
if (Lanes(d8) == 8) {
HWY_ALIGN T actual_min[8];
HWY_ALIGN T actual_max[8];
HWY_ALIGN T actual_sum[8];
Store(Reduce8Min(d, x0, x1, x2, x3, x4, x5, x6, x7), d8, actual_min);
Store(Reduce8Max(d, x0, x1, x2, x3, x4, x5, x6, x7), d8, actual_max);
Store(Reduce8Sum(d, x0, x1, x2, x3, x4, x5, x6, x7), d8, actual_sum);
for (int i = 0; i < 8; ++i) {
HWY_ASSERT_EQ(min_s[i], actual_min[i]);
HWY_ASSERT_EQ(max_s[i], actual_max[i]);
HWY_ASSERT_EQ(sum_s[i], actual_sum[i]);
}
}
}
};

void TestAllReduce() {
ForAllTypes(ForPartialVectors<TestReduce>());
}

} // namespace
// NOLINTNEXTLINE(google-readability-namespace-comments)
} // namespace HWY_NAMESPACE
Expand All @@ -158,6 +253,7 @@ namespace {
HWY_BEFORE_TEST(MinMaxTest);
HWY_EXPORT_AND_TEST_P(MinMaxTest, TestAllMinValue);
HWY_EXPORT_AND_TEST_P(MinMaxTest, TestAllMaxValue);
HWY_EXPORT_AND_TEST_P(MinMaxTest, TestAllReduce);
HWY_AFTER_TEST();
} // namespace
} // namespace hwy
Expand Down
Loading