diff --git a/folly/lang/BUCK b/folly/lang/BUCK index 6d79a58be62..38047f16344 100644 --- a/folly/lang/BUCK +++ b/folly/lang/BUCK @@ -400,6 +400,15 @@ non_fbcode_target( raw_headers = ["VectorTraits.h"], ) +non_fbcode_target( + _kind = folly_xplat_cxx_library, + name = "bit_fields", + raw_headers = ["BitFields.h"], + exported_deps = [ + ":bits", + ], +) + # fbcode build rules fbcode_target( @@ -450,6 +459,15 @@ fbcode_target( ], ) +fbcode_target( + _kind = cpp_library, + name = "bit_fields", + headers = ["BitFields.h"], + exported_deps = [ + ":bits", + ], +) + fbcode_target( _kind = cpp_library, name = "bits", diff --git a/folly/lang/BitFields.h b/folly/lang/BitFields.h new file mode 100644 index 00000000000..43f847730b1 --- /dev/null +++ b/folly/lang/BitFields.h @@ -0,0 +1,445 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include + +namespace folly { + +// Forward declarations for transformers +template +struct or_transformer; +template +struct and_transformer; +template +struct add_transformer; + +// Declares a wrapper type around UnderlyingT that allows it to be divided up +// into and accessed as bit fields. 
This is mostly intended to aid in packing +// fields into atomic variables to reduce the need for locking in concurrent +// code and/or to simplify reasoning on and accommodation of different +// interesting, bug-prone interleavings. A convenient atomic wrapper +// (atomic_bit_fields) is provided in +// folly/synchronization/AtomicBitFields.h to aid usage with atomics, +// especially for CAS updates, but it is even possible to combine operations on +// multiple bit fields into a single non-CAS atomic operation using Transforms. +// +// Unlike C/C++ bit fields, this implementation guarantees tight bit packing +// so that all available lock-free atomic bits can be utilized. +// +// The specific bit fields are declared outside the declaration using +// bool_bit_field and unsigned_bit_field below. Example usage: +// +// struct MyState : public bit_fields { +// // Extra helper declarations and/or field type declarations +// }; +// +// // Starts with a 16-bit field returned as uint16_t +// using Field1 = unsigned_bit_field; +// using Field2 = bool_bit_field; +// using Field3 = bool_bit_field; +// using Field4 = unsigned_bit_field; // 5 bits in uint8_t +// +// // MyState{} is zero-initialized +// auto state = MyState{}.with(42U).with(true); +// state.set(3U); +// state.ref() += state.get(); +// +// Note that there's nothing preventing you from declaring overlapping fields +// in the same 'MyState' family. This could be useful for variant types where +// an earlier field determines which layout later fields are using. 
For +// example, an alternate field after Field2: +// +// using Field3a = unsigned_bit_field; // 6 bits in uint8_t +// +template +struct bit_fields { + using underlying_type = UnderlyingT; + underlying_type underlying = 0; + static constexpr int kBitCount = sizeof(underlying_type) * 8; + + using derived_type = DerivedT; + + // Modify a given field in place + template + void set(typename BitFieldT::value_type value) { + static_assert( + std::is_same_v); + derived_type& derived = static_cast(*this); + BitFieldT::setIn(derived, value); + } + + // Return a copy with the given field modified + template + constexpr derived_type with(typename BitFieldT::value_type value) const { + static_assert( + std::is_same_v); + derived_type rv = static_cast(*this); + BitFieldT::setIn(rv, value); + return rv; + } + + // Get the value of a field + template + constexpr typename BitFieldT::value_type get() const { + static_assert( + std::is_same_v); + return BitFieldT::getFrom(static_cast(*this)); + } + + // Reference and ref() are not intended to behave as full references but to + // provide a convenient way to do operations like +=, |=, etc. get and set + // are preferred for simple operations. 
+ template + struct reference { + explicit reference(bit_fields& bf) : bf_(bf) {} + reference(const reference&) = default; + ~reference() = default; + reference& operator=(const reference&) = default; + reference(reference&&) noexcept = default; + reference& operator=(reference&&) noexcept = default; + + void operator=(typename BitFieldT::value_type value) { + bf_.set(value); + } + void operator+=(typename BitFieldT::value_type value) { + bf_.set(bf_.get() + value); + } + void operator-=(typename BitFieldT::value_type value) { + bf_.set(bf_.get() - value); + } + void operator|=(typename BitFieldT::value_type value) { + bf_.set(bf_.get() | value); + } + void operator&=(typename BitFieldT::value_type value) { + bf_.set(bf_.get() & value); + } + + private: + bit_fields& bf_; + }; + + template + reference ref() { + return reference(*this); + } + + // Apply an or_transformer to update the underlying value + void apply(const or_transformer& t) { underlying |= t.to_or; } + + // Apply an and_transformer to update the underlying value + void apply(const and_transformer& t) { underlying &= t.to_and; } + + // Apply an add_transformer to update the underlying value + void apply(const add_transformer& t) { + t.assertPreconditions(underlying); + underlying += t.to_add; + } + + // Return a copy with an or_transformer applied + constexpr derived_type transformed( + const or_transformer& t) const { + derived_type rv = static_cast(*this); + rv.underlying |= t.to_or; + return rv; + } + + // Return a copy with an and_transformer applied + constexpr derived_type transformed( + const and_transformer& t) const { + derived_type rv = static_cast(*this); + rv.underlying &= t.to_and; + return rv; + } + + // Return a copy with an add_transformer applied + derived_type transformed(const add_transformer& t) const { + t.assertPreconditions(underlying); + derived_type rv = static_cast(*this); + rv.underlying += t.to_add; + return rv; + } + + // NOTE: use = default with C++20 + constexpr bool 
operator==(const bit_fields& other) const { + return underlying == other.underlying; + } + // NOTE: use = default with C++20 + constexpr bool operator!=(const bit_fields& other) const { + return underlying != other.underlying; + } +}; + +// For building atomic updates affecting one or more fields, assuming all the +// updates are bitwise-or. +template +struct or_transformer { + using underlying_type = typename BitFieldsT::underlying_type; + underlying_type to_or = 0; + // + for general combine + or_transformer operator+( + const or_transformer& other) const { + return or_transformer{to_or | other.to_or}; + } +}; + +// For building atomic updates affecting one or more fields, assuming all the +// updates are bitwise-and. +template +struct and_transformer { + using underlying_type = typename BitFieldsT::underlying_type; + underlying_type to_and = 0; + // + for general combine + and_transformer operator+( + const and_transformer& other) const { + return and_transformer{to_and & other.to_and}; + } +}; + +// Can represent a combination of both subtractions and additions, representing +// subtractions as the addition of a negated value. To ensure we don't create a +// net overflow or underflow between fields, in debug builds we track the +// corresponding preconditions. (NOTE that when representing a subtraction, we +// rely on overflow of the unsigned representation.) 
+template +struct add_transformer { + using underlying_type = typename BitFieldsT::underlying_type; + underlying_type to_add = 0; +#ifndef NDEBUG + struct precondition { + underlying_type mask; // for bits of the target field + underlying_type piece; // component of to_add for the target field + }; + std::vector preconditions; +#endif // NDEBUG + + add_transformer() = default; + explicit add_transformer(underlying_type val) : to_add(val) {} + + void assertPreconditions([[maybe_unused]] underlying_type from) const { +#ifndef NDEBUG + for (auto p : preconditions) { + underlying_type tmp = (from & p.mask) + p.piece; + // Assert no under/overflow (unless the field is at the top bits of the + // representation in underlying_type, which is allowed because it doesn't + // lead to leakage into other fields) + assert((tmp & ~p.mask) == 0); + } +#endif // NDEBUG + } + + // + for general combine + add_transformer operator+( + const add_transformer& other) const { + add_transformer rv(to_add + other.to_add); +#ifndef NDEBUG + rv.preconditions = preconditions; + rv.preconditions.insert( + rv.preconditions.end(), + other.preconditions.begin(), + other.preconditions.end()); +#endif // NDEBUG + return rv; + } +}; + +namespace detail { + +// NOTE: PrevField is not a direct template parameter here to avoid exponential +// blowup in compiled mangled names +template +struct bool_bit_field_impl { + using parent_type = BitFieldsT; + using parent_base = bit_fields< + typename BitFieldsT::underlying_type, + typename BitFieldsT::derived_type>; + using underlying_type = typename BitFieldsT::underlying_type; + using value_type = bool; + static constexpr int kBitOffset = PrevFieldEndBit; + static constexpr int kEndBit = kBitOffset + 1; + static_assert(kBitOffset >= 0 && kEndBit <= BitFieldsT::kBitCount); + + // no instances + bool_bit_field_impl() = delete; + + // NOTE: allow BitFieldsT to be derived from bit_fields<> which can be + // passed in here + static constexpr bool getFrom(const 
parent_base& bf) { + return (bf.underlying & (underlying_type{1} << kBitOffset)) != 0; + } + static constexpr void setIn(parent_base& bf, bool value) { + // NOTE: avoiding conditional branches is usually best for speed on modern + // processors + bf.underlying = (bf.underlying & ~(underlying_type{1} << kBitOffset)) | + (underlying_type{value} << kBitOffset); + } + static or_transformer setTransform() { return orTransform(true); } + static or_transformer orTransform(bool b) { + return or_transformer{underlying_type{b} << kBitOffset}; + } + static and_transformer clearTransform() { + return andTransform(false); + } + static and_transformer andTransform(bool b) { + return and_transformer{~(underlying_type{!b} << kBitOffset)}; + } +}; + +// NOTE: PrevField is not a direct template parameter here to avoid exponential +// blowup in compiled mangled names +template +struct unsigned_bit_field_impl { + using parent_type = BitFieldsT; + using underlying_type = typename BitFieldsT::underlying_type; + // Smallest uint type that can fit kBitCount bits + using value_type = std::conditional_t< + kBitCount_ <= 8, + uint8_t, + std::conditional_t< + kBitCount_ <= 16, + uint16_t, + std::conditional_t>>; + static constexpr int kBitOffset = PrevFieldEndBit; + static constexpr int kBitCount = kBitCount_; + static constexpr int kEndBit = kBitOffset + kBitCount; + static_assert(kBitCount >= 1); + static_assert(kBitCount <= 64); + static_assert(kBitOffset >= 0 && kEndBit <= BitFieldsT::kBitCount); + static constexpr bool kIncludesTopBit = (kEndBit == BitFieldsT::kBitCount); + + static constexpr value_type kMask = + (value_type{1} << (kBitCount - 1) << 1) - 1; + + // no instances + unsigned_bit_field_impl() = delete; + + static constexpr value_type getFrom(const BitFieldsT& bf) { + return static_cast((bf.underlying >> kBitOffset) & kMask); + } + + static constexpr void setIn(BitFieldsT& bf, value_type value) { + bf.underlying &= ~(static_cast(kMask) << kBitOffset); + bf.underlying |= 
static_cast(value & kMask) << kBitOffset; + } + + // Create a transform for clearing this field to zero. + static and_transformer clearTransform() { + return and_transformer{ + ~(static_cast(kMask) << kBitOffset)}; + } + + // Create a transform for bitwise-and + static and_transformer andTransform(value_type value) { + assert((value & ~kMask) == 0); + return and_transformer{ + ~(static_cast(value ^ kMask) << kBitOffset)}; + } + + // Create a transform for bitwise-or + static or_transformer orTransform(value_type value) { + assert((value & ~kMask) == 0); + return or_transformer{ + static_cast(value) << kBitOffset}; + } + + // Create a transform for adding a particular value, but with the + // precondition that adding the value will not overflow the field. This + // applies for fields that do not include the top bit of the underlying + // representation. Can be combined with other additive transforms for other + // fields. + static add_transformer plusTransformPromiseNoOverflow( + value_type value) { + static_assert(!kIncludesTopBit); + add_transformer rv{ + static_cast(value) << kBitOffset}; +#ifndef NDEBUG + rv.preconditions.push_back( + {static_cast(kMask) << kBitOffset, rv.to_add}); +#endif // NDEBUG + return rv; + } + + // Create a transform for adding a particular value, but ignoring any + // overflow in that field. This applies for fields that include the top bit + // of the underlying representation. Can be combined with other additive + // transforms for other fields. + static add_transformer plusTransformIgnoreOverflow( + value_type value) { + static_assert(kIncludesTopBit); + add_transformer rv{ + static_cast(value) << kBitOffset}; + return rv; + } + + // Create a transform for subtracting a particular value, but with the + // precondition that subtracting the value will not underflow the field. This + // applies for fields that do not include the top bit of the underlying + // representation. 
Can be combined with other additive transforms for other + // fields. + static add_transformer minusTransformPromiseNoUnderflow( + value_type value) { + static_assert(!kIncludesTopBit); + add_transformer rv{ + underlying_type{0} - + (static_cast(value) << kBitOffset)}; +#ifndef NDEBUG + rv.preconditions.push_back( + {static_cast(kMask) << kBitOffset, rv.to_add}); +#endif // NDEBUG + return rv; + } + + // Create a transform for subtracting a particular value, but ignoring any + // underflow in that field. This applies for fields that include the top bit + // of the underlying representation. Can be combined with other additive + // transforms for other fields. + static add_transformer minusTransformIgnoreUnderflow( + value_type value) { + static_assert(kIncludesTopBit); + add_transformer rv{ + underlying_type{0} - + (static_cast(value) << kBitOffset)}; + return rv; + } +}; + +} // namespace detail + +// Placeholder for PrevField for the first field +struct no_prev_bit_field { + // no instances + no_prev_bit_field() = delete; + static constexpr int kEndBit = 0; +}; + +// For declaring a single-bit field accessed as a boolean. See example above on +// bit_fields +template +using bool_bit_field = + detail::bool_bit_field_impl; + +// For declaring a multi-bit field accessed as an unsigned int. See example +// above on bit_fields +template +using unsigned_bit_field = + detail::unsigned_bit_field_impl; + +} // namespace folly diff --git a/folly/lang/test/BUCK b/folly/lang/test/BUCK index dcf645bd230..31f85f4e2af 100644 --- a/folly/lang/test/BUCK +++ b/folly/lang/test/BUCK @@ -314,6 +314,16 @@ non_fbcode_target( "//xplat/folly/lang:vector_traits", ], ) + +non_fbcode_target( + _kind = folly_xplat_cxx_test, + name = "bit_fields_test", + srcs = ["BitFieldsTest.cpp"], + deps = [ + "fbsource//xplat/folly/portability:gtest", + "//xplat/folly/lang:bit_fields", + ], +) # !!!! 
fbcode/folly/lang/test/TARGETS was merged into this file, see https://fburl.com/workplace/xl8l9yuo for more info !!!! fbcode_target( @@ -389,6 +399,16 @@ fbcode_target( ], ) +fbcode_target( + _kind = cpp_unittest, + name = "bit_fields_test", + srcs = ["BitFieldsTest.cpp"], + deps = [ + "//folly/lang:bit_fields", + "//folly/portability:gtest", + ], +) + fbcode_target( _kind = cpp_unittest, name = "bits_test", diff --git a/folly/lang/test/BitFieldsTest.cpp b/folly/lang/test/BitFieldsTest.cpp new file mode 100644 index 00000000000..22020ad5275 --- /dev/null +++ b/folly/lang/test/BitFieldsTest.cpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +class BitFieldsTest : public testing::Test {}; + +TEST_F(BitFieldsTest, BasicUsage) { + // Start by verifying example from bit_fields comment + struct MyState : public folly::bit_fields {}; + + using Field1 = + folly::unsigned_bit_field; + using Field2 = folly::bool_bit_field; + using Field3 = folly::bool_bit_field; + using Field4a = folly::unsigned_bit_field; + // Can support variant bit fields, at your own risk + using Field4b = folly::bool_bit_field; + using Field5b = folly::unsigned_bit_field; + + auto state = + MyState{}.with(45U).with(true).with(true); + state.set(3U); + + EXPECT_EQ(state.get(), 45U); + EXPECT_EQ(state.get(), true); + EXPECT_EQ(state.get(), true); + EXPECT_EQ(state.get(), 3U); + + // As if Field3 indicates which variant is used for remaining fields + state.set(false); + state.set(true); + state.set(5U); + + EXPECT_EQ(state.get(), 45U); + EXPECT_EQ(state.get(), true); + EXPECT_EQ(state.get(), false); + EXPECT_EQ(state.get(), true); + EXPECT_EQ(state.get(), 5U); + + MyState state2; + EXPECT_NE(state, state2); + state.set(false); + state.set(false); + state.set(0U); + EXPECT_NE(state, state2); + state.set(0U); + // Back to all zeros + EXPECT_EQ(state, state2); + + // Misc operators + auto ref = state.ref(); + auto ref2 = std::move(ref); + ref2 = true; + EXPECT_EQ(state.get(), true); + auto ref3 = state.ref(); + ref3 = 40U; + EXPECT_EQ(state.get(), 40U); + ref3 += 5U; + EXPECT_EQ(state.get(), 45U); + ref3 -= 38U; + EXPECT_EQ(state.get(), 7U); + ref3 |= 17U; + EXPECT_EQ(state.get(), 23U); + ref3 &= 48U; + EXPECT_EQ(state.get(), 16U); +} diff --git a/folly/synchronization/AtomicBitFields.h b/folly/synchronization/AtomicBitFields.h new file mode 100644 index 00000000000..1b87e01e926 --- /dev/null +++ b/folly/synchronization/AtomicBitFields.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace folly { + +// A handy wrapper for an atomic on some bit_fields type. +// +// For encapsulation, usual arithmetic atomic operations are only available by +// calling apply() with Transforms returned from field classes. Example: +// +// auto transform = Field2::clearTransform() + Field4::clearTransform(); +// MyState old_state; +// my_atomic.apply(transform, std::memory_order_relaxed, &old_state); +// auto field2_before_clearing = old_state.get(); +// +template class Atom = std::atomic> +class atomic_bit_fields : AtomicStruct { + private: + using underlying_type = typename BitFieldsT::underlying_type; + using base = AtomicStruct; + + public: + using value_type = BitFieldsT; + + atomic_bit_fields() = default; + ~atomic_bit_fields() = default; + constexpr /* implicit */ atomic_bit_fields(BitFieldsT v) noexcept : base(v) {} + + atomic_bit_fields(const atomic_bit_fields& other) noexcept + : base(other.load()) {} + atomic_bit_fields& operator=(const atomic_bit_fields& other) noexcept { + this->store(other.load()); + return *this; + } + + atomic_bit_fields(atomic_bit_fields&&) = delete; + atomic_bit_fields& operator=(atomic_bit_fields&&) = delete; + + using base::compare_exchange_strong; + using base::compare_exchange_weak; + using base::exchange; + using base::is_lock_free; + using base::load; + using base::operator BitFieldsT; + using base::store; + + BitFieldsT 
operator=(BitFieldsT v) noexcept { + store(v); + return v; + } + + void apply( + const or_transformer& transform, + std::memory_order mo = std::memory_order_seq_cst, + BitFieldsT* before = nullptr, + BitFieldsT* after = nullptr) { + underlying_type before_val = base::data.fetch_or(transform.to_or, mo); + if (before) { + before->underlying = before_val; + } + if (after) { + after->underlying = before_val | transform.to_or; + } + } + + void apply( + const and_transformer& transform, + std::memory_order mo = std::memory_order_seq_cst, + BitFieldsT* before = nullptr, + BitFieldsT* after = nullptr) { + underlying_type before_val = base::data.fetch_and(transform.to_and, mo); + if (before) { + before->underlying = before_val; + } + if (after) { + after->underlying = before_val & transform.to_and; + } + } + + void apply( + const add_transformer& transform, + std::memory_order mo = std::memory_order_seq_cst, + BitFieldsT* before = nullptr, + BitFieldsT* after = nullptr) { + underlying_type before_val = base::data.fetch_add(transform.to_add, mo); + transform.assertPreconditions(before_val); + if (before) { + before->underlying = before_val; + } + if (after) { + after->underlying = before_val + transform.to_add; + } + } +}; + +} // namespace folly diff --git a/folly/synchronization/AtomicStruct.h b/folly/synchronization/AtomicStruct.h index c8e7583160f..7df7d9749a4 100644 --- a/folly/synchronization/AtomicStruct.h +++ b/folly/synchronization/AtomicStruct.h @@ -53,7 +53,7 @@ struct AtomicStructRaw<3> { /// type <= 8 bytes. 
template class Atom = std::atomic> class AtomicStruct { - private: + protected: using Raw = _t>; static_assert(alignof(T) <= alignof(Raw), "underlying type is under-aligned"); diff --git a/folly/synchronization/BUCK b/folly/synchronization/BUCK index 5baa01f719c..c17f0157137 100644 --- a/folly/synchronization/BUCK +++ b/folly/synchronization/BUCK @@ -217,7 +217,7 @@ fb_dirsync_cpp_library( headers = ["LifoSem.h"], xplat_impl = folly_xplat_cxx_library, exported_deps = [ - ":atomic_struct", + ":atomic_bit_fields", ":saturating_semaphore", "//folly:c_portability", "//folly:indexed_mem_pool", @@ -477,3 +477,13 @@ fb_dirsync_cpp_library( "//folly/synchronization:throttled_lifo_sem", ], ) + +fb_dirsync_cpp_library( + name = "atomic_bit_fields", + headers = ["AtomicBitFields.h"], + xplat_impl = folly_xplat_cxx_library, + exported_deps = [ + ":atomic_struct", + "//folly/lang:bit_fields", + ], +) diff --git a/folly/synchronization/LifoSem.h b/folly/synchronization/LifoSem.h index 07c1570924e..a2fece71a64 100644 --- a/folly/synchronization/LifoSem.h +++ b/folly/synchronization/LifoSem.h @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include namespace folly { @@ -212,67 +212,61 @@ struct LifoSemNodeRecycler { } }; -/// LifoSemHead is a 64-bit struct that holds a 32-bit value, some state +/// LifoSemHead is a 64-bit packed atomic that holds a 32-bit value, some state /// bits, and a sequence number used to avoid ABA problems in the lock-free /// management of the LifoSem's wait lists. The value can either hold /// an integral semaphore value (if there are no waiters) or a node index -/// (see IndexedMemPool) for the head of a list of wait nodes -class LifoSemHead { - // What we really want are bitfields: - // uint64_t data : 32; uint64_t isNodeIdx : 1; uint64_t seq : 31; - // Unfortunately g++ generates pretty bad code for this sometimes (I saw - // -O3 code from gcc 4.7.1 copying the bitfields one at a time instead of - // in bulk, for example). 
We can generate better code anyway by assuming - // that setters won't be given values that cause under/overflow, and - // putting the sequence at the end where its planned overflow doesn't - // need any masking. - // - // data == 0 (empty list) with isNodeIdx is conceptually the same - // as data == 0 (no unclaimed increments) with !isNodeIdx, we always - // convert the former into the latter to make the logic simpler. - enum { - IsNodeIdxShift = 32, - IsShutdownShift = 33, - IsLockedShift = 34, - SeqShift = 35, - }; - enum : uint64_t { - IsNodeIdxMask = uint64_t(1) << IsNodeIdxShift, - IsShutdownMask = uint64_t(1) << IsShutdownShift, - IsLockedMask = uint64_t(1) << IsLockedShift, - SeqIncr = uint64_t(1) << SeqShift, - SeqMask = ~(SeqIncr - 1), - }; +/// (see IndexedMemPool) for the head of a list of wait nodes. +/// +/// Layout using bit_fields: +/// data : 32 bits (value or node index) +/// isNodeIdx : 1 bit +/// isShutdown: 1 bit +/// isLocked : 1 bit +/// seq : 29 bits (at the end, overflow is okay) +/// +/// data == 0 (empty list) with isNodeIdx is conceptually the same +/// as data == 0 (no unclaimed increments) with !isNodeIdx, we always +/// convert the former into the latter to make the logic simpler. 
+struct LifoSemHead : public bit_fields { + private: + using Data = unsigned_bit_field; + using IsNodeIdx = bool_bit_field; + using IsShutdown = bool_bit_field; + using IsLocked = bool_bit_field; + using Seq = unsigned_bit_field; - public: - uint64_t bits; + // Increment seq for a modification + constexpr LifoSemHead withNextSeq() const { return with(seq() + 1); } + public: //////// getters inline uint32_t idx() const { assert(isNodeIdx()); - assert(uint32_t(bits) != 0); - return uint32_t(bits); + assert(get() != 0); + return get(); } + inline uint32_t value() const { assert(!isNodeIdx()); - return uint32_t(bits); - } - inline constexpr bool isNodeIdx() const { - return (bits & IsNodeIdxMask) != 0; - } - inline constexpr bool isShutdown() const { - return (bits & IsShutdownMask) != 0; + return get(); } - inline constexpr bool isLocked() const { return (bits & IsLockedMask) != 0; } - inline constexpr uint32_t seq() const { return uint32_t(bits >> SeqShift); } + + inline constexpr bool isNodeIdx() const { return get(); } + + inline constexpr bool isShutdown() const { return get(); } + + inline constexpr bool isLocked() const { return get(); } + + inline constexpr uint32_t seq() const { return get(); } //////// setter-like things return a new struct /// This should only be used for initial construction, not for setting /// the value, because it clears the sequence number static inline constexpr LifoSemHead fresh(uint32_t value) { - return LifoSemHead{value}; + return LifoSemHead{}.with(value); } /// Returns the LifoSemHead that results from popping a waiter node, @@ -280,16 +274,14 @@ class LifoSemHead { inline LifoSemHead withPop(uint32_t idxNext) const { assert(!isLocked()); assert(isNodeIdx()); - if (idxNext == 0) { - // no isNodeIdx bit or data bits. 
Wraparound of seq bits is okay - return LifoSemHead{(bits & (SeqMask | IsShutdownMask)) + SeqIncr}; - } else { - // preserve sequence bits (incremented with wraparound okay) and - // isNodeIdx bit, replace all data bits - return LifoSemHead{ - (bits & (SeqMask | IsShutdownMask | IsNodeIdxMask)) + SeqIncr + - idxNext}; - } + // Build result from empty, setting only the fields we need. + // Preserves isShutdown, clears isLocked (already asserted false), + // sets data and isNodeIdx based on idxNext, increments seq. + return LifoSemHead{} + .with(idxNext) + .with(idxNext != 0) + .with(isShutdown()) + .with(seq() + 1); } /// Returns the LifoSemHead that results from pushing a new waiter node @@ -298,39 +290,50 @@ class LifoSemHead { assert(isNodeIdx() || value() == 0); assert(!isShutdown()); assert(_idx != 0); - return LifoSemHead{(bits & SeqMask) | IsNodeIdxMask | _idx}; + // Build result from empty, setting only the fields we need. + // Seq is preserved (no increment on push), shutdown is known false. + return LifoSemHead{}.with(_idx).with(true).with( + seq()); } /// Returns the LifoSemHead with value increased by delta, with - /// saturation if the maximum value is reached + /// saturation (no overflow) if the maximum value is reached inline LifoSemHead withValueIncr(uint32_t delta) const { assert(!isLocked()); assert(!isNodeIdx()); - auto rv = LifoSemHead{bits + SeqIncr + delta}; + // Ugly-but-optimized: direct bit manipulation for single add operation + // with overflow checking *after* application. 
+ constexpr uint64_t kSeqIncr = uint64_t{1} << Seq::kBitOffset; + LifoSemHead rv; + rv.underlying = underlying + kSeqIncr + delta; + static_assert(IsNodeIdx::kBitOffset == Data::kEndBit); if (FOLLY_UNLIKELY(rv.isNodeIdx())) { - // value has overflowed into the isNodeIdx bit - rv = LifoSemHead{(rv.bits & ~IsNodeIdxMask) | (IsNodeIdxMask - 1)}; + // Overflow detected: clear overflow and saturate + rv.set(false); + rv.set(Data::kMask); } return rv; } - /// Returns the LifoSemHead that results from decrementing the value + /// Returns the LifoSemHead that results from decrementing the value. + /// Caller guarantees the delta is not greater than the current value. inline LifoSemHead withValueDecr(uint32_t delta) const { assert(!isLocked()); assert(delta > 0 && delta <= value()); - return LifoSemHead{bits + SeqIncr - delta}; + // NOTE: optimized for efficiency + return transformed( + Data::minusTransformPromiseNoUnderflow(delta) + + Seq::plusTransformIgnoreOverflow(1)); } /// Returns the LifoSemHead with the same state as the current node, /// but with the shutdown bit set - inline LifoSemHead withShutdown() const { - return LifoSemHead{bits | IsShutdownMask}; - } + inline LifoSemHead withShutdown() const { return with(true); } // Returns LifoSemHead with lock bit set, but rest of bits unchanged. inline LifoSemHead withLock() const { assert(!isLocked()); - return LifoSemHead{bits | IsLockedMask}; + return with(true); } // Returns LifoSemHead with lock bit unset, and updated seqno based @@ -338,14 +341,7 @@ class LifoSemHead { inline LifoSemHead withoutLock(uint32_t idxNext) const { assert(isLocked()); // We need to treat this as a pop, as we may change the list head. 
- return LifoSemHead{bits & ~IsLockedMask}.withPop(idxNext); - } - - inline constexpr bool operator==(const LifoSemHead& rhs) const { - return bits == rhs.bits; - } - inline constexpr bool operator!=(const LifoSemHead& rhs) const { - return !(*this == rhs); + return with(false).withPop(idxNext); } }; @@ -663,7 +659,7 @@ struct LifoSemBase { } private: - cacheline_aligned> head_; + cacheline_aligned> head_; static LifoSemNode& idxToNode(uint32_t idx) { auto raw = &LifoSemRawNode::pool()[idx]; diff --git a/folly/synchronization/test/AtomicBitFieldsTest.cpp b/folly/synchronization/test/AtomicBitFieldsTest.cpp new file mode 100644 index 00000000000..b9d7c141a64 --- /dev/null +++ b/folly/synchronization/test/AtomicBitFieldsTest.cpp @@ -0,0 +1,228 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include + +class BitFieldsAtomicTest : public testing::Test {}; + +struct MyState : public folly::bit_fields {}; + +TEST_F(BitFieldsAtomicTest, Basic) { + using Field1 = + folly::unsigned_bit_field; + using Field2 = folly::bool_bit_field; + using Field3 = folly::bool_bit_field; + using Field4a = folly::unsigned_bit_field; + // Can support variant bit fields, at your own risk + using Field4b = folly::bool_bit_field; + using Field5b = folly::unsigned_bit_field; + + auto state = + MyState{}.with(45U).with(true).with(true); + state.set(3U); + + EXPECT_EQ(state.get(), 45U); + EXPECT_EQ(state.get(), true); + EXPECT_EQ(state.get(), true); + EXPECT_EQ(state.get(), 3U); + + // As if Field3 indicates which variant is used for remaining fields + state.set(false); + state.set(true); + state.set(5U); + + EXPECT_EQ(state.get(), 45U); + EXPECT_EQ(state.get(), true); + EXPECT_EQ(state.get(), false); + EXPECT_EQ(state.get(), true); + EXPECT_EQ(state.get(), 5U); + + MyState state2; + + folly::atomic_bit_fields atomic{state}; + EXPECT_EQ(state, atomic.load()); + EXPECT_NE(state, state2); + atomic.store(state2); + EXPECT_EQ(state2, atomic.load()); + MyState state3 = atomic.exchange(state); + EXPECT_EQ(state2, state3); + EXPECT_TRUE(atomic.compare_exchange_strong(state, state2)); + while (!atomic.compare_exchange_weak(state2, state)) { + } + EXPECT_EQ(state2, state3); + MyState state4 = atomic; // Implicit conversion + EXPECT_EQ(state, state4); + atomic = state2; // operator= + EXPECT_EQ(state2, atomic.load()); +} + +TEST_F(BitFieldsAtomicTest, CopyConstructor) { + using Field1 = + folly::unsigned_bit_field; + using Field2 = folly::bool_bit_field; + + auto state = MyState{}.with(123U).with(true); + folly::atomic_bit_fields atomic1{state}; + + folly::atomic_bit_fields atomic2{atomic1}; + EXPECT_EQ(atomic1.load(), atomic2.load()); + EXPECT_EQ(state, atomic2.load()); +} + +TEST_F(BitFieldsAtomicTest, CopyAssignment) { + using Field1 = + folly::unsigned_bit_field; + 
using Field2 = folly::bool_bit_field; + + auto state1 = MyState{}.with(100U).with(true); + auto state2 = MyState{}.with(200U).with(false); + + folly::atomic_bit_fields atomic1{state1}; + folly::atomic_bit_fields atomic2{state2}; + + EXPECT_NE(atomic1.load(), atomic2.load()); + + atomic2 = atomic1; + EXPECT_EQ(atomic1.load(), atomic2.load()); + EXPECT_EQ(state1, atomic2.load()); +} + +TEST_F(BitFieldsAtomicTest, DefaultConstructor) { + folly::atomic_bit_fields atomic; + MyState defaultState; + EXPECT_EQ(defaultState, atomic.load()); +} + +TEST_F(BitFieldsAtomicTest, Transforms) { + using Field1 = + folly::unsigned_bit_field; + using Field2 = folly::bool_bit_field; + using Field3 = folly::bool_bit_field; + using Field4 = folly::unsigned_bit_field; + + auto state = + MyState{}.with(45U).with(true).with(true); + state.set(3U); + + folly::atomic_bit_fields atomic{state}; + + auto transform1 = Field2::clearTransform() + Field3::clearTransform(); + MyState before, after; + atomic.apply(transform1, std::memory_order_acq_rel, &before, &after); + EXPECT_EQ(before, state); + EXPECT_NE(after, state); + EXPECT_EQ(after.get(), false); + EXPECT_EQ(after.get(), false); + + auto transform2 = Field2::setTransform() + Field3::setTransform(); + atomic.apply(transform2, std::memory_order_acq_rel, &before, &after); + EXPECT_NE(before, state); + EXPECT_EQ(before.get(), false); + EXPECT_EQ(before.get(), false); + EXPECT_EQ(after, state); + + EXPECT_EQ(state.get(), 45U); + EXPECT_EQ(after.get(), true); + EXPECT_EQ(after.get(), true); + EXPECT_EQ(state.get(), 3U); + + auto transform2a = Field2::andTransform(true) + Field3::andTransform(false); + atomic.apply(transform2a, std::memory_order_acq_rel, &before, &after); + EXPECT_EQ(after.get(), true); + EXPECT_EQ(after.get(), false); + + auto transform2b = Field2::andTransform(false) + Field3::andTransform(true); + atomic.apply(transform2b, std::memory_order_acq_rel, &before, &after); + EXPECT_EQ(after.get(), false); + EXPECT_EQ(after.get(), 
false); + + auto transform2c = Field2::orTransform(true) + Field3::orTransform(false); + atomic.apply(transform2c, std::memory_order_acq_rel, &before, &after); + EXPECT_EQ(after.get(), true); + EXPECT_EQ(after.get(), false); + + auto transform2d = Field2::orTransform(false) + Field3::orTransform(true); + atomic.apply(transform2d, std::memory_order_acq_rel, &before, &after); + EXPECT_EQ(after.get(), true); + EXPECT_EQ(after.get(), true); + + EXPECT_EQ(state.get(), 45U); + EXPECT_EQ(state.get(), 3U); + + auto transform3 = Field1::plusTransformPromiseNoOverflow(10000U) + + Field4::minusTransformPromiseNoUnderflow(3U); + atomic.apply(transform3, std::memory_order_acq_rel, &before, &after); + EXPECT_EQ(before, state); + EXPECT_NE(after, state); + EXPECT_EQ(after.get(), 10045U); + EXPECT_EQ(after.get(), 0U); + + auto transform4 = Field1::minusTransformPromiseNoUnderflow(999U) + + Field4::plusTransformPromiseNoOverflow(31U); + atomic.apply(transform4, std::memory_order_acq_rel, &before, &after); + EXPECT_EQ(after.get(), 9046U); + EXPECT_EQ(after.get(), 31U); + + auto transform4a = + Field1::andTransform(8192U + 4096U) + Field4::andTransform(15U); + atomic.apply(transform4a, std::memory_order_acq_rel, &before, &after); + EXPECT_EQ(after.get(), 8192U); + EXPECT_EQ(after.get(), 15U); + + auto transform4b = Field1::orTransform(127U) + Field4::orTransform(16U); + atomic.apply(transform4b, std::memory_order_acq_rel, &before, &after); + EXPECT_EQ(after.get(), 8192U + 127U); + EXPECT_EQ(after.get(), 31U); + + // Unmodified + EXPECT_EQ(after.get(), true); + EXPECT_EQ(after.get(), true); +} + +TEST_F(BitFieldsAtomicTest, TopBitField) { + using Field1 = + folly::unsigned_bit_field; + using Field2 = folly::bool_bit_field; + using Field3 = folly::bool_bit_field; + using Field4 = folly::unsigned_bit_field; + using Field5 = folly::unsigned_bit_field; + + folly::atomic_bit_fields atomic{MyState{}}; + MyState before, after; + + // A field at the limit of upper bits is allowed to 
over/underflow + atomic.store(MyState{}.with(0)); // Field5 at 0 + atomic.apply( + Field5::minusTransformIgnoreUnderflow(1U), + std::memory_order_acq_rel, + &before, + &after); // "Safe" underflow + EXPECT_EQ(after.get(), 511U); + atomic.apply( + Field5::plusTransformIgnoreOverflow(1U), + std::memory_order_acq_rel, + &before, + &after); // "Safe" overflow + EXPECT_EQ(after.get(), 0U); + atomic.apply( + Field5::plusTransformIgnoreOverflow(2048U), + std::memory_order_acq_rel, + &before, + &after); // "Safe" overflow + EXPECT_EQ(after.get(), 0U); +} diff --git a/folly/synchronization/test/BUCK b/folly/synchronization/test/BUCK index e671614aa41..e717f63c397 100644 --- a/folly/synchronization/test/BUCK +++ b/folly/synchronization/test/BUCK @@ -577,3 +577,13 @@ fbcode_target( "//folly/synchronization:striped_throttled_lifo_sem", ], ) + +fbcode_target( + _kind = cpp_unittest, + name = "atomic_bit_fields_test", + srcs = ["AtomicBitFieldsTest.cpp"], + deps = [ + "//folly/portability:gtest", + "//folly/synchronization:atomic_bit_fields", + ], +)