From 3330a119ff993a2683e44a5c4eb9ea8504d8b093 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Sun, 8 Mar 2026 03:08:40 +0900 Subject: [PATCH] sparc: Support 64-bit atomics on v8plus --- Cargo.toml | 2 +- README.md | 54 +- build.rs | 13 +- src/arch/sparc.rs | 304 +++++- src/lib.rs | 57 +- .../asm/atomic-maybe-uninit/sparcv8plus.asm | 962 ++++++++++++++++++ 6 files changed, 1328 insertions(+), 64 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7ffac187..1a53c86b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,7 +61,7 @@ unexpected_cfgs = { level = "warn", check-cfg = [ 'cfg(target_feature,values("lse2","lse128","rcpc3"))', # 1.82+ https://github.com/rust-lang/rust/pull/128192 'cfg(target_feature,values("partword-atomics","quadword-atomics"))', # 1.83+ https://github.com/rust-lang/rust/pull/130873 'cfg(target_feature,values("zaamo","zabha","zalrsc"))', # 1.83+ https://github.com/rust-lang/rust/pull/130877 - 'cfg(target_feature,values("leoncasa","v9"))', # 1.84+ https://github.com/rust-lang/rust/pull/132552 + 'cfg(target_feature,values("leoncasa","v8plus","v9"))', # 1.84+ https://github.com/rust-lang/rust/pull/132552 'cfg(target_feature,values("x87"))', # 1.85+ https://github.com/rust-lang/rust/pull/133099 'cfg(target_feature,values("isa-68020"))', # 1.85+ https://github.com/rust-lang/rust/pull/134329 'cfg(target_feature,values("zacas"))', # 1.87+ https://github.com/rust-lang/rust/pull/137417 diff --git a/README.md b/README.md index ebcdec5b..60ec4991 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,8 @@ This crate provides a way to soundly perform such operations. ## Platform Support -Currently, all CPU architectures supported by Rust (x86, x86_64, Arm, AArch64, RISC-V, LoongArch, Arm64EC, s390x, MIPS, PowerPC, MSP430, AVR, SPARC, Hexagon, M68k, C-SKY, and Xtensa) are supported. -(You can use `cfg_{has,no}_*` macros to write code based on whether or not which size of primitives is available.) +Currently, all CPU architectures supported by Rust (x86, x86_64, Arm, AArch64, Arm64EC, RISC-V, LoongArch, s390x, PowerPC, MIPS, SPARC, AVR, MSP430, Hexagon, M68k, C-SKY, and Xtensa) are supported. +(You can use `cfg_{has,no}_*` macros to write code based on which primitive sizes are available for the current target and Rust version.) 
| target_arch | primitives | load/store | swap/CAS | | ------------------------------------------- | --------------------------------------------------- |:----------:|:--------:| @@ -30,39 +30,43 @@ Currently, all CPU architectures supported by Rust (x86, x86_64, Arm, AArch64, R | arm (v6+ or Linux/Android) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | | arm (except for M-profile) \[3] | i64,u64 | ✓ | ✓ | | aarch64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | +| arm64ec \[10] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | | riscv32 | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | | riscv32 (+zacas) \[4] | i64,u64 | ✓ | ✓ | | riscv64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓\[1] | | riscv64 (+zacas) \[4] | i128,u128 | ✓ | ✓ | | loongarch64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| loongarch32 \[8] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| arm64ec \[7] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | -| s390x \[7] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | -| powerpc \[9] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| powerpc64 \[9] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| powerpc64 (+quadword-atomics) \[5] \[9] | i128,u128 | ✓ | ✓ | -| mips / mips32r6 \[10] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| mips64 / mips64r6 \[10] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| msp430 \[10] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | -| avr \[10] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | -| sparc \[10] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| sparc64 \[10] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| hexagon \[10] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| m68k \[10] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| m68k (+isa-68020) \[6] \[10] (experimental) | i64,u64 | ✓ | ✓ | -| csky \[10] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| xtensa \[10] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| loongarch32 \[11] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| s390x \[10] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | +| powerpc \[12] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| powerpc64 \[12] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| powerpc64 (+quadword-atomics) \[6] \[12] | i128,u128 | ✓ | ✓ | +| mips / mips32r6 \[13] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| mips64 / mips64r6 \[13] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| sparc \[13] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| sparc (+v8plus) \[8] \[13] (experimental) | i64,u64 | ✓ | ✓ | +| sparc64 \[13] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| avr \[13] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | +| msp430 \[13] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | +| hexagon \[13] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| m68k \[13] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| m68k (+isa-68020) \[9] \[13] (experimental) | i64,u64 | ✓ | ✓ | +| csky \[13] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| xtensa \[13] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | \[1] Arm's RMW operations are not available on Armv6-M (thumbv6m). 
RISC-V's RMW operations are not available on targets without the A (or G, which means IMAFD), Zalrsc, or Zacas extension, such as riscv32i, riscv32imc, etc. 32-bit SPARC's RMW operations require the `v9` or `leoncasa` target feature (enabled by default on Linux). M68k's atomic RMW operations require target-cpu M68020+ (enabled by default on Linux). C-SKY's atomic RMW operations require target-cpu ck860\* or c860\* (enabled by default on the hard-float target). Xtensa's atomic RMW operations are not available on esp32s2.
\[2] Requires `cmpxchg16b` target feature (enabled by default on Apple, Windows (except Windows 7), and Fuchsia targets).
\[3] Armv6+ or Linux/Android, except for M-profile architecture such as thumbv6m, thumbv7m, etc.
\[4] Requires `zacas` target feature.
-\[5] Requires `quadword-atomics` target feature (enabled by default on powerpc64le).
-\[6] Requires target-cpu M68020+ (enabled by default on Linux).
-\[7] Requires Rust 1.84+.
-\[8] Requires Rust 1.91+.
-\[9] Requires Rust 1.95+.
-\[10] Requires nightly due to `#![feature(asm_experimental_arch)]`.
+\[6] Requires `quadword-atomics` target feature (enabled by default on powerpc64le).
+\[8] Requires `v9` and `v8plus` target features (both enabled by default on Linux).
+\[9] Requires target-cpu M68020+ (enabled by default on Linux).
+\[10] Requires Rust 1.84+.
+\[11] Requires Rust 1.91+.
+\[12] Requires Rust 1.95+.
+\[13] Requires nightly due to `#![feature(asm_experimental_arch)]`.
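
As a rough illustration of the `cfg_{has,no}_*` macros mentioned above (a minimal sketch, not part of this patch; it assumes the crate's exported `cfg_has_atomic_64!`/`cfg_no_atomic_64!` macros and `AtomicMaybeUninit` API):

```rust
use core::{mem::MaybeUninit, sync::atomic::Ordering};
use atomic_maybe_uninit::{AtomicMaybeUninit, cfg_has_atomic_64, cfg_no_atomic_64};

cfg_has_atomic_64! {
    // Compiled only when 64-bit atomic load/store is available for the
    // current target and Rust version (e.g. 32-bit sparc with v9+v8plus).
    fn roundtrip(x: u64) -> MaybeUninit<u64> {
        let v = AtomicMaybeUninit::new(MaybeUninit::new(x));
        v.load(Ordering::Acquire)
    }
}
cfg_no_atomic_64! {
    // Fallback when 64-bit atomics are not available.
    fn roundtrip(x: u64) -> MaybeUninit<u64> {
        MaybeUninit::new(x)
    }
}
```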
+ + See also [Atomic operation overview by architecture](https://github.com/taiki-e/atomic-maybe-uninit/blob/HEAD/src/arch/README.md) for more information about atomic operations in these architectures. diff --git a/build.rs b/build.rs index 6b794536..b4a9b897 100644 --- a/build.rs +++ b/build.rs @@ -42,7 +42,7 @@ fn main() { // TODO: handle multi-line target_feature_fallback // grep -F 'target_feature_fallback("' build.rs | grep -Ev '^ *//' | sed -E 's/^.*target_feature_fallback\(//; s/",.*$/"/' | LC_ALL=C sort -u | tr '\n' ',' | sed -E 's/,$/\n/' println!( - r#"cargo:rustc-check-cfg=cfg(atomic_maybe_uninit_target_feature,values("a","fast-serialization","isa-68020","leoncasa","lowbytefirst","lse128","lse2","mclass","msync","partword-atomics","quadword-atomics","rcpc3","rmw","thumb-mode","thumb2","tinyencoding","v5te","v6","v7","v8","v8m","v9","x87","zaamo","zabha","zacas","zalrsc"))"# + r#"cargo:rustc-check-cfg=cfg(atomic_maybe_uninit_target_feature,values("a","fast-serialization","isa-68020","leoncasa","lowbytefirst","lse128","lse2","mclass","msync","partword-atomics","quadword-atomics","rcpc3","rmw","thumb-mode","thumb2","tinyencoding","v5te","v6","v7","v8","v8m","v8plus","v9","x87","zaamo","zabha","zacas","zalrsc"))"# ); } @@ -442,6 +442,7 @@ fn main() { let mut leoncasa = false; let mut v9 = false; let mut v7 = false; + let is_linux_or_solaris = target_os == "linux" || target_os == "solaris"; if let Some(cpu) = target_cpu() { // https://github.com/llvm/llvm-project/blob/llvmorg-22.1.0-rc1/llvm/lib/Target/Sparc/Sparc.td#L143 match &*cpu { @@ -450,21 +451,25 @@ fn main() { | "ma2480" | "ma2485" | "ma2x8x" | "gr712rc" | "leon4" | "gr740" => { leoncasa = true; } + // v8plus is ABI feature so not associated with -C target-cpu. "v9" | "ultrasparc" | "ultrasparc3" | "niagara" | "niagara2" | "niagara3" | "niagara4" => v9 = true, "v7" => v7 = true, _ => {} } } else { - // https://github.com/llvm/llvm-project/blob/llvmorg-22.1.0-rc1/clang/lib/Driver/ToolChains/Arch/Sparc.cpp#L136 - // https://github.com/rust-lang/rust/blob/1.90.0/compiler/rustc_target/src/spec/targets/sparc_unknown_linux_gnu.rs#L19 - v9 = target_os == "linux" || target_os == "solaris"; + // https://github.com/rust-lang/rust/blob/1.94.0/compiler/rustc_target/src/spec/targets/sparc_unknown_linux_gnu.rs#L19 + // https://github.com/llvm/llvm-project/blob/llvmorg-22.1.0/clang/lib/Driver/ToolChains/Arch/Sparc.cpp#L169 + v9 = is_linux_or_solaris; } // target_feature "leoncasa"/"v9" is unstable and available on rustc side since nightly-2024-11-11: https://github.com/rust-lang/rust/pull/132552 // Note: nightly-2024-11-10 is unavailable: https://github.com/rust-lang/rust/issues/132838 if !version.probe(84, 2024, 11, 10) || needs_target_feature_fallback(&version, None) { target_feature_fallback("leoncasa", leoncasa); target_feature_fallback("v9", v9); + // https://github.com/rust-lang/rust/blob/1.94.0/compiler/rustc_target/src/spec/targets/sparc_unknown_linux_gnu.rs#L18 + // https://github.com/llvm/llvm-project/blob/llvmorg-22.1.0/clang/lib/Driver/ToolChains/Arch/Sparc.cpp#L170 + target_feature_fallback("v8plus", is_linux_or_solaris); } if v7 { // SPARC-V7 has no STBAR. diff --git a/src/arch/sparc.rs b/src/arch/sparc.rs index fa0bc8eb..11bc1b33 100644 --- a/src/arch/sparc.rs +++ b/src/arch/sparc.rs @@ -548,10 +548,274 @@ macro_rules! 
atomic_sub_word { atomic_sub_word!(u8, "b"); atomic_sub_word!(u16, "h"); atomic!(u32, "", "%icc"); -// TODO: V8+ with 64-bit g/o reg #[cfg(target_arch = "sparc64")] atomic!(u64, "x", "%xcc"); +// ----------------------------------------------------------------------------- +// 64-bit atomics on SPARC-V8+ + +// Use .4byte directive because "error: instruction requires a CPU feature not currently enabled" error (as of LLVM 22) + +#[cfg(all( + target_arch = "sparc", + any(target_feature = "v9", atomic_maybe_uninit_target_feature = "v9"), + any(target_feature = "v8plus", atomic_maybe_uninit_target_feature = "v8plus"), +))] +const _: () = assert!(mem::align_of::() >= mem::size_of::()); +#[cfg(all( + target_arch = "sparc", + any(target_feature = "v9", atomic_maybe_uninit_target_feature = "v9"), + any(target_feature = "v8plus", atomic_maybe_uninit_target_feature = "v8plus"), +))] +delegate_signed!(delegate_all, u64); +#[cfg(all( + target_arch = "sparc", + any(target_feature = "v9", atomic_maybe_uninit_target_feature = "v9"), + any(target_feature = "v8plus", atomic_maybe_uninit_target_feature = "v8plus"), +))] +impl AtomicLoad for u64 { + #[inline] + unsafe fn atomic_load(src: *const MaybeUninit, order: Ordering) -> MaybeUninit { + debug_assert_atomic_unsafe_precondition!(src, u64); + let mut out = MaybeUninit::::uninit(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! atomic_load { + ($acquire:expr) => { + asm!( + // "ldx [%i0], %o0", // atomic { o0 = *i0 } + ".4byte 0xd05e0000", + $acquire, // fence + // "stx %o0, [%i1]", // *i1 = o0 + ".4byte 0xd0764000", + in("i0") src, + in("i1") out.as_mut_ptr(), + out("o0") _, + options(nostack, preserves_flags), + ) + // asm!( + // "ldx [{src}], %o0", // atomic { o0 = *src } + // $acquire, // fence + // "stx %o0, [{out}]", // *out = o0 + // src = in(reg) src, + // out = in(reg) out.as_mut_ptr(), + // out("o0") _, + // options(nostack, preserves_flags), + // ) + }; + } + match order { + Ordering::Relaxed => atomic_load!(""), + // Acquire and SeqCst loads are equivalent. + Ordering::Acquire | Ordering::SeqCst => atomic_load!("membar #LoadStore|#LoadLoad"), + _ => crate::utils::unreachable_unchecked(), + } + } + out + } +} +#[cfg(all( + target_arch = "sparc", + any(target_feature = "v9", atomic_maybe_uninit_target_feature = "v9"), + any(target_feature = "v8plus", atomic_maybe_uninit_target_feature = "v8plus"), +))] +impl AtomicStore for u64 { + #[inline] + unsafe fn atomic_store(dst: *mut MaybeUninit, val: MaybeUninit, order: Ordering) { + debug_assert_atomic_unsafe_precondition!(dst, u64); + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! 
atomic_store { + ($acquire:expr, $release:expr) => { + asm!( + // "ldx [%i1], %o0", // o0 = *i1 + ".4byte 0xd05e4000", + $release, // fence + // "stx %o0, [%i0]", // atomic { *i0 = o0 } + ".4byte 0xd0760000", + $acquire, // fence + in("i0") dst, + in("i1") val.as_ptr(), + out("o0") _, + options(nostack, preserves_flags), + ) + // asm!( + // "ldx [{val}], %o0", // o0 = *val + // $release, // fence + // "stx %o0, [{dst}]", // atomic { *dst = o0 } + // $acquire, // fence + // dst = in(reg) dst, + // val = in(reg) val.as_ptr(), + // out("o0") _, + // options(nostack, preserves_flags), + // ) + }; + } + match order { + Ordering::Relaxed => atomic_store!("", ""), + Ordering::Release => atomic_store!("", "membar #StoreStore|#LoadStore"), + Ordering::SeqCst => atomic_store!( + "membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad", + "membar #StoreStore|#LoadStore" + ), + _ => crate::utils::unreachable_unchecked(), + } + } + } +} +#[cfg(all( + target_arch = "sparc", + any(target_feature = "v9", atomic_maybe_uninit_target_feature = "v9"), + any(target_feature = "v8plus", atomic_maybe_uninit_target_feature = "v8plus"), +))] +impl AtomicSwap for u64 { + #[inline] + unsafe fn atomic_swap( + dst: *mut MaybeUninit, + val: MaybeUninit, + order: Ordering, + ) -> MaybeUninit { + debug_assert_atomic_unsafe_precondition!(dst, u64); + let mut out = MaybeUninit::::uninit(); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! swap { + ($acquire:expr, $release:expr, $_leon_nop:expr) => { + asm!( + // "ldx [%i1], %o0", // o0 = *i1 + ".4byte 0xd05e4000", + $release, // fence + // "ldx [%i0], %o2", // atomic { o2 = *i0 } + ".4byte 0xd45e0000", + "2:", // 'retry: + "mov %o0, %o1", // o1 = o0 + // cas!("x", "[%i0]", "%o2", "%o1"), // atomic { _x = *i0; if _x == o2 { *i0 = o1 }; o1 = _x } + ".4byte 0xd3f6100a", + "cmp %o1, %o2", // if o1 == o2 { cc.Z = true } else { cc.Z = false } + "mov %g0, %i3", // i3 = 0 + // move_!("%xcc", "1", "%i3"), // if cc.Z { i3 = 1 } + ".4byte 0xb7647001", + "cmp %i3, 1", // if r == 1 { cc.Z = true } else { cc.Z = false } + bne_a!("%icc", "2b"), // if !cc.Z { + "mov %o1, %o2", // o2 = o1; jump 'retry } + $acquire, // fence + // "stx %o1, [%i2]", // *i2 = o1 + ".4byte 0xd2768000", + in("i0") dst, + in("i1") val.as_ptr(), + in("i2") out.as_mut_ptr(), + out("o0") _, // val + out("o1") _, // out + out("o2") _, // tmp + out("i3") _, + // Do not use `preserves_flags` because CMP modifies the condition codes. + options(nostack), + ) + // asm!( + // "ldx [{val}], %o0", // o0 = *val + // $release, // fence + // "ldx [{dst}], %o2", // atomic { o2 = *dst } + // "2:", // 'retry: + // "mov %o0, %o1", // o1 = o0 + // cas!("x", "[{dst}]", "%o2", "%o1"), // atomic { _x = *dst; if _x == o2 { *dst = o1 }; o1 = _x } + // "cmp %o1, %o2", // if o1 == o2 { cc.Z = true } else { cc.Z = false } + // bne_a!("%xcc", "2b"), // if !cc.Z { + // "mov %o1, %o2", // o2 = o1; jump 'retry } + // $acquire, // fence + // "stx %o1, [{out}]", // *out = o1 + // dst = in(reg) dst, + // val = in(reg) val.as_ptr(), + // out = in(reg) out.as_mut_ptr(), + // out("o0") _, // val + // out("o1") _, // out + // out("o2") _, // tmp + // // Do not use `preserves_flags` because CMP modifies the condition codes. 
+ // options(nostack), + // ) + }; + } + atomic_rmw!(swap, order); + } + out + } +} +#[cfg(all( + target_arch = "sparc", + any(target_feature = "v9", atomic_maybe_uninit_target_feature = "v9"), + any(target_feature = "v8plus", atomic_maybe_uninit_target_feature = "v8plus"), +))] +impl AtomicCompareExchange for u64 { + #[inline] + unsafe fn atomic_compare_exchange( + dst: *mut MaybeUninit, + old: MaybeUninit, + new: MaybeUninit, + success: Ordering, + failure: Ordering, + ) -> (MaybeUninit, bool) { + debug_assert_atomic_unsafe_precondition!(dst, u64); + let order = crate::utils::upgrade_success_ordering(success, failure); + let mut out = new; + let mut r: RegSize; + + // SAFETY: the caller must uphold the safety contract. + unsafe { + macro_rules! cmpxchg { + ($acquire:expr, $release:expr, $_leon_nop:expr) => { + asm!( + // "ldx [%i1], %o0", // o0 = *i1 + ".4byte 0xd05e4000", + // "ldx [%i2], %o1", // o1 = *i2 + ".4byte 0xd25e8000", + $release, // fence + // cas!("x", "[%i0]", "%o0", "%o1"), // atomic { _x = *i0; if _x == o0 { *i0 = o1 }; o1 = _x } + ".4byte 0xd3f61008", + "cmp %o1, %o0", // if o1 == o0 { cc.Z = true } else { cc.Z = false } + $acquire, // fence + // "stx %o1, [%i2]", // *i2 = o1 + ".4byte 0xd2768000", + "mov %g0, %i2", // i2 = 0 + // move_!("%xcc", "1", "%i2"), // if cc.Z { i2 = 1 } + ".4byte 0xb5647001", + in("i0") dst, + in("i1") old.as_ptr(), + in("i2") out.as_mut_ptr(), + lateout("i2") r, + out("o0") _, // old + out("o1") _, // new => out + // Do not use `preserves_flags` because CMP modifies the condition codes. + options(nostack), + ) + // asm!( + // "ldx [{old}], %o0", // o0 = *old + // "ldx [{out}], %o1", // o1 = *out + // $release, // fence + // cas!("x", "[{dst}]", "%o0", "%o1"), // atomic { _x = *dst; if _x == o0 { *dst = o1 }; o1 = _x } + // "cmp %o1, %o0", // if o1 == o0 { cc.Z = true } else { cc.Z = false } + // $acquire, // fence + // "stx %o1, [{out}]", // *out = o1 + // "mov %g0, {r}", // r = 0 + // move_!("%xcc", "1", "{r}"), // if cc.Z { r = 1 } + // dst = in(reg) dst, + // old = in(reg) old.as_ptr(), + // out = in(reg) out.as_mut_ptr(), + // r = lateout(reg) r, + // out("o0") _, // old + // out("o1") _, // new => out + // // Do not use `preserves_flags` because CMP modifies the condition codes. + // options(nostack), + // ) + }; + } + atomic_rmw!(cmpxchg, order); + crate::utils::assert_unchecked(r == 0 || r == 1); // may help remove extra test + } + (out, r != 0) + } +} + // ----------------------------------------------------------------------------- // cfg macros @@ -579,24 +843,50 @@ macro_rules! cfg_has_atomic_32 { macro_rules! cfg_no_atomic_32 { ($($tt:tt)*) => {}; } -// TODO: V8+ with 64-bit g/o reg -#[cfg(target_arch = "sparc")] +#[cfg(not(any( + target_arch = "sparc64", + all( + target_arch = "sparc", + any(target_feature = "v9", atomic_maybe_uninit_target_feature = "v9"), + any(target_feature = "v8plus", atomic_maybe_uninit_target_feature = "v8plus"), + ), +)))] #[macro_export] macro_rules! cfg_has_atomic_64 { ($($tt:tt)*) => {}; } -// TODO: V8+ with 64-bit g/o reg -#[cfg(target_arch = "sparc")] +#[cfg(not(any( + target_arch = "sparc64", + all( + target_arch = "sparc", + any(target_feature = "v9", atomic_maybe_uninit_target_feature = "v9"), + any(target_feature = "v8plus", atomic_maybe_uninit_target_feature = "v8plus"), + ), +)))] #[macro_export] macro_rules! 
cfg_no_atomic_64 { ($($tt:tt)*) => { $($tt)* }; } -#[cfg(target_arch = "sparc64")] +#[cfg(any( + target_arch = "sparc64", + all( + target_arch = "sparc", + any(target_feature = "v9", atomic_maybe_uninit_target_feature = "v9"), + any(target_feature = "v8plus", atomic_maybe_uninit_target_feature = "v8plus"), + ), +))] #[macro_export] macro_rules! cfg_has_atomic_64 { ($($tt:tt)*) => { $($tt)* }; } -#[cfg(target_arch = "sparc64")] +#[cfg(any( + target_arch = "sparc64", + all( + target_arch = "sparc", + any(target_feature = "v9", atomic_maybe_uninit_target_feature = "v9"), + any(target_feature = "v8plus", atomic_maybe_uninit_target_feature = "v8plus"), + ), +))] #[macro_export] macro_rules! cfg_no_atomic_64 { ($($tt:tt)*) => {}; diff --git a/src/lib.rs b/src/lib.rs index cd16f56b..08abf6fc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,8 +16,8 @@ This crate provides a way to soundly perform such operations. ## Platform Support -Currently, all CPU architectures supported by Rust (x86, x86_64, Arm, AArch64, RISC-V, LoongArch, Arm64EC, s390x, MIPS, PowerPC, MSP430, AVR, SPARC, Hexagon, M68k, C-SKY, and Xtensa) are supported. -(You can use `cfg_{has,no}_*` macros to write code based on whether or not which size of primitives is available.) +Currently, all CPU architectures supported by Rust (x86, x86_64, Arm, AArch64, Arm64EC, RISC-V, LoongArch, s390x, PowerPC, MIPS, SPARC, AVR, MSP430, Hexagon, M68k, C-SKY, and Xtensa) are supported. +(You can use `cfg_{has,no}_*` macros to write code based on which primitive sizes are available for the current target and Rust version.) | target_arch | primitives | load/store | swap/CAS | | ------------------------------------------- | --------------------------------------------------- |:----------:|:--------:| @@ -27,39 +27,43 @@ Currently, all CPU architectures supported by Rust (x86, x86_64, Arm, AArch64, R | arm (v6+ or Linux/Android) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | | arm (except for M-profile) \[3] | i64,u64 | ✓ | ✓ | | aarch64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | +| arm64ec \[10] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | | riscv32 | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | | riscv32 (+zacas) \[4] | i64,u64 | ✓ | ✓ | | riscv64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓\[1] | | riscv64 (+zacas) \[4] | i128,u128 | ✓ | ✓ | | loongarch64 | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| loongarch32 \[8] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| arm64ec \[7] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | -| s390x \[7] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | -| powerpc \[9] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| powerpc64 \[9] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| powerpc64 (+quadword-atomics) \[5] \[9] | i128,u128 | ✓ | ✓ | -| mips / mips32r6 \[10] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | -| mips64 / mips64r6 \[10] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| msp430 \[10] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | -| avr \[10] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | -| sparc \[10] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| sparc64 \[10] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| hexagon \[10] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | -| m68k \[10] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| m68k (+isa-68020) \[6] \[10] (experimental) | i64,u64 | ✓ | ✓ | -| 
csky \[10] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | -| xtensa \[10] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| loongarch32 \[11] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| s390x \[10] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64,i128,u128 | ✓ | ✓ | +| powerpc \[12] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| powerpc64 \[12] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| powerpc64 (+quadword-atomics) \[6] \[12] | i128,u128 | ✓ | ✓ | +| mips / mips32r6 \[13] | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓ | +| mips64 / mips64r6 \[13] | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| sparc \[13] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| sparc (+v8plus) \[8] \[13] (experimental) | i64,u64 | ✓ | ✓ | +| sparc64 \[13] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| avr \[13] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | +| msp430 \[13] (experimental) | isize,usize,i8,u8,i16,u16 | ✓ | ✓ | +| hexagon \[13] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32,i64,u64 | ✓ | ✓ | +| m68k \[13] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| m68k (+isa-68020) \[9] \[13] (experimental) | i64,u64 | ✓ | ✓ | +| csky \[13] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | +| xtensa \[13] (experimental) | isize,usize,i8,u8,i16,u16,i32,u32 | ✓ | ✓\[1] | \[1] Arm's RMW operations are not available on Armv6-M (thumbv6m). RISC-V's RMW operations are not available on targets without the A (or G which means IMAFD) or Zalrsc or Zacas extension, such as riscv32i, riscv32imc, etc. 32-bit SPARC's RMW operations requires `v9` or `leoncasa` target feature (enabled by default on Linux). M68k's atomic RMW operations requires target-cpu M68020+ (enabled by default on Linux). C-SKY's atomic RMW operations requires target-cpu ck860\* or c860\* (enabled by default on the hard-float target). Xtensa's atomic RMW operations are not available on esp32s2.
\[2] Requires `cmpxchg16b` target feature (enabled by default on Apple, Windows (except Windows 7), and Fuchsia targets).
\[3] Armv6+ or Linux/Android, except for M-profile architecture such as thumbv6m, thumbv7m, etc.
\[4] Requires `zacas` target feature.
-\[5] Requires `quadword-atomics` target feature (enabled by default on powerpc64le).
-\[6] Requires target-cpu M68020+ (enabled by default on Linux).
-\[7] Requires Rust 1.84+.
-\[8] Requires Rust 1.91+.
-\[9] Requires Rust 1.95+.
-\[10] Requires nightly due to `#![feature(asm_experimental_arch)]`.
+\[6] Requires `quadword-atomics` target feature (enabled by default on powerpc64le).
+\[8] Requires `v9` and `v8plus` target features (both enabled by default on Linux).
+\[9] Requires target-cpu M68020+ (enabled by default on Linux).
+\[10] Requires Rust 1.84+.
+\[11] Requires Rust 1.91+.
+\[12] Requires Rust 1.95+.
+\[13] Requires nightly due to `#![feature(asm_experimental_arch)]`.
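
With `v9` and `v8plus` enabled, 64-bit atomics become usable on 32-bit SPARC; a minimal usage sketch (not from the patch, assuming the crate's documented `new`/`store`/`load` methods):

```rust
use core::{mem::MaybeUninit, sync::atomic::Ordering};
use atomic_maybe_uninit::AtomicMaybeUninit;

fn example() -> MaybeUninit<u64> {
    // A 64-bit value (possibly containing uninitialized bytes) updated atomically.
    let v = AtomicMaybeUninit::new(MaybeUninit::new(0_u64));
    v.store(MaybeUninit::new(42), Ordering::Release);
    v.load(Ordering::Acquire)
}
```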
+ + See also [Atomic operation overview by architecture](https://github.com/taiki-e/atomic-maybe-uninit/blob/HEAD/src/arch/README.md) for more information about atomic operations in these architectures. @@ -779,7 +783,7 @@ macro_rules! int { impl raw::Primitive for $ty {} const _: () = { assert!(mem::size_of::>() == mem::size_of::<$ty>()); - assert!(mem::align_of::>() == mem::size_of::<$ty>()); + assert!(mem::align_of::>() >= mem::size_of::<$ty>()); }; // SAFETY: the static assertion above ensures safety requirement. unsafe impl private::PrimitivePriv for $ty { @@ -966,7 +970,6 @@ mod private { pub(crate) type AlignPtr = Align16; // Check that all cfg_ macros work. - #[allow(unused_imports)] use crate::{ AtomicMaybeUninit, cfg_has_atomic_8, cfg_has_atomic_16, cfg_has_atomic_32, cfg_has_atomic_64, cfg_has_atomic_128, cfg_has_atomic_cas, cfg_has_atomic_ptr, diff --git a/tests/asm-test/asm/atomic-maybe-uninit/sparcv8plus.asm b/tests/asm-test/asm/atomic-maybe-uninit/sparcv8plus.asm index 217d4597..3e6a9dfe 100644 --- a/tests/asm-test/asm/atomic-maybe-uninit/sparcv8plus.asm +++ b/tests/asm-test/asm/atomic-maybe-uninit/sparcv8plus.asm @@ -1225,6 +1225,390 @@ asm_test::compare_exchange::u32::release_relaxed: ret restore %g0, %i2, %o0 +asm_test::compare_exchange::u64::acqrel_seqcst: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange::u64::seqcst_seqcst: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange::u64::acqrel_acquire: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange::u64::acqrel_relaxed: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange::u64::acquire_seqcst: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + 
ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange::u64::relaxed_seqcst: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange::u64::release_seqcst: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange::u64::seqcst_acquire: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange::u64::seqcst_relaxed: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange::u64::acquire_acquire: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange::u64::acquire_relaxed: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov 
%g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange::u64::relaxed_acquire: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange::u64::relaxed_relaxed: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange::u64::release_acquire: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange::u64::release_relaxed: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + asm_test::compare_exchange_weak::u8::acqrel_seqcst: save %sp, -96, %sp and %i0, -4, %i3 @@ -2452,6 +2836,390 @@ asm_test::compare_exchange_weak::u32::release_relaxed: ret restore %g0, %i2, %o0 +asm_test::compare_exchange_weak::u64::acqrel_seqcst: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange_weak::u64::seqcst_seqcst: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + 
+asm_test::compare_exchange_weak::u64::acqrel_acquire: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange_weak::u64::acqrel_relaxed: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange_weak::u64::acquire_seqcst: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange_weak::u64::relaxed_seqcst: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange_weak::u64::release_seqcst: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange_weak::u64::seqcst_acquire: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange_weak::u64::seqcst_relaxed: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov 
%i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange_weak::u64::acquire_acquire: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange_weak::u64::acquire_relaxed: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange_weak::u64::relaxed_acquire: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange_weak::u64::relaxed_relaxed: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange_weak::u64::release_acquire: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + +asm_test::compare_exchange_weak::u64::release_relaxed: + save %sp, -112, %sp + mov %i4, %g3 + mov %i2, %l1 + ld [ %fp + 0x40 ], %i4 + mov %i1, %l0 + sttw %l0, [ %fp + -16 ] + add %fp, -16, %i1 + add %fp, -8, %i2 + mov %i3, %g2 + sttw %g2, [ %fp + -8 ] + ldx [ %i1 ], %o0 + ldx [ %i2 ], %o1 + membar #StoreStore|#LoadStore + casx [ %i0 ], %o0, %o1 + cmp %o1, %o0 + stx %o1, [ %i2 ] + mov %g0, %i2 + move %xcc, 1, %i2 + ldtw [ %fp + -8 ], %i0 + sttw %i0, [ %i4 ] + stb %i2, [ %i4 + 8 ] + jmp %i7 + 0xc + restore %g0, %i4, %o0 + asm_test::load::u8::seqcst: save %sp, -96, %sp ldub [ %i0 ], 
%i0 @@ -2512,6 +3280,35 @@ asm_test::load::u32::relaxed: ret restore +asm_test::load::u64::seqcst: + save %sp, -104, %sp + add %fp, -8, %i1 + ldx [ %i0 ], %o0 + membar #LoadStore|#LoadLoad + stx %o0, [ %i1 ] + ldtw [ %fp + -8 ], %i0 + ret + restore + +asm_test::load::u64::acquire: + save %sp, -104, %sp + add %fp, -8, %i1 + ldx [ %i0 ], %o0 + membar #LoadStore|#LoadLoad + stx %o0, [ %i1 ] + ldtw [ %fp + -8 ], %i0 + ret + restore + +asm_test::load::u64::relaxed: + save %sp, -104, %sp + add %fp, -8, %i1 + ldx [ %i0 ], %o0 + stx %o0, [ %i1 ] + ldtw [ %fp + -8 ], %i0 + ret + restore + asm_test::swap::u8::acqrel: save %sp, -96, %sp and %i0, -4, %i2 @@ -2805,6 +3602,132 @@ asm_test::swap::u32::release: ret restore %g0, %i2, %o0 +asm_test::swap::u64::acqrel: + save %sp, -112, %sp + mov %i2, %i5 + add %fp, -16, %i3 + add %fp, -8, %i2 + mov %i1, %i4 + sttw %i4, [ %fp + -16 ] + mov %i3, %i1 + ldx [ %i1 ], %o0 + membar #StoreStore|#LoadStore + ldx [ %i0 ], %o2 +0: + mov %o0, %o1 + casx [ %i0 ], %o2, %o1 + cmp %o1, %o2 + mov %g0, %i3 + move %xcc, 1, %i3 + cmp %i3, 1 + bne,a %icc, 0b + mov %o1, %o2 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + ldtw [ %fp + -8 ], %i0 + ret + restore + +asm_test::swap::u64::seqcst: + save %sp, -112, %sp + mov %i2, %i5 + add %fp, -16, %i3 + add %fp, -8, %i2 + mov %i1, %i4 + sttw %i4, [ %fp + -16 ] + mov %i3, %i1 + ldx [ %i1 ], %o0 + membar #StoreStore|#LoadStore + ldx [ %i0 ], %o2 +0: + mov %o0, %o1 + casx [ %i0 ], %o2, %o1 + cmp %o1, %o2 + mov %g0, %i3 + move %xcc, 1, %i3 + cmp %i3, 1 + bne,a %icc, 0b + mov %o1, %o2 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + ldtw [ %fp + -8 ], %i0 + ret + restore + +asm_test::swap::u64::acquire: + save %sp, -112, %sp + mov %i2, %i5 + add %fp, -16, %i3 + add %fp, -8, %i2 + mov %i1, %i4 + sttw %i4, [ %fp + -16 ] + mov %i3, %i1 + ldx [ %i1 ], %o0 + ldx [ %i0 ], %o2 +0: + mov %o0, %o1 + casx [ %i0 ], %o2, %o1 + cmp %o1, %o2 + mov %g0, %i3 + move %xcc, 1, %i3 + cmp %i3, 1 + bne,a %icc, 0b + mov %o1, %o2 + membar #LoadStore|#LoadLoad + stx %o1, [ %i2 ] + ldtw [ %fp + -8 ], %i0 + ret + restore + +asm_test::swap::u64::relaxed: + save %sp, -112, %sp + mov %i2, %i5 + add %fp, -16, %i3 + add %fp, -8, %i2 + mov %i1, %i4 + sttw %i4, [ %fp + -16 ] + mov %i3, %i1 + ldx [ %i1 ], %o0 + ldx [ %i0 ], %o2 +0: + mov %o0, %o1 + casx [ %i0 ], %o2, %o1 + cmp %o1, %o2 + mov %g0, %i3 + move %xcc, 1, %i3 + cmp %i3, 1 + bne,a %icc, 0b + mov %o1, %o2 + stx %o1, [ %i2 ] + ldtw [ %fp + -8 ], %i0 + ret + restore + +asm_test::swap::u64::release: + save %sp, -112, %sp + mov %i2, %i5 + add %fp, -16, %i3 + add %fp, -8, %i2 + mov %i1, %i4 + sttw %i4, [ %fp + -16 ] + mov %i3, %i1 + ldx [ %i1 ], %o0 + membar #StoreStore|#LoadStore + ldx [ %i0 ], %o2 +0: + mov %o0, %o1 + casx [ %i0 ], %o2, %o1 + cmp %o1, %o2 + mov %g0, %i3 + move %xcc, 1, %i3 + cmp %i3, 1 + bne,a %icc, 0b + mov %o1, %o2 + stx %o1, [ %i2 ] + ldtw [ %fp + -8 ], %i0 + ret + restore + asm_test::store::u8::seqcst: save %sp, -96, %sp membar #StoreStore|#LoadStore @@ -2867,3 +3790,42 @@ asm_test::store::u32::release: st %i1, [ %i0 ] ret restore + +asm_test::store::u64::seqcst: + save %sp, -104, %sp + mov %i2, %i5 + add %fp, -8, %i2 + mov %i1, %i4 + sttw %i4, [ %fp + -8 ] + mov %i2, %i1 + ldx [ %i1 ], %o0 + membar #StoreStore|#LoadStore + stx %o0, [ %i0 ] + membar #StoreStore|#LoadStore|#StoreLoad|#LoadLoad + ret + restore + +asm_test::store::u64::relaxed: + save %sp, -104, %sp + mov %i2, %i5 + add %fp, -8, %i2 + mov %i1, %i4 + sttw %i4, [ %fp + -8 ] + mov %i2, %i1 + ldx [ %i1 ], %o0 + stx %o0, [ %i0 ] + 
ret + restore + +asm_test::store::u64::release: + save %sp, -104, %sp + mov %i2, %i5 + add %fp, -8, %i2 + mov %i1, %i4 + sttw %i4, [ %fp + -8 ] + mov %i2, %i1 + ldx [ %i1 ], %o0 + membar #StoreStore|#LoadStore + stx %o0, [ %i0 ] + ret + restore
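
The `u64` swap added in src/arch/sparc.rs above is built as a `casx` retry loop, since SPARC-V9 has no 64-bit swap instruction. A rough sketch of the same loop structure, expressed with core's `AtomicU64` purely for illustration (not the crate's actual code):

```rust
use core::sync::atomic::{AtomicU64, Ordering};

// Emulate swap with a compare-exchange retry loop, mirroring the
// `ldx` / `casx` / `bne,a` sequence in the inline asm above.
fn swap_via_cas(dst: &AtomicU64, val: u64) -> u64 {
    let mut expected = dst.load(Ordering::Relaxed); // initial guess ("ldx [dst]")
    loop {
        match dst.compare_exchange_weak(expected, val, Ordering::AcqRel, Ordering::Relaxed) {
            Ok(prev) => return prev,      // CAS hit: prev is the old value
            Err(prev) => expected = prev, // CAS missed: retry with the observed value
        }
    }
}
```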